├── .gitignore ├── Pipfile ├── README.md ├── Supercharged Web Scraping with Asyncio.ipynb ├── Use the supercharged module.ipynb ├── async_scrape.py ├── lessons ├── 10 - Extract Product Data.ipynb ├── 11 - Async Product Data Extraction.ipynb ├── 3 - Sync vs Async.ipynb ├── 4 - Blocking & Timeouts.ipynb ├── 5 - Scraping with Selenium.ipynb ├── 6 - Async Web Scraping with chromedriver and arsenic.ipynb ├── 7 - Hide `arsenic` Logs.ipynb ├── 8 - Async Data with Pandas.ipynb └── 9 - Prepare to Scrape Multiple URLs.ipynb ├── local.csv ├── pyvenv.cfg ├── requirements.txt ├── spoonflower_fabrics.csv ├── spoonflower_links.csv └── supercharged ├── __init__.py ├── conf.py ├── db.py ├── logging.py ├── main.py ├── projects ├── __init__.py └── spoonflower.py ├── scrapers.py └── storage.py /.gitignore: -------------------------------------------------------------------------------- 1 | bin/ 2 | include/ 3 | share/ 4 | etc/ 5 | 6 | .DS_Store 7 | *.pkl 8 | .pyvenv.cfg 9 | 10 | # Byte-compiled / optimized / DLL files 11 | __pycache__/ 12 | *.py[cod] 13 | *$py.class 14 | 15 | # C extensions 16 | *.so 17 | 18 | # Distribution / packaging 19 | .Python 20 | build/ 21 | develop-eggs/ 22 | dist/ 23 | downloads/ 24 | eggs/ 25 | .eggs/ 26 | lib/ 27 | lib64/ 28 | parts/ 29 | sdist/ 30 | var/ 31 | wheels/ 32 | share/python-wheels/ 33 | *.egg-info/ 34 | .installed.cfg 35 | *.egg 36 | MANIFEST 37 | 38 | # PyInstaller 39 | # Usually these files are written by a python script from a template 40 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 41 | *.manifest 42 | *.spec 43 | 44 | # Installer logs 45 | pip-log.txt 46 | pip-delete-this-directory.txt 47 | 48 | # Unit test / coverage reports 49 | htmlcov/ 50 | .tox/ 51 | .nox/ 52 | .coverage 53 | .coverage.* 54 | .cache 55 | nosetests.xml 56 | coverage.xml 57 | *.cover 58 | *.py,cover 59 | .hypothesis/ 60 | .pytest_cache/ 61 | cover/ 62 | 63 | # Translations 64 | *.mo 65 | *.pot 66 | 67 | # Django stuff: 68 | *.log 69 | local_settings.py 70 | db.sqlite3 71 | db.sqlite3-journal 72 | 73 | # Flask stuff: 74 | instance/ 75 | .webassets-cache 76 | 77 | # Scrapy stuff: 78 | .scrapy 79 | 80 | # Sphinx documentation 81 | docs/_build/ 82 | 83 | # PyBuilder 84 | .pybuilder/ 85 | target/ 86 | 87 | # Jupyter Notebook 88 | .ipynb_checkpoints 89 | 90 | # IPython 91 | profile_default/ 92 | ipython_config.py 93 | 94 | # pyenv 95 | # For a library or package, you might want to ignore these files since the code is 96 | # intended to run in multiple environments; otherwise, check them in: 97 | # .python-version 98 | 99 | # pipenv 100 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 101 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 102 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 103 | # install all needed dependencies. 104 | #Pipfile.lock 105 | 106 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 107 | __pypackages__/ 108 | 109 | # Celery stuff 110 | celerybeat-schedule 111 | celerybeat.pid 112 | 113 | # SageMath parsed files 114 | *.sage.py 115 | 116 | # Environments 117 | .env 118 | .venv 119 | env/ 120 | venv/ 121 | ENV/ 122 | env.bak/ 123 | venv.bak/ 124 | 125 | # Spyder project settings 126 | .spyderproject 127 | .spyproject 128 | 129 | # Rope project settings 130 | .ropeproject 131 | 132 | # mkdocs documentation 133 | /site 134 | 135 | # mypy 136 | .mypy_cache/ 137 | .dmypy.json 138 | dmypy.json 139 | 140 | # Pyre type checker 141 | .pyre/ 142 | 143 | # pytype static type analyzer 144 | .pytype/ 145 | 146 | # Cython debug symbols 147 | cython_debug/ 148 | -------------------------------------------------------------------------------- /Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | name = "pypi" 3 | url = "https://pypi.org/simple" 4 | verify_ssl = true 5 | 6 | [dev-packages] 7 | 8 | [packages] 9 | jupyter = "*" 10 | fire = "*" 11 | sqlalchemy = "*" 12 | pandas = "*" 13 | arsenic = "*" 14 | requests-html = "*" 15 | 16 | [requires] 17 | python_version = "3.8" 18 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Superchaged Web Scraping with Asyncio Logo](https://static.codingforentrepreneurs.com/media/projects/supercharged-web-scraping-with-asyncio/images/share/Supercharged_Web_Scraping_with_Asyn.jpg)](https://www.codingforentrepreneurs.com/projects/supercharged-web-scraping-with-asyncio) 2 | 3 | # Supercharged Web Scraping with Asyncio 4 | 5 | 6 | Web scraping is simply automatically opening up any website and grabbing the data you find important on that website. It's fundamental to the internet, search engines, Data Science, automation, machine learning, and much more. 7 | 8 | *Opening* websites and *extracting* data are only part of what makes web scraping great. It's the parsing of the data that's where the value is. 9 | 10 | This project will cover: 11 | - Basic web scraping with Python 12 | - Web scraping with Selenium 13 | - Sync vs Async 14 | - Asynchronous Web scraping with Asyncio 15 | 16 | 17 | Requirements: 18 | - Python experience (at least the first 15 days of [this project](https://www.codingforentrepreneurs.com/projects/30-days-python-38/)). 19 | - Selenium & [chromedriver](https://chromedriver.chromium.org/) installed (watch how in [this one](https://www.codingforentrepreneurs.com/projects/30-days-python-38/day-16-use-selenium-scape-automate-behind-password)). 20 | 21 | #### [Watch the series](https://www.codingforentrepreneurs.com/projects/supercharged-web-scraping-with-asyncio) 22 | 23 | 24 | ### To use this code: 25 | 26 | **1. Clone** 27 | ```bash 28 | git clone https://github.com/codingforentrepreneurs/Supercharged-Web-Scraping-with-Asyncio supercharged 29 | ``` 30 | 31 | **2. Create Virtual Environment** 32 | ```bash 33 | cd supercharged 34 | python3.6 -m venv . 35 | ``` 36 | 37 | **3. Activate virtual environment and install requirements** 38 | Mac/Linux 39 | ``` 40 | source bin/activate 41 | ``` 42 | 43 | Windows: 44 | ``` 45 | .\Scripts\activate 46 | ``` 47 | 48 | > If using **pipenv**, run `pipenv shell` && `pipenv install` 49 | 50 | 51 | **Run jupyter** 52 | ``` 53 | jupyter notebook 54 | ``` 55 | or 56 | ``` 57 | python -m jupyter notebook 58 | ``` 59 | > If using **pipenv**, run `pipenv run jupyter notebook` -------------------------------------------------------------------------------- /Use the supercharged module.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## View Stored Data (if any)\n", 8 | "Using SQL & Pandas" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": null, 14 | "metadata": {}, 15 | "outputs": [], 16 | "source": [ 17 | "from supercharged.storage import df_from_sql" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": null, 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "tables = [\n", 27 | " 'spoonflower_links',\n", 28 | " 'spoonflower_fabrics'\n", 29 | "]" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": null, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "links_df = df_from_sql(tables[0])\n", 39 | "products_df = df_from_sql(tables[1])" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "links_df.head()" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": null, 54 | "metadata": {}, 55 | "outputs": [], 56 | "source": [ 57 | "links_df.shape" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": null, 63 | "metadata": {}, 64 | "outputs": [], 65 | "source": [ 66 | "products_df.head(n=100)" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [ 75 | "products_df.shape" 76 | ] 77 | }, 78 | { 79 | "cell_type": "markdown", 80 | "metadata": {}, 81 | "source": [ 82 | "## Scrape fabric links \n", 83 | "From the paginated list view on https://www.spoonflower.com/en/shop?on=fabric" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": null, 89 | "metadata": {}, 90 | "outputs": [], 91 | "source": [ 92 | "!python -m supercharged.main spoonflower --use_list_range --limit 5 --save_csv" 93 | ] 94 | }, 95 | { 96 | "cell_type": "markdown", 97 | "metadata": {}, 98 | "source": [ 99 | "## Scrape fabric product detail \n", 100 | "Extract specific data for stored links (from above)." 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": null, 106 | "metadata": {}, 107 | "outputs": [], 108 | "source": [ 109 | "!python -m supercharged.main spoonflower --limit 10" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": null, 115 | "metadata": {}, 116 | "outputs": [], 117 | "source": [] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": null, 122 | "metadata": {}, 123 | "outputs": [], 124 | "source": [] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": null, 129 | "metadata": {}, 130 | "outputs": [], 131 | "source": [ 132 | " " 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": null, 138 | "metadata": {}, 139 | "outputs": [], 140 | "source": [] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": null, 145 | "metadata": {}, 146 | "outputs": [], 147 | "source": [] 148 | } 149 | ], 150 | "metadata": { 151 | "kernelspec": { 152 | "display_name": "Python 3", 153 | "language": "python", 154 | "name": "python3" 155 | }, 156 | "language_info": { 157 | "codemirror_mode": { 158 | "name": "ipython", 159 | "version": 3 160 | }, 161 | "file_extension": ".py", 162 | "mimetype": "text/x-python", 163 | "name": "python", 164 | "nbconvert_exporter": "python", 165 | "pygments_lexer": "ipython3", 166 | "version": "3.8.2" 167 | } 168 | }, 169 | "nbformat": 4, 170 | "nbformat_minor": 4 171 | } 172 | -------------------------------------------------------------------------------- /async_scrape.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import asyncio 4 | from arsenic import get_session, keys, browsers, services 5 | import pandas as pd 6 | from requests_html import HTML 7 | import itertools 8 | import re 9 | import time 10 | import pathlib 11 | from urllib.parse import urlparse 12 | 13 | import logging 14 | import structlog # pip install structlog 15 | 16 | 17 | 18 | def store_links_as_df_pickle(datas=[], name='links.pkl'): 19 | new_df = pd.DataFrame(datas) 20 | og_df = pd.DataFrame([{'id': 0}]) 21 | if pathlib.Path(name).exists(): 22 | og_df = pd.read_pickle(name) # read_csv 23 | df = pd.concat([og_df, new_df]) 24 | df.reset_index(inplace=True, drop=False) 25 | df = df[['id', 'slug', 'path', 'scraped']] 26 | df = df.loc[~df.id.duplicated(keep='first')] 27 | # df.set_index('id', inplace=True, drop=True) 28 | df.dropna(inplace=True) 29 | df.to_pickle(name) 30 | return df 31 | 32 | 33 | def set_arsenic_log_level(level = logging.WARNING): 34 | # Create logger 35 | logger = logging.getLogger('arsenic') 36 | 37 | # We need factory, to return application-wide logger 38 | def logger_factory(): 39 | return logger 40 | 41 | structlog.configure(logger_factory=logger_factory) 42 | logger.setLevel(level) 43 | 44 | 45 | # /en/fabric/7137786-genevieve-floral-by-crystal_walen 46 | async def extract_id_slug(url_path): 47 | path = url_path 48 | if path.startswith('http'): 49 | parsed_url = urlparse(path) 50 | path = parsed_url.path 51 | regex = r"^[^\s]+/(?P\d+)-(?P[\w_-]+)$" 52 | group = re.match(regex, path) 53 | if not group: 54 | return None, None, path 55 | return group['id'], group['slug'], path 56 | 57 | 58 | 59 | async def get_product_data(url, content): 60 | id_, slug_, path = await extract_id_slug(url) 61 | titleEl = content.find(".design-title", first=True) 62 | data = { 63 | 'id': id_, 64 | 'slug': slug_, 65 | 'path': path, 66 | } 67 | title = None 68 | if titleEl == None: 69 | return data 70 | title = titleEl.text 71 | data['title'] = title 72 | sizeEl = content.find("#fabric-size", first=True) 73 | size = None 74 | if sizeEl != None: 75 | size = sizeEl.text 76 | data['size'] = size 77 | price_parent_el = content.find('.b-item-price', first=True) 78 | price_el = price_parent_el.find('.visuallyhidden', first=True) 79 | for i in price_el.element.iterchildren(): 80 | attrs = dict(**i.attrib) 81 | try: 82 | del attrs['itemprop'] 83 | except: 84 | pass 85 | attrs_keys = list(attrs.keys()) 86 | data[i.attrib['itemprop']] = i.attrib[attrs_keys[0]] 87 | return data 88 | 89 | async def get_parsable_html(body_html_str): 90 | return HTML(html=body_html_str) 91 | 92 | async def get_links(html_r): 93 | fabric_links = [x for x in list(html_r.links) if x.startswith("/en/fabric")] 94 | datas = [] 95 | for path in fabric_links: 96 | id_, slug_, _ = await extract_id_slug(path) 97 | data = { 98 | "id": id_, 99 | "slug": slug_, 100 | "path": path, 101 | "scraped": 0 # True / False -> 1 / 0 102 | } 103 | datas.append(data) 104 | return datas 105 | 106 | async def scraper(url, i=-1, timeout=60, start=None): 107 | service = services.Chromedriver() 108 | browser = browsers.Chrome(chromeOptions={ 109 | 'args': ['--headless', '--disable-gpu'] 110 | }) 111 | async with get_session(service, browser) as session: 112 | try: 113 | await asyncio.wait_for(session.get(url), timeout=timeout) 114 | except asyncio.TimeoutError: 115 | return [] 116 | await asyncio.sleep(10) 117 | body = await session.get_page_source() # save this locally?? 118 | content = await get_parsable_html(body) 119 | links = await get_links(content) 120 | product_data = await get_product_data(url, content) 121 | if start != None: 122 | end = time.time() - start 123 | print(f'{i} took {end} seconds') 124 | # print(body) 125 | dataset = { 126 | "links": links, 127 | "product_data": product_data 128 | } 129 | return dataset 130 | 131 | 132 | async def run(urls, timeout=60, start=None): 133 | results = [] 134 | for i, url in enumerate(urls): 135 | results.append( 136 | asyncio.create_task(scraper(url, i=i, timeout=60, start=start)) 137 | ) 138 | list_of_links = await asyncio.gather(*results) 139 | return list_of_links 140 | 141 | if __name__ == "__main__": 142 | set_arsenic_log_level() 143 | start = time.time() 144 | urls = ['https://www.spoonflower.com/en/shop?on=fabric', 145 | 'https://www.spoonflower.com/en/fabric/6444170-catching-fireflies-by-thestorysmith'] 146 | name = "link.pkl" 147 | results = asyncio.run(run(urls, start=start)) 148 | print(results) 149 | end = time.time() - start 150 | print(f'total time is {end}') 151 | # df = store_links_as_df_pickle(results, name=name) 152 | # print(df.head()) 153 | -------------------------------------------------------------------------------- /lessons/3 - Sync vs Async.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "# !pip install requests-html selenium arsenic pandas" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "## Sync vs Async\n", 17 | "\n", 18 | "The Chess Game Analogy\n", 19 | "\n", 20 | "Consecutive vs Concurrent" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 9, 26 | "metadata": {}, 27 | "outputs": [ 28 | { 29 | "name": "stdout", 30 | "output_type": "stream", 31 | "text": [ 32 | "CPU times: user 10 µs, sys: 0 ns, total: 10 µs\n", 33 | "Wall time: 14.1 µs\n" 34 | ] 35 | } 36 | ], 37 | "source": [ 38 | "%%time\n", 39 | "\n", 40 | "import time\n", 41 | "\n", 42 | "iteration_times = [1, 3, 2, 4]\n", 43 | "\n", 44 | "\n", 45 | "def sleeper(seconds, i=-1):\n", 46 | " if i != -1:\n", 47 | " print(f\"{i}\\t{seconds}s\")\n", 48 | " time.sleep(seconds)\n", 49 | "\n", 50 | "\n", 51 | "def run():\n", 52 | " for i, second in enumerate(iteration_times):\n", 53 | " sleeper(second, i=i)\n", 54 | " \n", 55 | "# run()" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 21, 61 | "metadata": {}, 62 | "outputs": [ 63 | { 64 | "name": "stdout", 65 | "output_type": "stream", 66 | "text": [ 67 | "[:5>>, :5>>, :5>>, :5>>]\n", 68 | "0.0005660057067871094\n", 69 | "0\t1s\n", 70 | "1\t3s\n", 71 | "2\t2s\n", 72 | "3\t1s\n", 73 | "0 done 1.0051939487457275\n", 74 | "3 done 1.0053329467773438\n", 75 | "2 done 2.0058019161224365\n", 76 | "1 done 3.0049729347229004\n" 77 | ] 78 | } 79 | ], 80 | "source": [ 81 | "start = time.time()\n", 82 | "iteration_times = [1, 3, 2, 1]\n", 83 | "import asyncio\n", 84 | "\n", 85 | "async def a_sleeper(seconds, i=-1):\n", 86 | " if i != -1:\n", 87 | " print(f\"{i}\\t{seconds}s\")\n", 88 | " await asyncio.sleep(seconds) # coroutine\n", 89 | " \n", 90 | " ellap = time.time() - start\n", 91 | " print(f\"{i} done {ellap}\")\n", 92 | " return \"abc\"\n", 93 | "\n", 94 | "async def a_run():\n", 95 | " results = []\n", 96 | " for i, second in enumerate(iteration_times):\n", 97 | " results.append(\n", 98 | " asyncio.create_task(a_sleeper(second, i=i))\n", 99 | " )\n", 100 | " return results\n", 101 | " \n", 102 | "results = await a_run()\n", 103 | "print(results)\n", 104 | "end = time.time() - start\n", 105 | "\n", 106 | "print(end)" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": null, 112 | "metadata": {}, 113 | "outputs": [], 114 | "source": [] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": null, 119 | "metadata": {}, 120 | "outputs": [], 121 | "source": [] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": null, 126 | "metadata": {}, 127 | "outputs": [], 128 | "source": [] 129 | } 130 | ], 131 | "metadata": { 132 | "kernelspec": { 133 | "display_name": "Python 3", 134 | "language": "python", 135 | "name": "python3" 136 | }, 137 | "language_info": { 138 | "codemirror_mode": { 139 | "name": "ipython", 140 | "version": 3 141 | }, 142 | "file_extension": ".py", 143 | "mimetype": "text/x-python", 144 | "name": "python", 145 | "nbconvert_exporter": "python", 146 | "pygments_lexer": "ipython3", 147 | "version": "3.8.2" 148 | } 149 | }, 150 | "nbformat": 4, 151 | "nbformat_minor": 4 152 | } 153 | -------------------------------------------------------------------------------- /lessons/4 - Blocking & Timeouts.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "# !pip install requests-html selenium arsenic pandas" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "## Sync vs Async\n", 17 | "\n", 18 | "The Chess Game Analogy\n", 19 | "\n", 20 | "Consecutive vs Concurrent" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 1, 26 | "metadata": {}, 27 | "outputs": [ 28 | { 29 | "name": "stdout", 30 | "output_type": "stream", 31 | "text": [ 32 | "CPU times: user 5 µs, sys: 1 µs, total: 6 µs\n", 33 | "Wall time: 7.15 µs\n" 34 | ] 35 | } 36 | ], 37 | "source": [ 38 | "%%time\n", 39 | "\n", 40 | "import time\n", 41 | "\n", 42 | "iteration_times = [1, 3, 2, 4]\n", 43 | "\n", 44 | "\n", 45 | "def sleeper(seconds, i=-1):\n", 46 | " if i != -1:\n", 47 | " print(f\"{i}\\t{seconds}s\")\n", 48 | " time.sleep(seconds)\n", 49 | "\n", 50 | "\n", 51 | "def run():\n", 52 | " for i, second in enumerate(iteration_times):\n", 53 | " sleeper(second, i=i)\n", 54 | " \n", 55 | "# run()" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 2, 61 | "metadata": {}, 62 | "outputs": [ 63 | { 64 | "name": "stdout", 65 | "output_type": "stream", 66 | "text": [ 67 | "[:5>>, :5>>, :5>>, :5>>]\n", 68 | "0.0005679130554199219\n", 69 | "0\t1s\n", 70 | "1\t3s\n", 71 | "2\t2s\n", 72 | "3\t1s\n", 73 | "0 done 1.0058197975158691\n", 74 | "3 done 1.005964994430542\n", 75 | "2 done 2.0058486461639404\n", 76 | "1 done 3.005863904953003\n" 77 | ] 78 | } 79 | ], 80 | "source": [ 81 | "start = time.time()\n", 82 | "iteration_times = [1, 3, 2, 1]\n", 83 | "import asyncio\n", 84 | "\n", 85 | "async def a_sleeper(seconds, i=-1):\n", 86 | " if i != -1:\n", 87 | " print(f\"{i}\\t{seconds}s\")\n", 88 | " await asyncio.sleep(seconds) # coroutine\n", 89 | " \n", 90 | " ellap = time.time() - start\n", 91 | " print(f\"{i} done {ellap}\")\n", 92 | " return \"abc\"\n", 93 | "\n", 94 | "async def a_run():\n", 95 | " results = []\n", 96 | " for i, second in enumerate(iteration_times):\n", 97 | " results.append(\n", 98 | " asyncio.create_task(a_sleeper(second, i=i))\n", 99 | " )\n", 100 | " return results\n", 101 | " \n", 102 | "results = await a_run()\n", 103 | "print(results)\n", 104 | "end = time.time() - start\n", 105 | "\n", 106 | "print(end)" 107 | ] 108 | }, 109 | { 110 | "cell_type": "markdown", 111 | "metadata": {}, 112 | "source": [ 113 | "## Blocking & Timeouts" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": 3, 119 | "metadata": {}, 120 | "outputs": [], 121 | "source": [ 122 | "def sleeper(seconds, i=-1):\n", 123 | " if i != -1:\n", 124 | " print(f\"{i}\\t{seconds}s\")\n", 125 | " time.sleep(seconds)\n", 126 | "\n", 127 | "sleeper(12)" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": 18, 133 | "metadata": {}, 134 | "outputs": [ 135 | { 136 | "ename": "TimeoutError", 137 | "evalue": "", 138 | "output_type": "error", 139 | "traceback": [ 140 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 141 | "\u001b[0;31mTimeoutError\u001b[0m Traceback (most recent call last)", 142 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mawait\u001b[0m \u001b[0masyncio\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwait_for\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0masyncio\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msleep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mseconds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 7\u001b[0;31m \u001b[0;32mawait\u001b[0m \u001b[0masleeper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m12\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 143 | "\u001b[0;32m\u001b[0m in \u001b[0;36masleeper\u001b[0;34m(seconds, i, timeout)\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mi\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"a{i}\\t{seconds}s\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0;32mawait\u001b[0m \u001b[0masyncio\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwait_for\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0masyncio\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msleep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mseconds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 6\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;32mawait\u001b[0m \u001b[0masleeper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m12\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 144 | "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/asyncio/tasks.py\u001b[0m in \u001b[0;36mwait_for\u001b[0;34m(fut, timeout, loop)\u001b[0m\n\u001b[1;32m 488\u001b[0m \u001b[0;31m# See https://bugs.python.org/issue32751\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 489\u001b[0m \u001b[0;32mawait\u001b[0m \u001b[0m_cancel_and_wait\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfut\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mloop\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mloop\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 490\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mexceptions\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTimeoutError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 491\u001b[0m \u001b[0;32mfinally\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 492\u001b[0m \u001b[0mtimeout_handle\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcancel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 145 | "\u001b[0;31mTimeoutError\u001b[0m: " 146 | ] 147 | } 148 | ], 149 | "source": [ 150 | "async def asleeper(seconds, i=-1):\n", 151 | " # time.sleep(seconds)\n", 152 | " if i != -1:\n", 153 | " print(f\"a{i}\\t{seconds}s\")\n", 154 | " await asyncio.sleep(seconds)\n", 155 | " \n", 156 | "await asleeper(12)" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": 6, 162 | "metadata": {}, 163 | "outputs": [ 164 | { 165 | "name": "stdout", 166 | "output_type": "stream", 167 | "text": [ 168 | "hello word\n" 169 | ] 170 | } 171 | ], 172 | "source": [ 173 | "print(\"hello word\")" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": 7, 179 | "metadata": {}, 180 | "outputs": [ 181 | { 182 | "data": { 183 | "text/plain": [ 184 | ":1>>" 185 | ] 186 | }, 187 | "execution_count": 7, 188 | "metadata": {}, 189 | "output_type": "execute_result" 190 | } 191 | ], 192 | "source": [ 193 | "loop = asyncio.get_event_loop()\n", 194 | "# loop = asyncio.new_event_loop()\n", 195 | "# aysncio.run()\n", 196 | "\n", 197 | "\n", 198 | "loop.create_task(asleeper(123))" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": 8, 204 | "metadata": {}, 205 | "outputs": [ 206 | { 207 | "name": "stdout", 208 | "output_type": "stream", 209 | "text": [ 210 | "hello word\n" 211 | ] 212 | } 213 | ], 214 | "source": [ 215 | "print(\"hello word\")" 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": 9, 221 | "metadata": {}, 222 | "outputs": [ 223 | { 224 | "data": { 225 | "text/plain": [ 226 | "({:1> result=None>},\n", 227 | " {:5> wait_for=()]>>})" 228 | ] 229 | }, 230 | "execution_count": 9, 231 | "metadata": {}, 232 | "output_type": "execute_result" 233 | } 234 | ], 235 | "source": [ 236 | "done, pending = await asyncio.wait([asleeper(1), asleeper(123)], timeout=2)\n", 237 | "done, pending" 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": 10, 243 | "metadata": {}, 244 | "outputs": [ 245 | { 246 | "data": { 247 | "text/plain": [ 248 | "{:1> result=None>}" 249 | ] 250 | }, 251 | "execution_count": 10, 252 | "metadata": {}, 253 | "output_type": "execute_result" 254 | } 255 | ], 256 | "source": [ 257 | "done" 258 | ] 259 | }, 260 | { 261 | "cell_type": "code", 262 | "execution_count": 11, 263 | "metadata": {}, 264 | "outputs": [ 265 | { 266 | "data": { 267 | "text/plain": [ 268 | "{:5> wait_for=()]>>}" 269 | ] 270 | }, 271 | "execution_count": 11, 272 | "metadata": {}, 273 | "output_type": "execute_result" 274 | } 275 | ], 276 | "source": [ 277 | "pending" 278 | ] 279 | }, 280 | { 281 | "cell_type": "code", 282 | "execution_count": 13, 283 | "metadata": {}, 284 | "outputs": [], 285 | "source": [ 286 | "# await asyncio.wait(pending)" 287 | ] 288 | }, 289 | { 290 | "cell_type": "code", 291 | "execution_count": 14, 292 | "metadata": {}, 293 | "outputs": [ 294 | { 295 | "ename": "TimeoutError", 296 | "evalue": "", 297 | "output_type": "error", 298 | "traceback": [ 299 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 300 | "\u001b[0;31mTimeoutError\u001b[0m Traceback (most recent call last)", 301 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;32mawait\u001b[0m \u001b[0masyncio\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwait_for\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0masleeper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 302 | "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/asyncio/tasks.py\u001b[0m in \u001b[0;36mwait_for\u001b[0;34m(fut, timeout, loop)\u001b[0m\n\u001b[1;32m 488\u001b[0m \u001b[0;31m# See https://bugs.python.org/issue32751\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 489\u001b[0m \u001b[0;32mawait\u001b[0m \u001b[0m_cancel_and_wait\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfut\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mloop\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mloop\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 490\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mexceptions\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTimeoutError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 491\u001b[0m \u001b[0;32mfinally\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 492\u001b[0m \u001b[0mtimeout_handle\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcancel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 303 | "\u001b[0;31mTimeoutError\u001b[0m: " 304 | ] 305 | } 306 | ], 307 | "source": [ 308 | "await asyncio.wait_for(asleeper(5), timeout=3)" 309 | ] 310 | }, 311 | { 312 | "cell_type": "code", 313 | "execution_count": 15, 314 | "metadata": {}, 315 | "outputs": [ 316 | { 317 | "name": "stdout", 318 | "output_type": "stream", 319 | "text": [ 320 | "Task failed\n" 321 | ] 322 | } 323 | ], 324 | "source": [ 325 | "try:\n", 326 | " await asyncio.wait_for(asleeper(5), timeout=3)\n", 327 | "except asyncio.TimeoutError:\n", 328 | " print(\"Task failed\")" 329 | ] 330 | }, 331 | { 332 | "cell_type": "code", 333 | "execution_count": null, 334 | "metadata": {}, 335 | "outputs": [], 336 | "source": [ 337 | "async def asleeper_timeout(seconds, i=-1, timeout=4):\n", 338 | " # time.sleep(seconds)\n", 339 | " if i != -1:\n", 340 | " print(f\"a{i}\\t{seconds}s\")\n", 341 | " await asyncio.wait_for(asyncio.sleep(seconds), timeout=timeout)\n", 342 | " \n", 343 | "await asleeper_timeout(12, timeout=1)" 344 | ] 345 | } 346 | ], 347 | "metadata": { 348 | "kernelspec": { 349 | "display_name": "Python 3", 350 | "language": "python", 351 | "name": "python3" 352 | }, 353 | "language_info": { 354 | "codemirror_mode": { 355 | "name": "ipython", 356 | "version": 3 357 | }, 358 | "file_extension": ".py", 359 | "mimetype": "text/x-python", 360 | "name": "python", 361 | "nbconvert_exporter": "python", 362 | "pygments_lexer": "ipython3", 363 | "version": "3.8.2" 364 | } 365 | }, 366 | "nbformat": 4, 367 | "nbformat_minor": 4 368 | } 369 | -------------------------------------------------------------------------------- /lessons/5 - Scraping with Selenium.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "# !pip install requests-html selenium arsenic pandas" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "## Sync vs Async\n", 17 | "\n", 18 | "The Chess Game Analogy\n", 19 | "\n", 20 | "Consecutive vs Concurrent" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 2, 26 | "metadata": {}, 27 | "outputs": [ 28 | { 29 | "name": "stdout", 30 | "output_type": "stream", 31 | "text": [ 32 | "CPU times: user 5 µs, sys: 1 µs, total: 6 µs\n", 33 | "Wall time: 7.39 µs\n" 34 | ] 35 | } 36 | ], 37 | "source": [ 38 | "%%time\n", 39 | "\n", 40 | "import time\n", 41 | "\n", 42 | "iteration_times = [1, 3, 2, 4]\n", 43 | "\n", 44 | "\n", 45 | "def sleeper(seconds, i=-1):\n", 46 | " if i != -1:\n", 47 | " print(f\"{i}\\t{seconds}s\")\n", 48 | " time.sleep(seconds)\n", 49 | "\n", 50 | "\n", 51 | "def run():\n", 52 | " for i, second in enumerate(iteration_times):\n", 53 | " sleeper(second, i=i)\n", 54 | " \n", 55 | "# run()" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 3, 61 | "metadata": {}, 62 | "outputs": [ 63 | { 64 | "name": "stdout", 65 | "output_type": "stream", 66 | "text": [ 67 | "[:5>>, :5>>, :5>>, :5>>]\n", 68 | "0.0007688999176025391\n", 69 | "0\t1s\n", 70 | "1\t3s\n", 71 | "2\t2s\n", 72 | "3\t1s\n" 73 | ] 74 | } 75 | ], 76 | "source": [ 77 | "start = time.time()\n", 78 | "iteration_times = [1, 3, 2, 1]\n", 79 | "import asyncio\n", 80 | "\n", 81 | "async def a_sleeper(seconds, i=-1):\n", 82 | " if i != -1:\n", 83 | " print(f\"{i}\\t{seconds}s\")\n", 84 | " await asyncio.sleep(seconds) # coroutine\n", 85 | " \n", 86 | " ellap = time.time() - start\n", 87 | " print(f\"{i} done {ellap}\")\n", 88 | " return \"abc\"\n", 89 | "\n", 90 | "async def a_run():\n", 91 | " results = []\n", 92 | " for i, second in enumerate(iteration_times):\n", 93 | " results.append(\n", 94 | " asyncio.create_task(a_sleeper(second, i=i))\n", 95 | " )\n", 96 | " return results\n", 97 | " \n", 98 | "results = await a_run()\n", 99 | "print(results)\n", 100 | "end = time.time() - start\n", 101 | "\n", 102 | "print(end)" 103 | ] 104 | }, 105 | { 106 | "cell_type": "markdown", 107 | "metadata": {}, 108 | "source": [ 109 | "## Blocking & Timeouts" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 4, 115 | "metadata": {}, 116 | "outputs": [], 117 | "source": [ 118 | "def sleeper(seconds, i=-1):\n", 119 | " if i != -1:\n", 120 | " print(f\"{i}\\t{seconds}s\")\n", 121 | " time.sleep(seconds)\n", 122 | "\n", 123 | "sleeper(12)" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": 5, 129 | "metadata": {}, 130 | "outputs": [ 131 | { 132 | "name": "stdout", 133 | "output_type": "stream", 134 | "text": [ 135 | "0 done 12.013737201690674\n", 136 | "3 done 12.01388692855835\n", 137 | "2 done 12.013914108276367\n", 138 | "1 done 12.013946056365967\n" 139 | ] 140 | } 141 | ], 142 | "source": [ 143 | "async def asleeper(seconds, i=-1):\n", 144 | " # time.sleep(seconds)\n", 145 | " if i != -1:\n", 146 | " print(f\"a{i}\\t{seconds}s\")\n", 147 | " await asyncio.sleep(seconds)\n", 148 | " \n", 149 | "await asleeper(12)" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": 6, 155 | "metadata": {}, 156 | "outputs": [ 157 | { 158 | "name": "stdout", 159 | "output_type": "stream", 160 | "text": [ 161 | "hello word\n" 162 | ] 163 | } 164 | ], 165 | "source": [ 166 | "print(\"hello word\")" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": 7, 172 | "metadata": {}, 173 | "outputs": [ 174 | { 175 | "data": { 176 | "text/plain": [ 177 | ":1>>" 178 | ] 179 | }, 180 | "execution_count": 7, 181 | "metadata": {}, 182 | "output_type": "execute_result" 183 | } 184 | ], 185 | "source": [ 186 | "loop = asyncio.get_event_loop()\n", 187 | "# loop = asyncio.new_event_loop()\n", 188 | "# aysncio.run()\n", 189 | "\n", 190 | "\n", 191 | "loop.create_task(asleeper(123))" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": 8, 197 | "metadata": {}, 198 | "outputs": [ 199 | { 200 | "name": "stdout", 201 | "output_type": "stream", 202 | "text": [ 203 | "hello word\n" 204 | ] 205 | } 206 | ], 207 | "source": [ 208 | "print(\"hello word\")" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": 9, 214 | "metadata": {}, 215 | "outputs": [ 216 | { 217 | "data": { 218 | "text/plain": [ 219 | "({:1> result=None>},\n", 220 | " {:5> wait_for=()]>>})" 221 | ] 222 | }, 223 | "execution_count": 9, 224 | "metadata": {}, 225 | "output_type": "execute_result" 226 | } 227 | ], 228 | "source": [ 229 | "done, pending = await asyncio.wait([asleeper(1), asleeper(123)], timeout=2)\n", 230 | "done, pending" 231 | ] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "execution_count": 10, 236 | "metadata": {}, 237 | "outputs": [ 238 | { 239 | "data": { 240 | "text/plain": [ 241 | "{:1> result=None>}" 242 | ] 243 | }, 244 | "execution_count": 10, 245 | "metadata": {}, 246 | "output_type": "execute_result" 247 | } 248 | ], 249 | "source": [ 250 | "done" 251 | ] 252 | }, 253 | { 254 | "cell_type": "code", 255 | "execution_count": 11, 256 | "metadata": {}, 257 | "outputs": [ 258 | { 259 | "data": { 260 | "text/plain": [ 261 | "{:5> wait_for=()]>>}" 262 | ] 263 | }, 264 | "execution_count": 11, 265 | "metadata": {}, 266 | "output_type": "execute_result" 267 | } 268 | ], 269 | "source": [ 270 | "pending" 271 | ] 272 | }, 273 | { 274 | "cell_type": "code", 275 | "execution_count": 12, 276 | "metadata": {}, 277 | "outputs": [], 278 | "source": [ 279 | "# await asyncio.wait(pending)" 280 | ] 281 | }, 282 | { 283 | "cell_type": "code", 284 | "execution_count": 13, 285 | "metadata": {}, 286 | "outputs": [ 287 | { 288 | "ename": "TimeoutError", 289 | "evalue": "", 290 | "output_type": "error", 291 | "traceback": [ 292 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 293 | "\u001b[0;31mTimeoutError\u001b[0m Traceback (most recent call last)", 294 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;32mawait\u001b[0m \u001b[0masyncio\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwait_for\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0masleeper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 295 | "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/asyncio/tasks.py\u001b[0m in \u001b[0;36mwait_for\u001b[0;34m(fut, timeout, loop)\u001b[0m\n\u001b[1;32m 488\u001b[0m \u001b[0;31m# See https://bugs.python.org/issue32751\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 489\u001b[0m \u001b[0;32mawait\u001b[0m \u001b[0m_cancel_and_wait\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfut\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mloop\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mloop\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 490\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mexceptions\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTimeoutError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 491\u001b[0m \u001b[0;32mfinally\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 492\u001b[0m \u001b[0mtimeout_handle\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcancel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 296 | "\u001b[0;31mTimeoutError\u001b[0m: " 297 | ] 298 | } 299 | ], 300 | "source": [ 301 | "await asyncio.wait_for(asleeper(5), timeout=3)" 302 | ] 303 | }, 304 | { 305 | "cell_type": "code", 306 | "execution_count": null, 307 | "metadata": {}, 308 | "outputs": [], 309 | "source": [ 310 | "try:\n", 311 | " await asyncio.wait_for(asleeper(5), timeout=3)\n", 312 | "except asyncio.TimeoutError:\n", 313 | " print(\"Task failed\")" 314 | ] 315 | }, 316 | { 317 | "cell_type": "code", 318 | "execution_count": null, 319 | "metadata": {}, 320 | "outputs": [], 321 | "source": [ 322 | "async def asleeper_timeout(seconds, i=-1, timeout=4):\n", 323 | " # time.sleep(seconds)\n", 324 | " if i != -1:\n", 325 | " print(f\"a{i}\\t{seconds}s\")\n", 326 | " await asyncio.wait_for(asyncio.sleep(seconds), timeout=timeout)\n", 327 | " \n", 328 | "await asleeper_timeout(12, timeout=1)" 329 | ] 330 | }, 331 | { 332 | "cell_type": "markdown", 333 | "metadata": {}, 334 | "source": [ 335 | "## Scraping with Selenium - Synchronous\n", 336 | "New to selenium and web scraping? Watch [this series](https://kirr.co/dwy90n)." 337 | ] 338 | }, 339 | { 340 | "cell_type": "code", 341 | "execution_count": 20, 342 | "metadata": {}, 343 | "outputs": [], 344 | "source": [ 345 | "url = 'https://www.spoonflower.com/en/shop?on=fabric'" 346 | ] 347 | }, 348 | { 349 | "cell_type": "code", 350 | "execution_count": 29, 351 | "metadata": {}, 352 | "outputs": [], 353 | "source": [ 354 | "import re\n", 355 | "import requests\n", 356 | "from requests_html import HTML\n", 357 | "import pandas as pd\n", 358 | "\n", 359 | "from selenium import webdriver\n", 360 | "from selenium.webdriver.chrome.options import Options" 361 | ] 362 | }, 363 | { 364 | "cell_type": "code", 365 | "execution_count": 19, 366 | "metadata": {}, 367 | "outputs": [], 368 | "source": [ 369 | "def scraper(url):\n", 370 | " options = Options()\n", 371 | " options.add_argument(\"--headless\")\n", 372 | " driver = webdriver.Chrome(options=options)\n", 373 | " driver.get(url)\n", 374 | " return driver.page_source\n", 375 | "\n", 376 | "\n", 377 | "# /en/fabric/7137786-genevieve-floral-by-crystal_walen\n", 378 | "def extract_id_slug(url_path):\n", 379 | " regex = r\"^[^\\s]+/(?P\\d+)-(?P[\\w_-]+)$\"\n", 380 | " group = re.match(regex, url_path)\n", 381 | " if not group:\n", 382 | " return None, None\n", 383 | " return group['id'], group['slug']" 384 | ] 385 | }, 386 | { 387 | "cell_type": "code", 388 | "execution_count": 21, 389 | "metadata": {}, 390 | "outputs": [], 391 | "source": [ 392 | "content = scraper(url)\n" 393 | ] 394 | }, 395 | { 396 | "cell_type": "code", 397 | "execution_count": 23, 398 | "metadata": {}, 399 | "outputs": [], 400 | "source": [ 401 | "html_r = HTML(html=content)" 402 | ] 403 | }, 404 | { 405 | "cell_type": "code", 406 | "execution_count": 28, 407 | "metadata": {}, 408 | "outputs": [ 409 | { 410 | "name": "stdout", 411 | "output_type": "stream", 412 | "text": [ 413 | "7137786 genevieve-floral-by-crystal_walen\n", 414 | "8286001 hanging-out-by-sarah_knight\n", 415 | "4893900 half-scale-m81-woodland-camo-by-ricraynor\n", 416 | "8056679 ruth-bader-ginsgurg-rbg-bust-black-by-katerhees\n", 417 | "8197261 night-sky-stars-midnight-blue-by-at_the_cottage\n", 418 | "8692520 bees-lemons-large-blue-by-fernlesliestudio\n", 419 | "5048115 mexican-blanket-by-anchored_by_love\n", 420 | "2920223 m81-woodland-camo-by-ricraynor\n", 421 | "9060289 saints-fleur-de-lis-new-orleans-saints-football-football-fabric-fleur-de-lis-fabric-black-gold-gold-f-by-charlottewinter\n", 422 | "7137898 sierra-floral-by-crystal_walen\n", 423 | "6178734 fable-floral-blush-med-by-nouveau_bohemian\n", 424 | "1096407 skull-wall-by-ben_goetting\n", 425 | "6852245 cute-nurse-love-black-no-gradient-by-jannasalak\n", 426 | "5469666 galaxy-far-far-away-gray-by-studiofibonacci\n", 427 | "6327300 call-mountains-evergreen-med-by-nouveau_bohemian\n", 428 | "2330040 maryland-flags-by-elramsay\n", 429 | "5700186 puzzle-hearts-by-designedbygeeks\n", 430 | "5880084 mod-triangles-gold-indigo-by-crystal_walen\n", 431 | "7216659 rainbow-stars-watercolor-abstract-small-by-crystal_walen\n", 432 | "5588706 black-lives-matter-small-scale-by-ashleysummersdesign\n", 433 | "7236018 australian-native-eucalyptus-leaves-edition-1-australiana-fabric-wallpaper-by-erin__kendal\n", 434 | "6650888 love-nurse-whimsy-blue-by-phyllisdobbs\n", 435 | "7984669 8-love-care-medical-white-by-rebelmod\n", 436 | "4270747 happy-hair-stylist-friends-blue-by-clayvision_-_ahappybluetree\n", 437 | "7580754 ibd-gracie-grace-golden-jumbo-by-indybloomdesign\n", 438 | "4352750 loteria-by-jellymania\n", 439 | "6864327 love-lips-red-by-hipkiddesigns\n", 440 | "3817098 math-count-on-by-sammyk\n", 441 | "5544045 napoleonic-bees-faux-gilt-on-blackest-black-by-peacoquettedesigns\n", 442 | "5939834 classic-leopard-by-cinneworthington\n", 443 | "7587085 heres-heart-navy-white-gold2-sketch-1-by-doodleandcharm_\n", 444 | "6444170 catching-fireflies-by-thestorysmith\n", 445 | "6263258 navy-blue-watercolor-herringbone-by-laurapol\n", 446 | "2623675 black-white-music-notes-by-inspirationz\n", 447 | "7685381 dragon-fire-by-adenaj\n", 448 | "8618597 rainbow-watercolor-pawprints-by-dragonstarart\n", 449 | "5773745 rainbow-pride-stripes-by-furbuddy\n", 450 | "6715163 8-wild-heart-florals-white-by-shopcabin\n", 451 | "9240316 irish-notre-dame-irish-fabric-by-charlottewinter\n", 452 | "4995555 heart-health-awareness-light-gray-large-by-ohdarkthirty\n", 453 | "5964319 hearts-on-grey-linen-valentines-day-by-littlearrowdesign\n", 454 | "7812388 dnd-pattern-by-neonborealis\n", 455 | "7661255 just-jellies-jellyfish-by-katerhees\n", 456 | "1306112 opal-by-peacoquettedesigns\n", 457 | "7463028 seamless-watercolor-larger-leaves-pattern-1-by-daily_miracles\n", 458 | "7698482 scrubs-dr-stetho-scope-by-adrianne_vanalstine\n", 459 | "2632362 colorful-happy-smiley-face-squares-large-print-by-inspirationz\n", 460 | "5532389 blue-white-camouflage-pattern-by-artpics\n", 461 | "4981816 black-white-dogs-by-littleislandcompany\n", 462 | "7679631 scattered-earth-tones-watercolor-rainbows-by-anniemontgomerydesign\n", 463 | "991112 nursing-coordinates-by-bluevelvet\n", 464 | "1577333 marine-marpat-digital-woodland-camo-by-ricraynor\n", 465 | "4888888 flowers-skulls-by-elladorine\n", 466 | "2623792 purple-space-stars-small-print-by-inspirationz\n", 467 | "6573088 whale-s-song-by-katherine_quinn\n", 468 | "4995362 heart-health-awareness-black-by-ohdarkthirty\n", 469 | "4830872 cherry-blossom-watercolor-cherry-blossom-floral-by-magentarosedesigns\n", 470 | "5839396 spectacular-cats-by-cynthia_arre\n", 471 | "5312944 thank-being-friend-small-by-elladorine\n", 472 | "6650975 love-nurse-charcoal-gray-by-phyllisdobbs\n", 473 | "5033660 80s-accessories-by-diannemehta\n", 474 | "9119650 horror-friends-by-mariospeedwagon\n", 475 | "7522587 save-honey-bees-large-new-by-fernlesliestudio\n", 476 | "8039248 forest-animal-hot-air-balloon-night-adventure-by-at_the_cottage\n", 477 | "5514018 hair-cutting-shears-by-cloudycapevintage\n", 478 | "7944022 golden-girls-illustration-peach-by-yesterdaycollection\n", 479 | "7790777 turtles-aqua-blue-by-gingerlique\n", 480 | "7662668 mermaid-music-by-ceciliamok\n", 481 | "7368347 dear-clementine-oranges-teal-by-crystal_walen\n", 482 | "6590171 mermaid-scales-by-elladorine\n", 483 | "5513692 salon-barber-hairdresser-pattern-by-cloudycapevintage\n", 484 | "5034356 80s-hair-dryers-by-cjldesigns\n", 485 | "6079351 josie-meadow-floral-by-sweeterthanhoney\n", 486 | "7984649 8-love-care-medical-black-by-rebelmod\n", 487 | "6812243 cute-kawaii-sushi-small-size-by-penguinhouse\n", 488 | "5131007 scandinavian-sweet-hedgehog-illustration-kids-gender-neutral-black-white-by-littlesmilemakers\n", 489 | "8619105 flight-feathers-painted-by-xoxotique\n", 490 | "3840217 nurse-theme-by-hot4tees_bg-yahoo_com\n", 491 | "3584004 mexican-sugar-skulls-small-by-lusykoror\n", 492 | "509390 spoonflower-color-map-by-spoonflower_help\n", 493 | "1112778 rosie-riveter-by-spacefem\n", 494 | "9453318 african-american-girls-retro-pop-art-by-whimsical_brush\n", 495 | "5247883 hexo-blue-med-by-nouveau_bohemian\n", 496 | "6782514 eame-s-wildflower-meadow-by-hipkiddesigns\n" 497 | ] 498 | } 499 | ], 500 | "source": [ 501 | "fabric_links = [x for x in list(html_r.links) if x.startswith(\"/en/fabric\")]\n", 502 | "\n", 503 | "datas = []\n", 504 | "for path in fabric_links:\n", 505 | " id_, slug_ = extract_id_slug(path)\n", 506 | " print(id_, slug_)\n", 507 | " data = {\n", 508 | " \"id\": id_,\n", 509 | " \"slug\": slug_,\n", 510 | " \"path\": path,\n", 511 | " \"scraped\": 0 # True / False -> 1 / 0 \n", 512 | " }\n", 513 | " datas.append(data)" 514 | ] 515 | }, 516 | { 517 | "cell_type": "code", 518 | "execution_count": 30, 519 | "metadata": {}, 520 | "outputs": [ 521 | { 522 | "data": { 523 | "text/html": [ 524 | "
\n", 525 | "\n", 538 | "\n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | "
idslugpathscraped
07137786genevieve-floral-by-crystal_walen/en/fabric/7137786-genevieve-floral-by-crystal...0
18286001hanging-out-by-sarah_knight/en/fabric/8286001-hanging-out-by-sarah_knight0
24893900half-scale-m81-woodland-camo-by-ricraynor/en/fabric/4893900-half-scale-m81-woodland-cam...0
38056679ruth-bader-ginsgurg-rbg-bust-black-by-katerhees/en/fabric/8056679-ruth-bader-ginsgurg-rbg-bus...0
48197261night-sky-stars-midnight-blue-by-at_the_cottage/en/fabric/8197261-night-sky-stars-midnight-bl...0
\n", 586 | "
" 587 | ], 588 | "text/plain": [ 589 | " id slug \\\n", 590 | "0 7137786 genevieve-floral-by-crystal_walen \n", 591 | "1 8286001 hanging-out-by-sarah_knight \n", 592 | "2 4893900 half-scale-m81-woodland-camo-by-ricraynor \n", 593 | "3 8056679 ruth-bader-ginsgurg-rbg-bust-black-by-katerhees \n", 594 | "4 8197261 night-sky-stars-midnight-blue-by-at_the_cottage \n", 595 | "\n", 596 | " path scraped \n", 597 | "0 /en/fabric/7137786-genevieve-floral-by-crystal... 0 \n", 598 | "1 /en/fabric/8286001-hanging-out-by-sarah_knight 0 \n", 599 | "2 /en/fabric/4893900-half-scale-m81-woodland-cam... 0 \n", 600 | "3 /en/fabric/8056679-ruth-bader-ginsgurg-rbg-bus... 0 \n", 601 | "4 /en/fabric/8197261-night-sky-stars-midnight-bl... 0 " 602 | ] 603 | }, 604 | "execution_count": 30, 605 | "metadata": {}, 606 | "output_type": "execute_result" 607 | } 608 | ], 609 | "source": [ 610 | "df = pd.DataFrame(datas)\n", 611 | "df.head()" 612 | ] 613 | }, 614 | { 615 | "cell_type": "code", 616 | "execution_count": 33, 617 | "metadata": {}, 618 | "outputs": [], 619 | "source": [ 620 | "df.to_csv(\"local.csv\", index=False)" 621 | ] 622 | }, 623 | { 624 | "cell_type": "code", 625 | "execution_count": 34, 626 | "metadata": {}, 627 | "outputs": [ 628 | { 629 | "data": { 630 | "text/html": [ 631 | "
\n", 632 | "\n", 645 | "\n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | " \n", 655 | " \n", 656 | " \n", 657 | " \n", 658 | " \n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | " \n", 664 | " \n", 665 | " \n", 666 | " \n", 667 | " \n", 668 | " \n", 669 | " \n", 670 | " \n", 671 | " \n", 672 | " \n", 673 | " \n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | " \n", 696 | " \n", 697 | " \n", 698 | " \n", 699 | " \n", 700 | " \n", 701 | " \n", 702 | " \n", 703 | " \n", 704 | " \n", 705 | " \n", 706 | " \n", 707 | " \n", 708 | " \n", 709 | " \n", 710 | " \n", 711 | " \n", 712 | " \n", 713 | " \n", 714 | " \n", 715 | " \n", 716 | " \n", 717 | " \n", 718 | " \n", 719 | " \n", 720 | " \n", 721 | " \n", 722 | " \n", 723 | " \n", 724 | " \n", 725 | " \n", 726 | " \n", 727 | " \n", 728 | " \n", 729 | " \n", 730 | " \n", 731 | " \n", 732 | " \n", 733 | " \n", 734 | "
idslugpathscraped
07137786genevieve-floral-by-crystal_walen/en/fabric/7137786-genevieve-floral-by-crystal...0
18286001hanging-out-by-sarah_knight/en/fabric/8286001-hanging-out-by-sarah_knight0
24893900half-scale-m81-woodland-camo-by-ricraynor/en/fabric/4893900-half-scale-m81-woodland-cam...0
38056679ruth-bader-ginsgurg-rbg-bust-black-by-katerhees/en/fabric/8056679-ruth-bader-ginsgurg-rbg-bus...0
48197261night-sky-stars-midnight-blue-by-at_the_cottage/en/fabric/8197261-night-sky-stars-midnight-bl...0
...............
79509390spoonflower-color-map-by-spoonflower_help/en/fabric/509390-spoonflower-color-map-by-spo...0
801112778rosie-riveter-by-spacefem/en/fabric/1112778-rosie-riveter-by-spacefem0
819453318african-american-girls-retro-pop-art-by-whimsi.../en/fabric/9453318-african-american-girls-retr...0
825247883hexo-blue-med-by-nouveau_bohemian/en/fabric/5247883-hexo-blue-med-by-nouveau_bo...0
836782514eame-s-wildflower-meadow-by-hipkiddesigns/en/fabric/6782514-eame-s-wildflower-meadow-by...0
\n", 735 | "

84 rows × 4 columns

\n", 736 | "
" 737 | ], 738 | "text/plain": [ 739 | " id slug \\\n", 740 | "0 7137786 genevieve-floral-by-crystal_walen \n", 741 | "1 8286001 hanging-out-by-sarah_knight \n", 742 | "2 4893900 half-scale-m81-woodland-camo-by-ricraynor \n", 743 | "3 8056679 ruth-bader-ginsgurg-rbg-bust-black-by-katerhees \n", 744 | "4 8197261 night-sky-stars-midnight-blue-by-at_the_cottage \n", 745 | ".. ... ... \n", 746 | "79 509390 spoonflower-color-map-by-spoonflower_help \n", 747 | "80 1112778 rosie-riveter-by-spacefem \n", 748 | "81 9453318 african-american-girls-retro-pop-art-by-whimsi... \n", 749 | "82 5247883 hexo-blue-med-by-nouveau_bohemian \n", 750 | "83 6782514 eame-s-wildflower-meadow-by-hipkiddesigns \n", 751 | "\n", 752 | " path scraped \n", 753 | "0 /en/fabric/7137786-genevieve-floral-by-crystal... 0 \n", 754 | "1 /en/fabric/8286001-hanging-out-by-sarah_knight 0 \n", 755 | "2 /en/fabric/4893900-half-scale-m81-woodland-cam... 0 \n", 756 | "3 /en/fabric/8056679-ruth-bader-ginsgurg-rbg-bus... 0 \n", 757 | "4 /en/fabric/8197261-night-sky-stars-midnight-bl... 0 \n", 758 | ".. ... ... \n", 759 | "79 /en/fabric/509390-spoonflower-color-map-by-spo... 0 \n", 760 | "80 /en/fabric/1112778-rosie-riveter-by-spacefem 0 \n", 761 | "81 /en/fabric/9453318-african-american-girls-retr... 0 \n", 762 | "82 /en/fabric/5247883-hexo-blue-med-by-nouveau_bo... 0 \n", 763 | "83 /en/fabric/6782514-eame-s-wildflower-meadow-by... 0 \n", 764 | "\n", 765 | "[84 rows x 4 columns]" 766 | ] 767 | }, 768 | "execution_count": 34, 769 | "metadata": {}, 770 | "output_type": "execute_result" 771 | } 772 | ], 773 | "source": [ 774 | "pd.read_csv(\"local.csv\")" 775 | ] 776 | }, 777 | { 778 | "cell_type": "markdown", 779 | "metadata": {}, 780 | "source": [ 781 | "## Asynchronous Scraping with `chromedriver` and `arsenic`\n", 782 | "\n", 783 | "[arsenic Docs](https://arsenic.readthedocs.io/en/latest/)" 784 | ] 785 | }, 786 | { 787 | "cell_type": "code", 788 | "execution_count": 37, 789 | "metadata": {}, 790 | "outputs": [], 791 | "source": [ 792 | "# !pip install arsenic" 793 | ] 794 | }, 795 | { 796 | "cell_type": "code", 797 | "execution_count": null, 798 | "metadata": {}, 799 | "outputs": [], 800 | "source": [] 801 | } 802 | ], 803 | "metadata": { 804 | "kernelspec": { 805 | "display_name": "Python 3", 806 | "language": "python", 807 | "name": "python3" 808 | }, 809 | "language_info": { 810 | "codemirror_mode": { 811 | "name": "ipython", 812 | "version": 3 813 | }, 814 | "file_extension": ".py", 815 | "mimetype": "text/x-python", 816 | "name": "python", 817 | "nbconvert_exporter": "python", 818 | "pygments_lexer": "ipython3", 819 | "version": "3.8.2" 820 | } 821 | }, 822 | "nbformat": 4, 823 | "nbformat_minor": 4 824 | } 825 | -------------------------------------------------------------------------------- /lessons/6 - Async Web Scraping with chromedriver and arsenic.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "# !pip install requests-html selenium arsenic pandas" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "## Sync vs Async\n", 17 | "\n", 18 | "The Chess Game Analogy\n", 19 | "\n", 20 | "Consecutive vs Concurrent" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 2, 26 | "metadata": {}, 27 | "outputs": [ 28 | { 29 | "name": "stdout", 30 | "output_type": "stream", 31 | "text": [ 32 | "CPU times: user 6 µs, sys: 0 ns, total: 6 µs\n", 33 | "Wall time: 8.82 µs\n" 34 | ] 35 | } 36 | ], 37 | "source": [ 38 | "%%time\n", 39 | "\n", 40 | "import time\n", 41 | "\n", 42 | "iteration_times = [1, 3, 2, 4]\n", 43 | "\n", 44 | "\n", 45 | "def sleeper(seconds, i=-1):\n", 46 | " if i != -1:\n", 47 | " print(f\"{i}\\t{seconds}s\")\n", 48 | " time.sleep(seconds)\n", 49 | "\n", 50 | "\n", 51 | "def run():\n", 52 | " for i, second in enumerate(iteration_times):\n", 53 | " sleeper(second, i=i)\n", 54 | " \n", 55 | "# run()" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 3, 61 | "metadata": {}, 62 | "outputs": [ 63 | { 64 | "name": "stdout", 65 | "output_type": "stream", 66 | "text": [ 67 | "[:5>>, :5>>, :5>>, :5>>]\n", 68 | "0.0007050037384033203\n", 69 | "0\t1s\n", 70 | "1\t3s\n", 71 | "2\t2s\n", 72 | "3\t1s\n" 73 | ] 74 | } 75 | ], 76 | "source": [ 77 | "start = time.time()\n", 78 | "iteration_times = [1, 3, 2, 1]\n", 79 | "import asyncio\n", 80 | "\n", 81 | "async def a_sleeper(seconds, i=-1):\n", 82 | " if i != -1:\n", 83 | " print(f\"{i}\\t{seconds}s\")\n", 84 | " await asyncio.sleep(seconds) # coroutine\n", 85 | " \n", 86 | " ellap = time.time() - start\n", 87 | " print(f\"{i} done {ellap}\")\n", 88 | " return \"abc\"\n", 89 | "\n", 90 | "async def a_run():\n", 91 | " results = []\n", 92 | " for i, second in enumerate(iteration_times):\n", 93 | " results.append(\n", 94 | " asyncio.create_task(a_sleeper(second, i=i))\n", 95 | " )\n", 96 | " return results\n", 97 | " \n", 98 | "results = await a_run()\n", 99 | "print(results)\n", 100 | "end = time.time() - start\n", 101 | "\n", 102 | "print(end)" 103 | ] 104 | }, 105 | { 106 | "cell_type": "markdown", 107 | "metadata": {}, 108 | "source": [ 109 | "## Blocking & Timeouts" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 4, 115 | "metadata": {}, 116 | "outputs": [], 117 | "source": [ 118 | "def sleeper(seconds, i=-1):\n", 119 | " if i != -1:\n", 120 | " print(f\"{i}\\t{seconds}s\")\n", 121 | " time.sleep(seconds)\n", 122 | "\n", 123 | "sleeper(12)" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": 5, 129 | "metadata": {}, 130 | "outputs": [ 131 | { 132 | "name": "stdout", 133 | "output_type": "stream", 134 | "text": [ 135 | "0 done 12.023157119750977\n", 136 | "3 done 12.023277044296265\n", 137 | "2 done 12.023310899734497\n", 138 | "1 done 12.023338079452515\n" 139 | ] 140 | } 141 | ], 142 | "source": [ 143 | "async def asleeper(seconds, i=-1):\n", 144 | " # time.sleep(seconds)\n", 145 | " if i != -1:\n", 146 | " print(f\"a{i}\\t{seconds}s\")\n", 147 | " await asyncio.sleep(seconds)\n", 148 | " \n", 149 | "await asleeper(12)" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": 6, 155 | "metadata": {}, 156 | "outputs": [ 157 | { 158 | "name": "stdout", 159 | "output_type": "stream", 160 | "text": [ 161 | "hello word\n" 162 | ] 163 | } 164 | ], 165 | "source": [ 166 | "print(\"hello word\")" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": 7, 172 | "metadata": {}, 173 | "outputs": [ 174 | { 175 | "data": { 176 | "text/plain": [ 177 | ":1>>" 178 | ] 179 | }, 180 | "execution_count": 7, 181 | "metadata": {}, 182 | "output_type": "execute_result" 183 | } 184 | ], 185 | "source": [ 186 | "loop = asyncio.get_event_loop()\n", 187 | "# loop = asyncio.new_event_loop()\n", 188 | "# aysncio.run()\n", 189 | "\n", 190 | "\n", 191 | "loop.create_task(asleeper(123))" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": 8, 197 | "metadata": {}, 198 | "outputs": [ 199 | { 200 | "name": "stdout", 201 | "output_type": "stream", 202 | "text": [ 203 | "hello word\n" 204 | ] 205 | } 206 | ], 207 | "source": [ 208 | "print(\"hello word\")" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": 9, 214 | "metadata": {}, 215 | "outputs": [ 216 | { 217 | "data": { 218 | "text/plain": [ 219 | "({:1> result=None>},\n", 220 | " {:5> wait_for=()]>>})" 221 | ] 222 | }, 223 | "execution_count": 9, 224 | "metadata": {}, 225 | "output_type": "execute_result" 226 | } 227 | ], 228 | "source": [ 229 | "done, pending = await asyncio.wait([asleeper(1), asleeper(123)], timeout=2)\n", 230 | "done, pending" 231 | ] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "execution_count": 10, 236 | "metadata": {}, 237 | "outputs": [ 238 | { 239 | "data": { 240 | "text/plain": [ 241 | "{:1> result=None>}" 242 | ] 243 | }, 244 | "execution_count": 10, 245 | "metadata": {}, 246 | "output_type": "execute_result" 247 | } 248 | ], 249 | "source": [ 250 | "done" 251 | ] 252 | }, 253 | { 254 | "cell_type": "code", 255 | "execution_count": 11, 256 | "metadata": {}, 257 | "outputs": [ 258 | { 259 | "data": { 260 | "text/plain": [ 261 | "{:5> wait_for=()]>>}" 262 | ] 263 | }, 264 | "execution_count": 11, 265 | "metadata": {}, 266 | "output_type": "execute_result" 267 | } 268 | ], 269 | "source": [ 270 | "pending" 271 | ] 272 | }, 273 | { 274 | "cell_type": "code", 275 | "execution_count": 12, 276 | "metadata": {}, 277 | "outputs": [], 278 | "source": [ 279 | "# await asyncio.wait(pending)" 280 | ] 281 | }, 282 | { 283 | "cell_type": "code", 284 | "execution_count": 13, 285 | "metadata": {}, 286 | "outputs": [ 287 | { 288 | "ename": "TimeoutError", 289 | "evalue": "", 290 | "output_type": "error", 291 | "traceback": [ 292 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 293 | "\u001b[0;31mTimeoutError\u001b[0m Traceback (most recent call last)", 294 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;32mawait\u001b[0m \u001b[0masyncio\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwait_for\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0masleeper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 295 | "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/asyncio/tasks.py\u001b[0m in \u001b[0;36mwait_for\u001b[0;34m(fut, timeout, loop)\u001b[0m\n\u001b[1;32m 488\u001b[0m \u001b[0;31m# See https://bugs.python.org/issue32751\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 489\u001b[0m \u001b[0;32mawait\u001b[0m \u001b[0m_cancel_and_wait\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfut\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mloop\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mloop\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 490\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mexceptions\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTimeoutError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 491\u001b[0m \u001b[0;32mfinally\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 492\u001b[0m \u001b[0mtimeout_handle\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcancel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 296 | "\u001b[0;31mTimeoutError\u001b[0m: " 297 | ] 298 | } 299 | ], 300 | "source": [ 301 | "await asyncio.wait_for(asleeper(5), timeout=3)" 302 | ] 303 | }, 304 | { 305 | "cell_type": "code", 306 | "execution_count": 14, 307 | "metadata": {}, 308 | "outputs": [ 309 | { 310 | "name": "stdout", 311 | "output_type": "stream", 312 | "text": [ 313 | "Task failed\n" 314 | ] 315 | } 316 | ], 317 | "source": [ 318 | "try:\n", 319 | " await asyncio.wait_for(asleeper(5), timeout=3)\n", 320 | "except asyncio.TimeoutError:\n", 321 | " print(\"Task failed\")" 322 | ] 323 | }, 324 | { 325 | "cell_type": "code", 326 | "execution_count": 15, 327 | "metadata": {}, 328 | "outputs": [ 329 | { 330 | "ename": "TimeoutError", 331 | "evalue": "", 332 | "output_type": "error", 333 | "traceback": [ 334 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 335 | "\u001b[0;31mTimeoutError\u001b[0m Traceback (most recent call last)", 336 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mawait\u001b[0m \u001b[0masyncio\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwait_for\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0masyncio\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msleep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mseconds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 7\u001b[0;31m \u001b[0;32mawait\u001b[0m \u001b[0masleeper_timeout\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m12\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 337 | "\u001b[0;32m\u001b[0m in \u001b[0;36masleeper_timeout\u001b[0;34m(seconds, i, timeout)\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mi\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"a{i}\\t{seconds}s\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0;32mawait\u001b[0m \u001b[0masyncio\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwait_for\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0masyncio\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msleep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mseconds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 6\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;32mawait\u001b[0m \u001b[0masleeper_timeout\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m12\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 338 | "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/asyncio/tasks.py\u001b[0m in \u001b[0;36mwait_for\u001b[0;34m(fut, timeout, loop)\u001b[0m\n\u001b[1;32m 488\u001b[0m \u001b[0;31m# See https://bugs.python.org/issue32751\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 489\u001b[0m \u001b[0;32mawait\u001b[0m \u001b[0m_cancel_and_wait\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfut\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mloop\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mloop\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 490\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mexceptions\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTimeoutError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 491\u001b[0m \u001b[0;32mfinally\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 492\u001b[0m \u001b[0mtimeout_handle\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcancel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 339 | "\u001b[0;31mTimeoutError\u001b[0m: " 340 | ] 341 | } 342 | ], 343 | "source": [ 344 | "async def asleeper_timeout(seconds, i=-1, timeout=4):\n", 345 | " # time.sleep(seconds)\n", 346 | " if i != -1:\n", 347 | " print(f\"a{i}\\t{seconds}s\")\n", 348 | " await asyncio.wait_for(asyncio.sleep(seconds), timeout=timeout)\n", 349 | " \n", 350 | "await asleeper_timeout(12, timeout=1)" 351 | ] 352 | }, 353 | { 354 | "cell_type": "markdown", 355 | "metadata": {}, 356 | "source": [ 357 | "## Scraping with Selenium - Synchronous\n", 358 | "New to selenium and web scraping? Watch [this series](https://kirr.co/dwy90n)." 359 | ] 360 | }, 361 | { 362 | "cell_type": "code", 363 | "execution_count": 16, 364 | "metadata": {}, 365 | "outputs": [], 366 | "source": [ 367 | "url = 'https://www.spoonflower.com/en/shop?on=fabric'" 368 | ] 369 | }, 370 | { 371 | "cell_type": "code", 372 | "execution_count": 17, 373 | "metadata": {}, 374 | "outputs": [], 375 | "source": [ 376 | "import re\n", 377 | "import requests\n", 378 | "from requests_html import HTML\n", 379 | "import pandas as pd\n", 380 | "\n", 381 | "from selenium import webdriver\n", 382 | "from selenium.webdriver.chrome.options import Options" 383 | ] 384 | }, 385 | { 386 | "cell_type": "code", 387 | "execution_count": 18, 388 | "metadata": {}, 389 | "outputs": [], 390 | "source": [ 391 | "def scraper(url):\n", 392 | " options = Options()\n", 393 | " options.add_argument(\"--headless\")\n", 394 | " driver = webdriver.Chrome(options=options)\n", 395 | " driver.get(url)\n", 396 | " return driver.page_source\n", 397 | "\n", 398 | "\n", 399 | "# /en/fabric/7137786-genevieve-floral-by-crystal_walen\n", 400 | "def extract_id_slug(url_path):\n", 401 | " regex = r\"^[^\\s]+/(?P\\d+)-(?P[\\w_-]+)$\"\n", 402 | " group = re.match(regex, url_path)\n", 403 | " if not group:\n", 404 | " return None, None\n", 405 | " return group['id'], group['slug']" 406 | ] 407 | }, 408 | { 409 | "cell_type": "code", 410 | "execution_count": 19, 411 | "metadata": {}, 412 | "outputs": [], 413 | "source": [ 414 | "content = scraper(url)" 415 | ] 416 | }, 417 | { 418 | "cell_type": "code", 419 | "execution_count": null, 420 | "metadata": {}, 421 | "outputs": [], 422 | "source": [] 423 | }, 424 | { 425 | "cell_type": "code", 426 | "execution_count": 20, 427 | "metadata": {}, 428 | "outputs": [ 429 | { 430 | "name": "stdout", 431 | "output_type": "stream", 432 | "text": [ 433 | "8692520 bees-lemons-large-blue-by-fernlesliestudio\n", 434 | "4352750 loteria-by-jellymania\n", 435 | "7137786 genevieve-floral-by-crystal_walen\n", 436 | "6650975 love-nurse-charcoal-gray-by-phyllisdobbs\n", 437 | "5544045 napoleonic-bees-faux-gilt-on-blackest-black-by-peacoquettedesigns\n", 438 | "7944022 golden-girls-illustration-peach-by-yesterdaycollection\n", 439 | "7580754 ibd-gracie-grace-golden-jumbo-by-indybloomdesign\n", 440 | "5247883 hexo-blue-med-by-nouveau_bohemian\n", 441 | "6444170 catching-fireflies-by-thestorysmith\n", 442 | "7236018 australian-native-eucalyptus-leaves-edition-1-australiana-fabric-wallpaper-by-erin__kendal\n", 443 | "3817098 math-count-on-by-sammyk\n", 444 | "9060289 saints-fleur-de-lis-new-orleans-saints-football-football-fabric-fleur-de-lis-fabric-black-gold-gold-f-by-charlottewinter\n", 445 | "7368347 dear-clementine-oranges-teal-by-crystal_walen\n", 446 | "6812243 cute-kawaii-sushi-small-size-by-penguinhouse\n", 447 | "7216659 rainbow-stars-watercolor-abstract-small-by-crystal_walen\n", 448 | "4995362 heart-health-awareness-black-by-ohdarkthirty\n", 449 | "5048115 mexican-blanket-by-anchored_by_love\n", 450 | "6327300 call-mountains-evergreen-med-by-nouveau_bohemian\n", 451 | "5880084 mod-triangles-gold-indigo-by-crystal_walen\n", 452 | "2920223 m81-woodland-camo-by-ricraynor\n", 453 | "7812388 dnd-pattern-by-neonborealis\n", 454 | "2632362 colorful-happy-smiley-face-squares-large-print-by-inspirationz\n", 455 | "6573088 whale-s-song-by-katherine_quinn\n", 456 | "5513692 salon-barber-hairdresser-pattern-by-cloudycapevintage\n", 457 | "4830872 cherry-blossom-watercolor-cherry-blossom-floral-by-magentarosedesigns\n", 458 | "9453318 african-american-girls-retro-pop-art-by-whimsical_brush\n", 459 | "4981816 black-white-dogs-by-littleislandcompany\n", 460 | "9240316 irish-notre-dame-irish-fabric-by-charlottewinter\n", 461 | "5532389 blue-white-camouflage-pattern-by-artpics\n", 462 | "6590171 mermaid-scales-by-elladorine\n", 463 | "7463028 seamless-watercolor-larger-leaves-pattern-1-by-daily_miracles\n", 464 | "7587085 heres-heart-navy-white-gold2-sketch-1-by-doodleandcharm_\n", 465 | "6178734 fable-floral-blush-med-by-nouveau_bohemian\n", 466 | "5700186 puzzle-hearts-by-designedbygeeks\n", 467 | "2623675 black-white-music-notes-by-inspirationz\n", 468 | "6079351 josie-meadow-floral-by-sweeterthanhoney\n", 469 | "8618597 rainbow-watercolor-pawprints-by-dragonstarart\n", 470 | "991112 nursing-coordinates-by-bluevelvet\n", 471 | "2623792 purple-space-stars-small-print-by-inspirationz\n", 472 | "5839396 spectacular-cats-by-cynthia_arre\n", 473 | "4893900 half-scale-m81-woodland-camo-by-ricraynor\n", 474 | "3584004 mexican-sugar-skulls-small-by-lusykoror\n", 475 | "8039248 forest-animal-hot-air-balloon-night-adventure-by-at_the_cottage\n", 476 | "1577333 marine-marpat-digital-woodland-camo-by-ricraynor\n", 477 | "8619105 flight-feathers-painted-by-xoxotique\n", 478 | "5131007 scandinavian-sweet-hedgehog-illustration-kids-gender-neutral-black-white-by-littlesmilemakers\n", 479 | "7790777 turtles-aqua-blue-by-gingerlique\n", 480 | "8286001 hanging-out-by-sarah_knight\n", 481 | "4995555 heart-health-awareness-light-gray-large-by-ohdarkthirty\n", 482 | "5034356 80s-hair-dryers-by-cjldesigns\n", 483 | "2330040 maryland-flags-by-elramsay\n", 484 | "7661255 just-jellies-jellyfish-by-katerhees\n", 485 | "7679631 scattered-earth-tones-watercolor-rainbows-by-anniemontgomerydesign\n", 486 | "5312944 thank-being-friend-small-by-elladorine\n", 487 | "6715163 8-wild-heart-florals-white-by-shopcabin\n", 488 | "1112778 rosie-riveter-by-spacefem\n", 489 | "509390 spoonflower-color-map-by-spoonflower_help\n", 490 | "5469666 galaxy-far-far-away-gray-by-studiofibonacci\n", 491 | "6263258 navy-blue-watercolor-herringbone-by-laurapol\n", 492 | "6852245 cute-nurse-love-black-no-gradient-by-jannasalak\n", 493 | "7984669 8-love-care-medical-white-by-rebelmod\n", 494 | "5939834 classic-leopard-by-cinneworthington\n", 495 | "7698482 scrubs-dr-stetho-scope-by-adrianne_vanalstine\n", 496 | "5514018 hair-cutting-shears-by-cloudycapevintage\n", 497 | "7137898 sierra-floral-by-crystal_walen\n", 498 | "7984649 8-love-care-medical-black-by-rebelmod\n", 499 | "1306112 opal-by-peacoquettedesigns\n", 500 | "8197261 night-sky-stars-midnight-blue-by-at_the_cottage\n", 501 | "4270747 happy-hair-stylist-friends-blue-by-clayvision_-_ahappybluetree\n", 502 | "4888888 flowers-skulls-by-elladorine\n", 503 | "5588706 black-lives-matter-small-scale-by-ashleysummersdesign\n", 504 | "8056679 ruth-bader-ginsgurg-rbg-bust-black-by-katerhees\n", 505 | "7522587 save-honey-bees-large-new-by-fernlesliestudio\n", 506 | "7685381 dragon-fire-by-adenaj\n", 507 | "7662668 mermaid-music-by-ceciliamok\n", 508 | "9119650 horror-friends-by-mariospeedwagon\n", 509 | "5033660 80s-accessories-by-diannemehta\n", 510 | "6782514 eame-s-wildflower-meadow-by-hipkiddesigns\n", 511 | "5773745 rainbow-pride-stripes-by-furbuddy\n", 512 | "3840217 nurse-theme-by-hot4tees_bg-yahoo_com\n", 513 | "6864327 love-lips-red-by-hipkiddesigns\n", 514 | "6650888 love-nurse-whimsy-blue-by-phyllisdobbs\n", 515 | "5964319 hearts-on-grey-linen-valentines-day-by-littlearrowdesign\n", 516 | "1096407 skull-wall-by-ben_goetting\n" 517 | ] 518 | } 519 | ], 520 | "source": [ 521 | "html_r = HTML(html=content)\n", 522 | "\n", 523 | "fabric_links = [x for x in list(html_r.links) if x.startswith(\"/en/fabric\")]\n", 524 | "\n", 525 | "datas = []\n", 526 | "for path in fabric_links:\n", 527 | " id_, slug_ = extract_id_slug(path)\n", 528 | " print(id_, slug_)\n", 529 | " data = {\n", 530 | " \"id\": id_,\n", 531 | " \"slug\": slug_,\n", 532 | " \"path\": path,\n", 533 | " \"scraped\": 0 # True / False -> 1 / 0 \n", 534 | " }\n", 535 | " datas.append(data)" 536 | ] 537 | }, 538 | { 539 | "cell_type": "code", 540 | "execution_count": 21, 541 | "metadata": {}, 542 | "outputs": [ 543 | { 544 | "data": { 545 | "text/html": [ 546 | "
\n", 547 | "\n", 560 | "\n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | "
idslugpathscraped
08692520bees-lemons-large-blue-by-fernlesliestudio/en/fabric/8692520-bees-lemons-large-blue-by-f...0
14352750loteria-by-jellymania/en/fabric/4352750-loteria-by-jellymania0
27137786genevieve-floral-by-crystal_walen/en/fabric/7137786-genevieve-floral-by-crystal...0
36650975love-nurse-charcoal-gray-by-phyllisdobbs/en/fabric/6650975-love-nurse-charcoal-gray-by...0
45544045napoleonic-bees-faux-gilt-on-blackest-black-by.../en/fabric/5544045-napoleonic-bees-faux-gilt-o...0
\n", 608 | "
" 609 | ], 610 | "text/plain": [ 611 | " id slug \\\n", 612 | "0 8692520 bees-lemons-large-blue-by-fernlesliestudio \n", 613 | "1 4352750 loteria-by-jellymania \n", 614 | "2 7137786 genevieve-floral-by-crystal_walen \n", 615 | "3 6650975 love-nurse-charcoal-gray-by-phyllisdobbs \n", 616 | "4 5544045 napoleonic-bees-faux-gilt-on-blackest-black-by... \n", 617 | "\n", 618 | " path scraped \n", 619 | "0 /en/fabric/8692520-bees-lemons-large-blue-by-f... 0 \n", 620 | "1 /en/fabric/4352750-loteria-by-jellymania 0 \n", 621 | "2 /en/fabric/7137786-genevieve-floral-by-crystal... 0 \n", 622 | "3 /en/fabric/6650975-love-nurse-charcoal-gray-by... 0 \n", 623 | "4 /en/fabric/5544045-napoleonic-bees-faux-gilt-o... 0 " 624 | ] 625 | }, 626 | "execution_count": 21, 627 | "metadata": {}, 628 | "output_type": "execute_result" 629 | } 630 | ], 631 | "source": [ 632 | "df = pd.DataFrame(datas)\n", 633 | "df.head()" 634 | ] 635 | }, 636 | { 637 | "cell_type": "code", 638 | "execution_count": 22, 639 | "metadata": {}, 640 | "outputs": [], 641 | "source": [ 642 | "df.to_csv(\"local.csv\", index=False)" 643 | ] 644 | }, 645 | { 646 | "cell_type": "code", 647 | "execution_count": 23, 648 | "metadata": {}, 649 | "outputs": [ 650 | { 651 | "data": { 652 | "text/html": [ 653 | "
\n", 654 | "\n", 667 | "\n", 668 | " \n", 669 | " \n", 670 | " \n", 671 | " \n", 672 | " \n", 673 | " \n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | " \n", 696 | " \n", 697 | " \n", 698 | " \n", 699 | " \n", 700 | " \n", 701 | " \n", 702 | " \n", 703 | " \n", 704 | " \n", 705 | " \n", 706 | " \n", 707 | " \n", 708 | " \n", 709 | " \n", 710 | " \n", 711 | " \n", 712 | " \n", 713 | " \n", 714 | " \n", 715 | " \n", 716 | " \n", 717 | " \n", 718 | " \n", 719 | " \n", 720 | " \n", 721 | " \n", 722 | " \n", 723 | " \n", 724 | " \n", 725 | " \n", 726 | " \n", 727 | " \n", 728 | " \n", 729 | " \n", 730 | " \n", 731 | " \n", 732 | " \n", 733 | " \n", 734 | " \n", 735 | " \n", 736 | " \n", 737 | " \n", 738 | " \n", 739 | " \n", 740 | " \n", 741 | " \n", 742 | " \n", 743 | " \n", 744 | " \n", 745 | " \n", 746 | " \n", 747 | " \n", 748 | " \n", 749 | " \n", 750 | " \n", 751 | " \n", 752 | " \n", 753 | " \n", 754 | " \n", 755 | " \n", 756 | "
idslugpathscraped
08692520bees-lemons-large-blue-by-fernlesliestudio/en/fabric/8692520-bees-lemons-large-blue-by-f...0
14352750loteria-by-jellymania/en/fabric/4352750-loteria-by-jellymania0
27137786genevieve-floral-by-crystal_walen/en/fabric/7137786-genevieve-floral-by-crystal...0
36650975love-nurse-charcoal-gray-by-phyllisdobbs/en/fabric/6650975-love-nurse-charcoal-gray-by...0
45544045napoleonic-bees-faux-gilt-on-blackest-black-by.../en/fabric/5544045-napoleonic-bees-faux-gilt-o...0
...............
793840217nurse-theme-by-hot4tees_bg-yahoo_com/en/fabric/3840217-nurse-theme-by-hot4tees_bg-...0
806864327love-lips-red-by-hipkiddesigns/en/fabric/6864327-love-lips-red-by-hipkiddesigns0
816650888love-nurse-whimsy-blue-by-phyllisdobbs/en/fabric/6650888-love-nurse-whimsy-blue-by-p...0
825964319hearts-on-grey-linen-valentines-day-by-littlea.../en/fabric/5964319-hearts-on-grey-linen-valent...0
831096407skull-wall-by-ben_goetting/en/fabric/1096407-skull-wall-by-ben_goetting0
\n", 757 | "

84 rows × 4 columns

\n", 758 | "
" 759 | ], 760 | "text/plain": [ 761 | " id slug \\\n", 762 | "0 8692520 bees-lemons-large-blue-by-fernlesliestudio \n", 763 | "1 4352750 loteria-by-jellymania \n", 764 | "2 7137786 genevieve-floral-by-crystal_walen \n", 765 | "3 6650975 love-nurse-charcoal-gray-by-phyllisdobbs \n", 766 | "4 5544045 napoleonic-bees-faux-gilt-on-blackest-black-by... \n", 767 | ".. ... ... \n", 768 | "79 3840217 nurse-theme-by-hot4tees_bg-yahoo_com \n", 769 | "80 6864327 love-lips-red-by-hipkiddesigns \n", 770 | "81 6650888 love-nurse-whimsy-blue-by-phyllisdobbs \n", 771 | "82 5964319 hearts-on-grey-linen-valentines-day-by-littlea... \n", 772 | "83 1096407 skull-wall-by-ben_goetting \n", 773 | "\n", 774 | " path scraped \n", 775 | "0 /en/fabric/8692520-bees-lemons-large-blue-by-f... 0 \n", 776 | "1 /en/fabric/4352750-loteria-by-jellymania 0 \n", 777 | "2 /en/fabric/7137786-genevieve-floral-by-crystal... 0 \n", 778 | "3 /en/fabric/6650975-love-nurse-charcoal-gray-by... 0 \n", 779 | "4 /en/fabric/5544045-napoleonic-bees-faux-gilt-o... 0 \n", 780 | ".. ... ... \n", 781 | "79 /en/fabric/3840217-nurse-theme-by-hot4tees_bg-... 0 \n", 782 | "80 /en/fabric/6864327-love-lips-red-by-hipkiddesigns 0 \n", 783 | "81 /en/fabric/6650888-love-nurse-whimsy-blue-by-p... 0 \n", 784 | "82 /en/fabric/5964319-hearts-on-grey-linen-valent... 0 \n", 785 | "83 /en/fabric/1096407-skull-wall-by-ben_goetting 0 \n", 786 | "\n", 787 | "[84 rows x 4 columns]" 788 | ] 789 | }, 790 | "execution_count": 23, 791 | "metadata": {}, 792 | "output_type": "execute_result" 793 | } 794 | ], 795 | "source": [ 796 | "pd.read_csv(\"local.csv\")" 797 | ] 798 | }, 799 | { 800 | "cell_type": "markdown", 801 | "metadata": {}, 802 | "source": [ 803 | "## Asynchronous Scraping with `chromedriver` and `arsenic`\n", 804 | "\n", 805 | "[arsenic Docs](https://arsenic.readthedocs.io/en/latest/)" 806 | ] 807 | }, 808 | { 809 | "cell_type": "code", 810 | "execution_count": 24, 811 | "metadata": {}, 812 | "outputs": [], 813 | "source": [ 814 | "# !pip install arsenic" 815 | ] 816 | }, 817 | { 818 | "cell_type": "code", 819 | "execution_count": 41, 820 | "metadata": {}, 821 | "outputs": [ 822 | { 823 | "name": "stdout", 824 | "output_type": "stream", 825 | "text": [ 826 | "Overwriting async_scrape.py\n" 827 | ] 828 | } 829 | ], 830 | "source": [ 831 | "%%writefile async_scrape.py\n", 832 | "\n", 833 | "import os\n", 834 | "import asyncio\n", 835 | "from arsenic import get_session, keys, browsers, services\n", 836 | "import pandas as pd\n", 837 | "from requests_html import HTML\n", 838 | "import itertools\n", 839 | "import re\n", 840 | "import time\n", 841 | "import pathlib\n", 842 | "\n", 843 | "\n", 844 | "# /en/fabric/7137786-genevieve-floral-by-crystal_walen\n", 845 | "async def extract_id_slug(url_path):\n", 846 | " regex = r\"^[^\\s]+/(?P\\d+)-(?P[\\w_-]+)$\"\n", 847 | " group = re.match(regex, url_path)\n", 848 | " if not group:\n", 849 | " return None, None\n", 850 | " return group['id'], group['slug']\n", 851 | "\n", 852 | "\n", 853 | "\n", 854 | "async def get_links(body_content):\n", 855 | " html_r = HTML(html=body_content)\n", 856 | " fabric_links = [x for x in list(html_r.links) if x.startswith(\"/en/fabric\")]\n", 857 | " datas = []\n", 858 | " for path in fabric_links:\n", 859 | " id_, slug_ = await extract_id_slug(path)\n", 860 | " data = {\n", 861 | " \"id\": id_,\n", 862 | " \"slug\": slug_,\n", 863 | " \"path\": path,\n", 864 | " \"scraped\": 0 # True / False -> 1 / 0 \n", 865 | " }\n", 866 | " datas.append(data)\n", 867 | " return datas\n", 868 | "\n", 869 | "async def scraper(url):\n", 870 | " service = services.Chromedriver()\n", 871 | " browser = browsers.Chrome(chromeOptions={\n", 872 | " 'args': ['--headless', '--disable-gpu']\n", 873 | " })\n", 874 | " async with get_session(service, browser) as session:\n", 875 | " await session.get(url)\n", 876 | " body = await session.get_page_source()\n", 877 | " # print(body)\n", 878 | " return body\n", 879 | "\n", 880 | "async def store_links_as_df_pickle(datas=[], name='links.pkl'):\n", 881 | " df = pd.DataFrame(datas)\n", 882 | " df.set_index('id', drop=True, inplace=True)\n", 883 | " df.to_pickle(name)\n", 884 | " return df\n", 885 | " \n", 886 | " \n", 887 | "async def run(url):\n", 888 | " body_content = await scraper(url)\n", 889 | " links = await get_links(body_content)\n", 890 | " df = await store_links_as_df_pickle(links)\n", 891 | " return links\n", 892 | " \n", 893 | "if __name__ == \"__main__\":\n", 894 | " url = 'https://www.spoonflower.com/en/shop?on=fabric'\n", 895 | " results = asyncio.run(run(url))\n", 896 | " print(results)\n" 897 | ] 898 | }, 899 | { 900 | "cell_type": "code", 901 | "execution_count": 43, 902 | "metadata": {}, 903 | "outputs": [ 904 | { 905 | "name": "stdout", 906 | "output_type": "stream", 907 | "text": [ 908 | "https://www.spoonflower.com/en/shop?on=fabric\r\n" 909 | ] 910 | } 911 | ], 912 | "source": [ 913 | "!python async_scrape.py" 914 | ] 915 | }, 916 | { 917 | "cell_type": "code", 918 | "execution_count": 36, 919 | "metadata": {}, 920 | "outputs": [ 921 | { 922 | "data": { 923 | "text/html": [ 924 | "
\n", 925 | "\n", 938 | "\n", 939 | " \n", 940 | " \n", 941 | " \n", 942 | " \n", 943 | " \n", 944 | " \n", 945 | " \n", 946 | " \n", 947 | " \n", 948 | " \n", 949 | " \n", 950 | " \n", 951 | " \n", 952 | " \n", 953 | " \n", 954 | " \n", 955 | " \n", 956 | " \n", 957 | " \n", 958 | " \n", 959 | " \n", 960 | " \n", 961 | " \n", 962 | " \n", 963 | " \n", 964 | " \n", 965 | " \n", 966 | " \n", 967 | " \n", 968 | " \n", 969 | " \n", 970 | " \n", 971 | " \n", 972 | " \n", 973 | " \n", 974 | " \n", 975 | " \n", 976 | " \n", 977 | " \n", 978 | " \n", 979 | " \n", 980 | " \n", 981 | " \n", 982 | " \n", 983 | " \n", 984 | " \n", 985 | "
slugpathscraped
id
7137786genevieve-floral-by-crystal_walen/en/fabric/7137786-genevieve-floral-by-crystal...0
4893900half-scale-m81-woodland-camo-by-ricraynor/en/fabric/4893900-half-scale-m81-woodland-cam...0
7661255just-jellies-jellyfish-by-katerhees/en/fabric/7661255-just-jellies-jellyfish-by-k...0
503435680s-hair-dryers-by-cjldesigns/en/fabric/5034356-80s-hair-dryers-by-cjldesigns0
2623675black-white-music-notes-by-inspirationz/en/fabric/2623675-black-white-music-notes-by-...0
\n", 986 | "
" 987 | ], 988 | "text/plain": [ 989 | " slug \\\n", 990 | "id \n", 991 | "7137786 genevieve-floral-by-crystal_walen \n", 992 | "4893900 half-scale-m81-woodland-camo-by-ricraynor \n", 993 | "7661255 just-jellies-jellyfish-by-katerhees \n", 994 | "5034356 80s-hair-dryers-by-cjldesigns \n", 995 | "2623675 black-white-music-notes-by-inspirationz \n", 996 | "\n", 997 | " path scraped \n", 998 | "id \n", 999 | "7137786 /en/fabric/7137786-genevieve-floral-by-crystal... 0 \n", 1000 | "4893900 /en/fabric/4893900-half-scale-m81-woodland-cam... 0 \n", 1001 | "7661255 /en/fabric/7661255-just-jellies-jellyfish-by-k... 0 \n", 1002 | "5034356 /en/fabric/5034356-80s-hair-dryers-by-cjldesigns 0 \n", 1003 | "2623675 /en/fabric/2623675-black-white-music-notes-by-... 0 " 1004 | ] 1005 | }, 1006 | "execution_count": 36, 1007 | "metadata": {}, 1008 | "output_type": "execute_result" 1009 | } 1010 | ], 1011 | "source": [ 1012 | "name = 'links.pkl'\n", 1013 | "df = pd.read_pickle(name)\n", 1014 | "df.head()" 1015 | ] 1016 | }, 1017 | { 1018 | "cell_type": "code", 1019 | "execution_count": 33, 1020 | "metadata": {}, 1021 | "outputs": [ 1022 | { 1023 | "data": { 1024 | "text/plain": [ 1025 | "(84, 3)" 1026 | ] 1027 | }, 1028 | "execution_count": 33, 1029 | "metadata": {}, 1030 | "output_type": "execute_result" 1031 | } 1032 | ], 1033 | "source": [ 1034 | "df.shape" 1035 | ] 1036 | }, 1037 | { 1038 | "cell_type": "code", 1039 | "execution_count": null, 1040 | "metadata": {}, 1041 | "outputs": [], 1042 | "source": [] 1043 | } 1044 | ], 1045 | "metadata": { 1046 | "kernelspec": { 1047 | "display_name": "Python 3", 1048 | "language": "python", 1049 | "name": "python3" 1050 | }, 1051 | "language_info": { 1052 | "codemirror_mode": { 1053 | "name": "ipython", 1054 | "version": 3 1055 | }, 1056 | "file_extension": ".py", 1057 | "mimetype": "text/x-python", 1058 | "name": "python", 1059 | "nbconvert_exporter": "python", 1060 | "pygments_lexer": "ipython3", 1061 | "version": "3.8.2" 1062 | } 1063 | }, 1064 | "nbformat": 4, 1065 | "nbformat_minor": 4 1066 | } 1067 | -------------------------------------------------------------------------------- /local.csv: -------------------------------------------------------------------------------- 1 | id,slug,path,scraped 2 | 3840217,nurse-theme-by-hot4tees_bg-yahoo_com,/en/fabric/3840217-nurse-theme-by-hot4tees_bg-yahoo_com,0 3 | 8619105,flight-feathers-painted-by-xoxotique,/en/fabric/8619105-flight-feathers-painted-by-xoxotique,0 4 | 7662668,mermaid-music-by-ceciliamok,/en/fabric/7662668-mermaid-music-by-ceciliamok,0 5 | 5312944,thank-being-friend-small-by-elladorine,/en/fabric/5312944-thank-being-friend-small-by-elladorine,0 6 | 4893900,half-scale-m81-woodland-camo-by-ricraynor,/en/fabric/4893900-half-scale-m81-woodland-camo-by-ricraynor,0 7 | 509390,spoonflower-color-map-by-spoonflower_help,/en/fabric/509390-spoonflower-color-map-by-spoonflower_help,0 8 | 8039248,forest-animal-hot-air-balloon-night-adventure-by-at_the_cottage,/en/fabric/8039248-forest-animal-hot-air-balloon-night-adventure-by-at_the_cottage,0 9 | 2623675,black-white-music-notes-by-inspirationz,/en/fabric/2623675-black-white-music-notes-by-inspirationz,0 10 | 7812388,dnd-pattern-by-neonborealis,/en/fabric/7812388-dnd-pattern-by-neonborealis,0 11 | 5839396,spectacular-cats-by-cynthia_arre,/en/fabric/5839396-spectacular-cats-by-cynthia_arre,0 12 | 4352750,loteria-by-jellymania,/en/fabric/4352750-loteria-by-jellymania,0 13 | 5964319,hearts-on-grey-linen-valentines-day-by-littlearrowdesign,/en/fabric/5964319-hearts-on-grey-linen-valentines-day-by-littlearrowdesign,0 14 | 7580754,ibd-gracie-grace-golden-jumbo-by-indybloomdesign,/en/fabric/7580754-ibd-gracie-grace-golden-jumbo-by-indybloomdesign,0 15 | 4888888,flowers-skulls-by-elladorine,/en/fabric/4888888-flowers-skulls-by-elladorine,0 16 | 7216659,rainbow-stars-watercolor-abstract-small-by-crystal_walen,/en/fabric/7216659-rainbow-stars-watercolor-abstract-small-by-crystal_walen,0 17 | 5247883,hexo-blue-med-by-nouveau_bohemian,/en/fabric/5247883-hexo-blue-med-by-nouveau_bohemian,0 18 | 6650975,love-nurse-charcoal-gray-by-phyllisdobbs,/en/fabric/6650975-love-nurse-charcoal-gray-by-phyllisdobbs,0 19 | 5513692,salon-barber-hairdresser-pattern-by-cloudycapevintage,/en/fabric/5513692-salon-barber-hairdresser-pattern-by-cloudycapevintage,0 20 | 7790777,turtles-aqua-blue-by-gingerlique,/en/fabric/7790777-turtles-aqua-blue-by-gingerlique,0 21 | 6650888,love-nurse-whimsy-blue-by-phyllisdobbs,/en/fabric/6650888-love-nurse-whimsy-blue-by-phyllisdobbs,0 22 | 7698482,scrubs-dr-stetho-scope-by-adrianne_vanalstine,/en/fabric/7698482-scrubs-dr-stetho-scope-by-adrianne_vanalstine,0 23 | 5131007,scandinavian-sweet-hedgehog-illustration-kids-gender-neutral-black-white-by-littlesmilemakers,/en/fabric/5131007-scandinavian-sweet-hedgehog-illustration-kids-gender-neutral-black-white-by-littlesmilemakers,0 24 | 5033660,80s-accessories-by-diannemehta,/en/fabric/5033660-80s-accessories-by-diannemehta,0 25 | 4270747,happy-hair-stylist-friends-blue-by-clayvision_-_ahappybluetree,/en/fabric/4270747-happy-hair-stylist-friends-blue-by-clayvision_-_ahappybluetree,0 26 | 7137898,sierra-floral-by-crystal_walen,/en/fabric/7137898-sierra-floral-by-crystal_walen,0 27 | 9240316,irish-notre-dame-irish-fabric-by-charlottewinter,/en/fabric/9240316-irish-notre-dame-irish-fabric-by-charlottewinter,0 28 | 6573088,whale-s-song-by-katherine_quinn,/en/fabric/6573088-whale-s-song-by-katherine_quinn,0 29 | 5773745,rainbow-pride-stripes-by-furbuddy,/en/fabric/5773745-rainbow-pride-stripes-by-furbuddy,0 30 | 6590171,mermaid-scales-by-elladorine,/en/fabric/6590171-mermaid-scales-by-elladorine,0 31 | 8197261,night-sky-stars-midnight-blue-by-at_the_cottage,/en/fabric/8197261-night-sky-stars-midnight-blue-by-at_the_cottage,0 32 | 5544045,napoleonic-bees-faux-gilt-on-blackest-black-by-peacoquettedesigns,/en/fabric/5544045-napoleonic-bees-faux-gilt-on-blackest-black-by-peacoquettedesigns,0 33 | 5532389,blue-white-camouflage-pattern-by-artpics,/en/fabric/5532389-blue-white-camouflage-pattern-by-artpics,0 34 | 7137786,genevieve-floral-by-crystal_walen,/en/fabric/7137786-genevieve-floral-by-crystal_walen,0 35 | 2632362,colorful-happy-smiley-face-squares-large-print-by-inspirationz,/en/fabric/2632362-colorful-happy-smiley-face-squares-large-print-by-inspirationz,0 36 | 5588706,black-lives-matter-small-scale-by-ashleysummersdesign,/en/fabric/5588706-black-lives-matter-small-scale-by-ashleysummersdesign,0 37 | 6444170,catching-fireflies-by-thestorysmith,/en/fabric/6444170-catching-fireflies-by-thestorysmith,0 38 | 7679631,scattered-earth-tones-watercolor-rainbows-by-anniemontgomerydesign,/en/fabric/7679631-scattered-earth-tones-watercolor-rainbows-by-anniemontgomerydesign,0 39 | 8692520,bees-lemons-large-blue-by-fernlesliestudio,/en/fabric/8692520-bees-lemons-large-blue-by-fernlesliestudio,0 40 | 7368347,dear-clementine-oranges-teal-by-crystal_walen,/en/fabric/7368347-dear-clementine-oranges-teal-by-crystal_walen,0 41 | 5469666,galaxy-far-far-away-gray-by-studiofibonacci,/en/fabric/5469666-galaxy-far-far-away-gray-by-studiofibonacci,0 42 | 1577333,marine-marpat-digital-woodland-camo-by-ricraynor,/en/fabric/1577333-marine-marpat-digital-woodland-camo-by-ricraynor,0 43 | 2623792,purple-space-stars-small-print-by-inspirationz,/en/fabric/2623792-purple-space-stars-small-print-by-inspirationz,0 44 | 9119650,horror-friends-by-mariospeedwagon,/en/fabric/9119650-horror-friends-by-mariospeedwagon,0 45 | 1306112,opal-by-peacoquettedesigns,/en/fabric/1306112-opal-by-peacoquettedesigns,0 46 | 7984669,8-love-care-medical-white-by-rebelmod,/en/fabric/7984669-8-love-care-medical-white-by-rebelmod,0 47 | 6079351,josie-meadow-floral-by-sweeterthanhoney,/en/fabric/6079351-josie-meadow-floral-by-sweeterthanhoney,0 48 | 5880084,mod-triangles-gold-indigo-by-crystal_walen,/en/fabric/5880084-mod-triangles-gold-indigo-by-crystal_walen,0 49 | 5034356,80s-hair-dryers-by-cjldesigns,/en/fabric/5034356-80s-hair-dryers-by-cjldesigns,0 50 | 6782514,eame-s-wildflower-meadow-by-hipkiddesigns,/en/fabric/6782514-eame-s-wildflower-meadow-by-hipkiddesigns,0 51 | 7661255,just-jellies-jellyfish-by-katerhees,/en/fabric/7661255-just-jellies-jellyfish-by-katerhees,0 52 | 5048115,mexican-blanket-by-anchored_by_love,/en/fabric/5048115-mexican-blanket-by-anchored_by_love,0 53 | 7984649,8-love-care-medical-black-by-rebelmod,/en/fabric/7984649-8-love-care-medical-black-by-rebelmod,0 54 | 8286001,hanging-out-by-sarah_knight,/en/fabric/8286001-hanging-out-by-sarah_knight,0 55 | 7685381,dragon-fire-by-adenaj,/en/fabric/7685381-dragon-fire-by-adenaj,0 56 | 7236018,australian-native-eucalyptus-leaves-edition-1-australiana-fabric-wallpaper-by-erin__kendal,/en/fabric/7236018-australian-native-eucalyptus-leaves-edition-1-australiana-fabric-wallpaper-by-erin__kendal,0 57 | 5939834,classic-leopard-by-cinneworthington,/en/fabric/5939834-classic-leopard-by-cinneworthington,0 58 | 3584004,mexican-sugar-skulls-small-by-lusykoror,/en/fabric/3584004-mexican-sugar-skulls-small-by-lusykoror,0 59 | 6178734,fable-floral-blush-med-by-nouveau_bohemian,/en/fabric/6178734-fable-floral-blush-med-by-nouveau_bohemian,0 60 | 1096407,skull-wall-by-ben_goetting,/en/fabric/1096407-skull-wall-by-ben_goetting,0 61 | 9060289,saints-fleur-de-lis-new-orleans-saints-football-football-fabric-fleur-de-lis-fabric-black-gold-gold-f-by-charlottewinter,/en/fabric/9060289-saints-fleur-de-lis-new-orleans-saints-football-football-fabric-fleur-de-lis-fabric-black-gold-gold-f-by-charlottewinter,0 62 | 8056679,ruth-bader-ginsgurg-rbg-bust-black-by-katerhees,/en/fabric/8056679-ruth-bader-ginsgurg-rbg-bust-black-by-katerhees,0 63 | 7522587,save-honey-bees-large-new-by-fernlesliestudio,/en/fabric/7522587-save-honey-bees-large-new-by-fernlesliestudio,0 64 | 991112,nursing-coordinates-by-bluevelvet,/en/fabric/991112-nursing-coordinates-by-bluevelvet,0 65 | 5514018,hair-cutting-shears-by-cloudycapevintage,/en/fabric/5514018-hair-cutting-shears-by-cloudycapevintage,0 66 | 7587085,heres-heart-navy-white-gold2-sketch-1-by-doodleandcharm_,/en/fabric/7587085-heres-heart-navy-white-gold2-sketch-1-by-doodleandcharm_,0 67 | 3817098,math-count-on-by-sammyk,/en/fabric/3817098-math-count-on-by-sammyk,0 68 | 8618597,rainbow-watercolor-pawprints-by-dragonstarart,/en/fabric/8618597-rainbow-watercolor-pawprints-by-dragonstarart,0 69 | 2330040,maryland-flags-by-elramsay,/en/fabric/2330040-maryland-flags-by-elramsay,0 70 | 6812243,cute-kawaii-sushi-small-size-by-penguinhouse,/en/fabric/6812243-cute-kawaii-sushi-small-size-by-penguinhouse,0 71 | 7944022,golden-girls-illustration-peach-by-yesterdaycollection,/en/fabric/7944022-golden-girls-illustration-peach-by-yesterdaycollection,0 72 | 7463028,seamless-watercolor-larger-leaves-pattern-1-by-daily_miracles,/en/fabric/7463028-seamless-watercolor-larger-leaves-pattern-1-by-daily_miracles,0 73 | 4995362,heart-health-awareness-black-by-ohdarkthirty,/en/fabric/4995362-heart-health-awareness-black-by-ohdarkthirty,0 74 | 4830872,cherry-blossom-watercolor-cherry-blossom-floral-by-magentarosedesigns,/en/fabric/4830872-cherry-blossom-watercolor-cherry-blossom-floral-by-magentarosedesigns,0 75 | 1112778,rosie-riveter-by-spacefem,/en/fabric/1112778-rosie-riveter-by-spacefem,0 76 | 6852245,cute-nurse-love-black-no-gradient-by-jannasalak,/en/fabric/6852245-cute-nurse-love-black-no-gradient-by-jannasalak,0 77 | 2920223,m81-woodland-camo-by-ricraynor,/en/fabric/2920223-m81-woodland-camo-by-ricraynor,0 78 | 6864327,love-lips-red-by-hipkiddesigns,/en/fabric/6864327-love-lips-red-by-hipkiddesigns,0 79 | 5700186,puzzle-hearts-by-designedbygeeks,/en/fabric/5700186-puzzle-hearts-by-designedbygeeks,0 80 | 4981816,black-white-dogs-by-littleislandcompany,/en/fabric/4981816-black-white-dogs-by-littleislandcompany,0 81 | 9453318,african-american-girls-retro-pop-art-by-whimsical_brush,/en/fabric/9453318-african-american-girls-retro-pop-art-by-whimsical_brush,0 82 | 6327300,call-mountains-evergreen-med-by-nouveau_bohemian,/en/fabric/6327300-call-mountains-evergreen-med-by-nouveau_bohemian,0 83 | 6263258,navy-blue-watercolor-herringbone-by-laurapol,/en/fabric/6263258-navy-blue-watercolor-herringbone-by-laurapol,0 84 | 6715163,8-wild-heart-florals-white-by-shopcabin,/en/fabric/6715163-8-wild-heart-florals-white-by-shopcabin,0 85 | 4995555,heart-health-awareness-light-gray-large-by-ohdarkthirty,/en/fabric/4995555-heart-health-awareness-light-gray-large-by-ohdarkthirty,0 86 | -------------------------------------------------------------------------------- /pyvenv.cfg: -------------------------------------------------------------------------------- 1 | home = /Library/Frameworks/Python.framework/Versions/3.6/bin 2 | include-system-site-packages = false 3 | version = 3.6.8 4 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | jupyter 2 | fire 3 | sqlalchemy 4 | pandas 5 | arsenic 6 | requests-html -------------------------------------------------------------------------------- /spoonflower_fabrics.csv: -------------------------------------------------------------------------------- 1 | ,id,slug,path,title,size,price,priceCurrency,priceValidUntil 2 | 0,6444170,catching-fireflies-by-thestorysmith,/en/fabric/6444170-catching-fireflies-by-thestorysmith,Catching Fireflies,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 3 | 1,3584004,mexican-sugar-skulls-small-by-lusykoror,/en/fabric/3584004-mexican-sugar-skulls-small-by-lusykoror,Mexican Sugar Skulls (small),"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 4 | 2,7984669,8-love-care-medical-white-by-rebelmod,/en/fabric/7984669-8-love-care-medical-white-by-rebelmod,"8"" Love to Care Medical - White","Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 5 | 3,8619105,flight-feathers-painted-by-xoxotique,/en/fabric/8619105-flight-feathers-painted-by-xoxotique,Flight of Feathers Painted,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 6 | 4,7944022,golden-girls-illustration-peach-by-yesterdaycollection,/en/fabric/7944022-golden-girls-illustration-peach-by-yesterdaycollection,Golden Girls Illustration in Peach,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 7 | 5,5544045,napoleonic-bees-faux-gilt-on-blackest-black-by-peacoquettedesigns,/en/fabric/5544045-napoleonic-bees-faux-gilt-on-blackest-black-by-peacoquettedesigns,Napoleonic Bees ~ Faux Gilt on Blackest Black,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 8 | 6,5939834,classic-leopard-by-cinneworthington,/en/fabric/5939834-classic-leopard-by-cinneworthington,classic leopard,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 9 | 7,8618597,rainbow-watercolor-pawprints-by-dragonstarart,/en/fabric/8618597-rainbow-watercolor-pawprints-by-dragonstarart,Rainbow Watercolor Pawprints,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 10 | 8,4893900,half-scale-m81-woodland-camo-by-ricraynor,/en/fabric/4893900-half-scale-m81-woodland-camo-by-ricraynor,Half Scale M81 Woodland Camo,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 11 | 9,5880084,mod-triangles-gold-indigo-by-crystal_walen,/en/fabric/5880084-mod-triangles-gold-indigo-by-crystal_walen,Mod Triangles Gold Indigo,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 12 | 10,4534585,super-words-dark-by-robyriker,/en/fabric/4534585-super-words-dark-by-robyriker,Super Words! (Dark),"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 13 | 11,5514018,hair-cutting-shears-by-cloudycapevintage,/en/fabric/5514018-hair-cutting-shears-by-cloudycapevintage,Hair Cutting Shears,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 14 | 12,4995555,heart-health-awareness-light-gray-large-by-ohdarkthirty,/en/fabric/4995555-heart-health-awareness-light-gray-large-by-ohdarkthirty,Heart Health Awareness - Light Gray (large),"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 15 | 13,991112,nursing-coordinates-by-bluevelvet,/en/fabric/991112-nursing-coordinates-by-bluevelvet,NURSING_COORDINATES,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 16 | 14,9240316,irish-navy-green-shamrock-fabric-by-charlottewinter,/en/fabric/9240316-irish-navy-green-shamrock-fabric-by-charlottewinter,irish - navy and green shamrock fabric,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 17 | 15,5839396,spectacular-cats-by-cynthia_arre,/en/fabric/5839396-spectacular-cats-by-cynthia_arre,Spectacular Cats,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 18 | 16,1306112,opal-by-peacoquettedesigns,/en/fabric/1306112-opal-by-peacoquettedesigns,Opal,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 19 | 17,8197261,night-sky-stars-midnight-blue-by-at_the_cottage,/en/fabric/8197261-night-sky-stars-midnight-blue-by-at_the_cottage,Night Sky Stars Midnight Blue,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 20 | 18,7790777,turtles-aqua-blue-by-gingerlique,/en/fabric/7790777-turtles-aqua-blue-by-gingerlique,Turtles in Aqua and Blue,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 21 | 19,5033660,80s-accessories-by-diannemehta,/en/fabric/5033660-80s-accessories-by-diannemehta,80s Accessories,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 22 | 20,6327300,call-mountains-evergreen-med-by-nouveau_bohemian,/en/fabric/6327300-call-mountains-evergreen-med-by-nouveau_bohemian,Call of the Mountains (evergreen) MED,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 23 | 21,2632362,colorful-happy-smiley-face-squares-large-print-by-inspirationz,/en/fabric/2632362-colorful-happy-smiley-face-squares-large-print-by-inspirationz,Colorful Happy Smiley face Squares (large print),"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 24 | 22,7236018,australian-native-eucalyptus-leaves-edition-1-australiana-fabric-wallpaper-by-erin__kendal,/en/fabric/7236018-australian-native-eucalyptus-leaves-edition-1-australiana-fabric-wallpaper-by-erin__kendal,Australian Native Eucalyptus Leaves || Edition 1 || Australiana Fabric Wallpaper,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 25 | 23,4830872,cherry-blossom-watercolor-cherry-blossom-floral-by-magentarosedesigns,/en/fabric/4830872-cherry-blossom-watercolor-cherry-blossom-floral-by-magentarosedesigns,cherry blossom watercolor // cherry blossom floral,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 26 | 24,7580754,ibd-gracie-grace-golden-jumbo-by-indybloomdesign,/en/fabric/7580754-ibd-gracie-grace-golden-jumbo-by-indybloomdesign,IBD Gracie Grace Golden Jumbo,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 27 | 25,7679631,scattered-earth-tones-watercolor-rainbows-by-anniemontgomerydesign,/en/fabric/7679631-scattered-earth-tones-watercolor-rainbows-by-anniemontgomerydesign,Scattered Earth Tones Watercolor Rainbows,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 28 | 26,7463028,seamless-watercolor-larger-leaves-pattern-1-by-daily_miracles,/en/fabric/7463028-seamless-watercolor-larger-leaves-pattern-1-by-daily_miracles,SEAMLESS watercolor Larger leaves Pattern-1,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 29 | 27,6864327,love-lips-red-by-hipkiddesigns,/en/fabric/6864327-love-lips-red-by-hipkiddesigns,Love Lips // Red,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 30 | 28,4981816,black-white-dogs-by-littleislandcompany,/en/fabric/4981816-black-white-dogs-by-littleislandcompany,Black and White Dogs,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 31 | 29,2623792,purple-space-stars-small-print-by-inspirationz,/en/fabric/2623792-purple-space-stars-small-print-by-inspirationz,Purple Space Stars (small print),"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 32 | 30,5247883,hexo-blue-med-by-nouveau_bohemian,/en/fabric/5247883-hexo-blue-med-by-nouveau_bohemian,Hexo (blue) MED,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 33 | 31,7587209,8-woodland-trees-white-by-shopcabin,/en/fabric/7587209-8-woodland-trees-white-by-shopcabin,"8"" Woodland Trees - White","Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 34 | 32,5734850,dachshund-floral-vintage-flowers-doxie-fabric-doxie-dachshunds-design-cute-doxie-dog-by-petfriendly,/en/fabric/5734850-dachshund-floral-vintage-flowers-doxie-fabric-doxie-dachshunds-design-cute-doxie-dog-by-petfriendly,dachshund floral vintage flowers doxie fabric doxie dachshunds design cute doxie dog,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 35 | 33,7076547,small-scale-vintage-moroccan-dusty-blue-by-littlearrowdesign,/en/fabric/7076547-small-scale-vintage-moroccan-dusty-blue-by-littlearrowdesign,(small scale) vintage moroccan - dusty blue,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 36 | 34,7516450,asian-elephant-paisley-raindrops-by-honoluludesigns,/en/fabric/7516450-asian-elephant-paisley-raindrops-by-honoluludesigns,Asian Elephant Paisley Raindrops,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 37 | 35,7502677,fable-floral-blush-jumbo-by-nouveau_bohemian,/en/fabric/7502677-fable-floral-blush-jumbo-by-nouveau_bohemian,Fable Floral (blush) JUMBO,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 38 | 36,7772667,chibi-plague-doctors-black-on-white-by-ameliae,/en/fabric/7772667-chibi-plague-doctors-black-on-white-by-ameliae,Chibi Plague Doctors Black on White,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 39 | 37,6700493,mod-triangles-white-gold-by-crystal_walen,/en/fabric/6700493-mod-triangles-white-gold-by-crystal_walen,Mod Triangles - white + gold,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 40 | 38,1251898,maddox-ombre-stars-stripes-by-veritymaddox,/en/fabric/1251898-maddox-ombre-stars-stripes-by-veritymaddox,Maddox ombre stars and stripes,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 41 | 39,9612900,spring-pink-floral-highland-cow-big-54x36-inches-by-karolina_papiez,/en/fabric/9612900-spring-pink-floral-highland-cow-big-54x36-inches-by-karolina_papiez,spring pink floral highland cow - big 54x36 inches,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 42 | 40,5840025,sea-birds-navy-by-kirstenkatz,/en/fabric/5840025-sea-birds-navy-by-kirstenkatz,Sea Birds Navy,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 43 | 41,9914889,cat-noodle-navy-white-cute-cats-by-patricia_lima,/en/fabric/9914889-cat-noodle-navy-white-cute-cats-by-patricia_lima,Cat Noodle - Navy and White Cute Cats,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 44 | 42,7591282,cute-nurse-love-gray-no-gradient-by-jannasalak,/en/fabric/7591282-cute-nurse-love-gray-no-gradient-by-jannasalak,Cute Nurse Love Gray- No Gradient,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 45 | 43,7089223,tattoo-me-tattoo-by-stitchyrichie,/en/fabric/7089223-tattoo-me-tattoo-by-stitchyrichie,Tattoo me Tattoo you,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 46 | 44,4775911,philippines-flag-by-flagfabric,/en/fabric/4775911-philippines-flag-by-flagfabric,Philippines flag,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 47 | 45,2131603,solid-light-teal-by-misstiina,/en/fabric/2131603-solid-light-teal-by-misstiina,solid light teal,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 48 | 46,1384442,wildflowers-by-heatherross,/en/fabric/1384442-wildflowers-by-heatherross,wildflowers,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 49 | 47,7814050,lemon-blossoms-by-laurapol,/en/fabric/7814050-lemon-blossoms-by-laurapol,Lemon Blossoms,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 50 | 48,7905790,mid-century-droplets-kaleidoscope-large-by-ceciliamok,/en/fabric/7905790-mid-century-droplets-kaleidoscope-large-by-ceciliamok,Mid Century Droplets {Kaleidoscope} - large,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 51 | 49,4286392,happy-teeth-friends-light-teal-by-clayvision_-_ahappybluetree,/en/fabric/4286392-happy-teeth-friends-light-teal-by-clayvision_-_ahappybluetree,Happy Teeth & Friends - Light Teal,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 52 | 50,7540781,little-multicolored-dinos-on-white-by-micklyn,/en/fabric/7540781-little-multicolored-dinos-on-white-by-micklyn,Little Multicolored Dinos on White,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 53 | 51,7009777,rainbow-llamas-by-littlearrowdesign,/en/fabric/7009777-rainbow-llamas-by-littlearrowdesign,rainbow llamas,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 54 | 52,7967284,12-woodland-animals-baby-animals-forest-light-background-by-utart,/en/fabric/7967284-12-woodland-animals-baby-animals-forest-light-background-by-utart,"12"" Woodland Animals - Baby Animals in Forest light background","Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 55 | 53,9069096,black-cat-butterfly-garden-by-boszorka,/en/fabric/9069096-black-cat-butterfly-garden-by-boszorka,Black cat in the butterfly garden,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 56 | 54,3039000,cowgirl-cowboy-western-rodeo-aqua-by-parisbebe,/en/fabric/3039000-cowgirl-cowboy-western-rodeo-aqua-by-parisbebe,Cowgirl Cowboy western Rodeo Aqua,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 57 | 55,5125276,shark-sharks-nautical-boys-white-background-kids-ocean-sea-tiger-shark-hammerhead-shark-fabric-by-andrea_lauren,/en/fabric/5125276-shark-sharks-nautical-boys-white-background-kids-ocean-sea-tiger-shark-hammerhead-shark-fabric-by-andrea_lauren,shark // sharks nautical boys white background kids ocean sea tiger shark hammerhead shark fabric,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 58 | 56,4238535,chemistry-by-pinkpineappledesign,/en/fabric/4238535-chemistry-by-pinkpineappledesign,Chemistry,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 59 | 57,607484,fishy-felines-by-cjldesigns,/en/fabric/607484-fishy-felines-by-cjldesigns,Fishy_felines,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 60 | 58,6577017,haunted-cat-skeletons-black-white-by-pamelachi,/en/fabric/6577017-haunted-cat-skeletons-black-white-by-pamelachi,haunted cat skeletons black and white,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 61 | 59,4270747,happy-hair-stylist-friends-blue-by-clayvision_-_ahappybluetree,/en/fabric/4270747-happy-hair-stylist-friends-blue-by-clayvision_-_ahappybluetree,Happy Hair Stylist Friends - Blue,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 62 | 60,10112497,large-scale-alien-abduction-toile-de-jouy-pattern-blue-by-somecallmebeth,/en/fabric/10112497-large-scale-alien-abduction-toile-de-jouy-pattern-blue-by-somecallmebeth,Large-Scale Alien Abduction Toile De Jouy Pattern in Blue,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 63 | 61,7082655,frenchie-yoga-by-huebucket,/en/fabric/7082655-frenchie-yoga-by-huebucket,Frenchie Yoga,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 64 | 62,8985919,sunflowers-cream-3x3-by-indybloomdesign,/en/fabric/8985919-sunflowers-cream-3x3-by-indybloomdesign,Sunflowers and cream 3x3,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 65 | 63,4464173,cats-stripes-black-white-by-caja_design,/en/fabric/4464173-cats-stripes-black-white-by-caja_design,Cats with Stripes Black&White,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 66 | 64,7307966,art-deco-fleurs-d-or-by-j9design,/en/fabric/7307966-art-deco-fleurs-d-or-by-j9design,Art Deco fleurs d’or,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 67 | 65,5262779,mustard-painted-dots-yellow-spring-girls-sweet-yellow-coordinate-by-charlottewinter,/en/fabric/5262779-mustard-painted-dots-yellow-spring-girls-sweet-yellow-coordinate-by-charlottewinter,mustard painted dots yellow spring girls sweet yellow coordinate,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 68 | 66,9612358,black-forest-large-by-katherine_quinn,/en/fabric/9612358-black-forest-large-by-katherine_quinn,The Black Forest {large},"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 69 | 67,6058811,acoustic-guitars-by-jannasalak,/en/fabric/6058811-acoustic-guitars-by-jannasalak,Acoustic Guitars,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 70 | 68,9012014,jonah-s-whale-by-ivieclothco,/en/fabric/9012014-jonah-s-whale-by-ivieclothco,jonah's whale,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 71 | 69,2204162,virus-blue-scrubs-by-kightleys,/en/fabric/2204162-virus-blue-scrubs-by-kightleys,VIRUS Blue Scrubs,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 72 | 70,7068059,hamsters-on-light-blue-by-landpenguin,/en/fabric/7068059-hamsters-on-light-blue-by-landpenguin,Hamsters on Light Blue,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 73 | 71,5649370,gray-watercolor-herringbone-by-mrshervi,/en/fabric/5649370-gray-watercolor-herringbone-by-mrshervi,gray watercolor herringbone,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 74 | 72,1197446,ski-lifts-by-bluevelvet,/en/fabric/1197446-ski-lifts-by-bluevelvet,SKI LIFTS,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 75 | 73,7350122,american-triple-crown-winners-by-leroyj,/en/fabric/7350122-american-triple-crown-winners-by-leroyj,American Triple Crown Winners,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 76 | 74,7243056,baseballs-dark-blue-by-littlearrowdesign,/en/fabric/7243056-baseballs-dark-blue-by-littlearrowdesign,baseballs - dark blue,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 77 | 75,6815360,abstract-rainbow-soft-watercolour-paint-splatter-by-caja_design,/en/fabric/6815360-abstract-rainbow-soft-watercolour-paint-splatter-by-caja_design,Abstract Rainbow Soft Watercolour Paint & Splatter,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 78 | 76,8433849,black-white-leopard-leopard-print-ecru-tiny-scale-collection-leopard-spots-punk-rock-animal-print-by-borderlines,/en/fabric/8433849-black-white-leopard-leopard-print-ecru-tiny-scale-collection-leopard-spots-punk-rock-animal-print-by-borderlines,★ BLACK and WHITE LEOPARD - LEOPARD PRINT in ECRU ★ Tiny Scale / Collection : Leopard spots – Punk Rock Animal Print,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 79 | 77,8341105,traml-camo-woodland-by-mb4studio,/en/fabric/8341105-traml-camo-woodland-by-mb4studio,TRAML™ Camo - Woodland,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 80 | 78,7137786,genevieve-floral-by-crystal_walen,/en/fabric/7137786-genevieve-floral-by-crystal_walen,genevieve floral,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 81 | 79,2275816,game-world-by-chickoteria,/en/fabric/2275816-game-world-by-chickoteria,Game World,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 82 | 80,7939810,nautical-damask-by-katerhees,/en/fabric/7939810-nautical-damask-by-katerhees,Nautical Damask,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 83 | 81,10031369,hope-design-good-by-bound_textiles,/en/fabric/10031369-hope-design-good-by-bound_textiles,Hope - Design for good,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 84 | 82,5580363,watercolour-strawberries-by-emeryallardsmith,/en/fabric/5580363-watercolour-strawberries-by-emeryallardsmith,Watercolour Strawberries,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 85 | 83,2772127,happy-chihuahua-by-lil_creatures,/en/fabric/2772127-happy-chihuahua-by-lil_creatures,Happy Chihuahua,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 86 | 84,1392308,smile-world-smiles-back-by-bonnie_phantasm,/en/fabric/1392308-smile-world-smiles-back-by-bonnie_phantasm,Smile - and the world smiles back at you!,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 87 | 85,5749777,eyeglasses-by-kellyrenay,/en/fabric/5749777-eyeglasses-by-kellyrenay,Eyeglasses,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 88 | 86,6445895,busy-bees-watercolor-by-heatherdutton,/en/fabric/6445895-busy-bees-watercolor-by-heatherdutton,Busy Bees - Watercolor,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 89 | 87,6594616,library-card-scatter-by-lellobird,/en/fabric/6594616-library-card-scatter-by-lellobird,Library Card Scatter,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 90 | 88,6096792,max-s-map-charcoal-by-nouveau_bohemian,/en/fabric/6096792-max-s-map-charcoal-by-nouveau_bohemian,Max's Map (charcoal),"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 91 | 89,7269231,golden-retriever-cannon-beach-fabric-cute-dogs-on-beach-oregon-blue-by-petfriendly,/en/fabric/7269231-golden-retriever-cannon-beach-fabric-cute-dogs-on-beach-oregon-blue-by-petfriendly,golden retriever cannon beach fabric - cute dogs on the beach in oregon - blue,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 92 | 90,6842273,ambulance-on-grey-by-littlearrowdesign,/en/fabric/6842273-ambulance-on-grey-by-littlearrowdesign,ambulance on grey,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 93 | 91,2302934,blue-phone-boxes-black-swirls-on-white-small-swirls-by-risarocksit,/en/fabric/2302934-blue-phone-boxes-black-swirls-on-white-small-swirls-by-risarocksit,Blue Phone Boxes and Black Swirls on White - Small Swirls,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 94 | 92,5500782,isabelle-jr-s-island-getaway-by-loverlylibrarian,/en/fabric/5500782-isabelle-jr-s-island-getaway-by-loverlylibrarian,Isabelle Jr's Island Getaway,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 95 | 93,9925596,pastel-abstract-by-vagabond_folk_art,/en/fabric/9925596-pastel-abstract-by-vagabond_folk_art,Pastel Abstract,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 96 | 94,1440062,ocean-depth-map-by-ravynka,/en/fabric/1440062-ocean-depth-map-by-ravynka,Ocean depth map,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 97 | 95,6947878,gold-lips-on-graphite-grey-gray-gold-kiss-by-mlags,/en/fabric/6947878-gold-lips-on-graphite-grey-gray-gold-kiss-by-mlags,gold lips on graphite grey gray and gold kiss,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 98 | 96,5872295,fall-plaid-navy-rustic-woods-blue-white-by-littlearrowdesign,/en/fabric/5872295-fall-plaid-navy-rustic-woods-blue-white-by-littlearrowdesign,"fall plaid || navy, rustic woods blue, white","Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 99 | 97,7582998,leopard-rose-gold-spots-on-pink-by-paper_and_frill,/en/fabric/7582998-leopard-rose-gold-spots-on-pink-by-paper_and_frill,Leopard Rose Gold Spots on Pink,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 100 | 98,5566933,galaxy-far-far-away-1in-chibi-gray-by-studiofibonacci,/en/fabric/5566933-galaxy-far-far-away-1in-chibi-gray-by-studiofibonacci,A Galaxy Far Far Away - 1in chibi (gray),"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 101 | 99,6735421,jurassic-halloween-by-penguinhouse,/en/fabric/6735421-jurassic-halloween-by-penguinhouse,Jurassic halloween,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 102 | 100,9197320,halloween-stitch-orange-black-by-jacquelinehurd,/en/fabric/9197320-halloween-stitch-orange-black-by-jacquelinehurd,"Halloween Stitch, Orange & Black","Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 103 | 101,6808509,blue-stripes-watercolor-by-hipkiddesigns,/en/fabric/6808509-blue-stripes-watercolor-by-hipkiddesigns,Blue Stripes Watercolor,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 104 | 102,10322544,mid-century-lake-house-by-lellobird,/en/fabric/10322544-mid-century-lake-house-by-lellobird,Mid-Century Lake House,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 105 | 103,4110205,stars-slate-grey-stars-fabric-star-design-baby-nursery-fabric-andrea-lauren-by-andrea_lauren,/en/fabric/4110205-stars-slate-grey-stars-fabric-star-design-baby-nursery-fabric-andrea-lauren-by-andrea_lauren,stars // slate grey stars fabric star design baby nursery fabric andrea lauren,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 106 | 104,7141584,6-woodland-snow-white-by-hipkiddesigns,/en/fabric/7141584-6-woodland-snow-white-by-hipkiddesigns,"6"" Woodland Snow // White","Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 107 | 105,5534753,tropical-flamingo-by-pinkpineappledesign,/en/fabric/5534753-tropical-flamingo-by-pinkpineappledesign,Tropical Flamingo,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 108 | 106,7662668,mermaid-music-by-ceciliamok,/en/fabric/7662668-mermaid-music-by-ceciliamok,Mermaid Music,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 109 | 107,10318034,vacation-lake-maritime-by-brendazapotosky,/en/fabric/10318034-vacation-lake-maritime-by-brendazapotosky,Vacation at the Lake (Maritime),"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 110 | 108,9948235,hunting-butterflies-by-stolenpencil,/en/fabric/9948235-hunting-butterflies-by-stolenpencil,Hunting butterflies,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 111 | 109,1575773,mint-pink-by-jessica_d_,/en/fabric/1575773-mint-pink-by-jessica_d_,mint & pink,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 112 | 110,8529746,noir-cat-face-black-med-by-nouveau_bohemian,/en/fabric/8529746-noir-cat-face-black-med-by-nouveau_bohemian,Noir Cat Face (black) MED,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 113 | 111,7172110,female-heads-afro-purple-flower-by-unicia,/en/fabric/7172110-female-heads-afro-purple-flower-by-unicia,Female Heads with Afro and Purple Flower,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 114 | 112,8062179,big-floral-70s-mauve-by-morecandyshop,/en/fabric/8062179-big-floral-70s-mauve-by-morecandyshop,BIG FLORAL 70s MAUVE,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 115 | 113,9839084,colorful-bauhaus-rainbow-geometric-by-elsy,/en/fabric/9839084-colorful-bauhaus-rainbow-geometric-by-elsy,Colorful Bauhaus Rainbow Geometric,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 116 | 114,8510882,teal-leaves-botanical-foliage-nature-ferns-on-white-background-by-erin__kendal,/en/fabric/8510882-teal-leaves-botanical-foliage-nature-ferns-on-white-background-by-erin__kendal,Teal leaves botanical foliage nature ferns on white background,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 117 | 115,3510593,oodles-pink-poodles-by-bluevelvet,/en/fabric/3510593-oodles-pink-poodles-by-bluevelvet,OODLES OF PINK POODLES,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 118 | 116,7781393,doctors-plague-by-nixels,/en/fabric/7781393-doctors-plague-by-nixels,Doctors of Plague,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 119 | 117,10173525,can-t-catch-me-by-elmira_arts,/en/fabric/10173525-can-t-catch-me-by-elmira_arts,Can't Catch Me! :),"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 120 | 118,9887964,summer-blooms-by-lellobird,/en/fabric/9887964-summer-blooms-by-lellobird,Summer Blooms,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 121 | 119,7661337,swimming-manta-rays-by-avisnana,/en/fabric/7661337-swimming-manta-rays-by-avisnana,Swimming manta rays,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 122 | 120,4567213,blood-pattern-by-lanrete58,/en/fabric/4567213-blood-pattern-by-lanrete58,Blood Pattern,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 123 | 121,6003463,mackenzie-tartan-plaid-outlander-by-laurawrightstudio,/en/fabric/6003463-mackenzie-tartan-plaid-outlander-by-laurawrightstudio,MacKenzie tartan plaid outlander,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 124 | 122,5284534,baby-owls-purple-by-heleenvanbuul,/en/fabric/5284534-baby-owls-purple-by-heleenvanbuul,Baby owls in purple,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 125 | 123,5237735,hand-written-sheet-music-small-by-thinlinetextiles,/en/fabric/5237735-hand-written-sheet-music-small-by-thinlinetextiles,Hand Written Sheet Music // Small,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 126 | 124,9696600,safari-creatures-by-megdigdesign,/en/fabric/9696600-safari-creatures-by-megdigdesign,Safari Creatures,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 127 | 125,8589385,art-deco-swans-black-on-cream-by-katerhees,/en/fabric/8589385-art-deco-swans-black-on-cream-by-katerhees,Art Deco Swans - Black on Cream,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 128 | 126,5504768,french-bulldog-flowers-florals-frenchies-dog-girls-flowers-baby-nursery-sweet-painted-flower-by-petfriendly,/en/fabric/5504768-french-bulldog-flowers-florals-frenchies-dog-girls-flowers-baby-nursery-sweet-painted-flower-by-petfriendly,French Bulldog flowers florals frenchies dog girls flowers baby nursery sweet painted flower,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 129 | 127,7982836,bicycle-bikes-white-on-black-by-littlearrowdesign,/en/fabric/7982836-bicycle-bikes-white-on-black-by-littlearrowdesign,bicycle - bikes - white on black,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 130 | 128,9367973,whimsical-pegasus-forest-orange-large-version-by-tigatiga,/en/fabric/9367973-whimsical-pegasus-forest-orange-large-version-by-tigatiga,Whimsical Pegasus Forest - Orange - Large Version,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 131 | 129,1961516,mtp-purple-camo-by-ricraynor,/en/fabric/1961516-mtp-purple-camo-by-ricraynor,MTP Purple Camo,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 132 | 130,6547534,boreal-med-by-nouveau_bohemian,/en/fabric/6547534-boreal-med-by-nouveau_bohemian,The Boreal (MED),"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 133 | 131,10245022,ice-cream-vans-by-adenaj,/en/fabric/10245022-ice-cream-vans-by-adenaj,Ice cream vans,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 134 | 132,9679769,spring-birds-by-diseminger,/en/fabric/9679769-spring-birds-by-diseminger,spring birds,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 135 | 133,3585758,8-bit-darker-green-pixels-3-4ths-inch-by-joyfulrose,/en/fabric/3585758-8-bit-darker-green-pixels-3-4ths-inch-by-joyfulrose,8-bit Darker Green Pixels- 3/4ths of an inch,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 136 | 134,2069373,unicorns-garden-hesperides-by-demigoutte,/en/fabric/2069373-unicorns-garden-hesperides-by-demigoutte,Unicorns in the Garden of Hesperides,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 137 | 135,8056685,ruth-bader-ginsgurg-rbg-white-by-katerhees,/en/fabric/8056685-ruth-bader-ginsgurg-rbg-white-by-katerhees,Ruth Bader Ginsgurg RBG - White,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 138 | 136,1027431,may-now-kiss-bride-blue-by-kittenstitches,/en/fabric/1027431-may-now-kiss-bride-blue-by-kittenstitches,you may now kiss the bride blue,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 139 | 137,10051349,crowned-cranes-jumbo-midnight-copper-by-booboo_collective,/en/fabric/10051349-crowned-cranes-jumbo-midnight-copper-by-booboo_collective,crowned cranes_Jumbo midnight _ copper,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 140 | 138,7689339,skull-crossbones-black-white-by-lemon_chiffon,/en/fabric/7689339-skull-crossbones-black-white-by-lemon_chiffon,skull and crossbones // black and white,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 141 | 139,4653970,colorado-flag-fabric-2-6-x-1-75-by-flagfabric,/en/fabric/4653970-colorado-flag-fabric-2-6-x-1-75-by-flagfabric,Colorado flag fabric - 2.6 x 1.75,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 142 | 140,7944095,mistletoe-red-berries-by-crystal_walen,/en/fabric/7944095-mistletoe-red-berries-by-crystal_walen,mistletoe and red berries,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 143 | 141,6666574,japanese-block-print-pattern-ocean-waves-japanese-waves-pattern-indigo-blue-blue-boho-print-beach-fab-by-forest-sea,/en/fabric/6666574-japanese-block-print-pattern-ocean-waves-japanese-waves-pattern-indigo-blue-blue-boho-print-beach-fab-by-forest-sea,"Japanese Block Print Pattern of Ocean Waves, Japanese Waves Pattern in Indigo Blue, Blue Boho Print, Beach Fabric","Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 144 | 142,5452245,succulove-by-gabbymalpas,/en/fabric/5452245-succulove-by-gabbymalpas,Succulove,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 145 | 143,8752123,navy-leaves-antiqued-by-bluebirdcoop,/en/fabric/8752123-navy-leaves-antiqued-by-bluebirdcoop,Navy Leaves Antiqued,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 146 | 144,5575039,tiny-buffalo-check-flannel-red-black-by-sugarfresh,/en/fabric/5575039-tiny-buffalo-check-flannel-red-black-by-sugarfresh,Tiny Buffalo Check Flannel Red Black,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 147 | 145,7613045,camo-leopard-leopard-print-olive-green-medium-scale-collection-leopard-spots-punk-rock-animal-print-by-borderlines,/en/fabric/7613045-camo-leopard-leopard-print-olive-green-medium-scale-collection-leopard-spots-punk-rock-animal-print-by-borderlines,★ CAMO LEOPARD - LEOPARD PRINT in OLIVE GREEN ★ Medium Scale / Collection : Leopard spots – Punk Rock Animal Print,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 148 | 146,7690402,rose-all-day-wine-fabric-brunch-navy-by-charlottewinter,/en/fabric/7690402-rose-all-day-wine-fabric-brunch-navy-by-charlottewinter,rosé all day wine fabric brunch navy,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 149 | 147,3618307,christmas-ornaments-vintage-christmas-ornaments-cute-retro-ornaments-xmas-holiday-christmas-fabric-by-andrea_lauren,/en/fabric/3618307-christmas-ornaments-vintage-christmas-ornaments-cute-retro-ornaments-xmas-holiday-christmas-fabric-by-andrea_lauren,christmas ornaments // vintage christmas ornaments cute retro ornaments xmas holiday christmas fabric,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 150 | 148,10231195,vintage-ice-cream-by-kylie_33,/en/fabric/10231195-vintage-ice-cream-by-kylie_33,Vintage Ice Cream,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 151 | 149,6007356,star-rainbow-by-emeryallardsmith,/en/fabric/6007356-star-rainbow-by-emeryallardsmith,Star Rainbow,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 152 | 150,7452175,normal-scale-hot-dogs-lemonade-aqua-background-cute-dachshund-sausage-dogs-by-selmacardoso,/en/fabric/7452175-normal-scale-hot-dogs-lemonade-aqua-background-cute-dachshund-sausage-dogs-by-selmacardoso,Normal scale // Hot dogs and lemonade // aqua background cute Dachshund sausage dogs,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 153 | 151,8987265,cannabis-leaves-green-on-white-by-onelittleprintshop,/en/fabric/8987265-cannabis-leaves-green-on-white-by-onelittleprintshop,Cannabis leaves - green on white,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 154 | 152,8576999,clementine-sprigs-silver-sage-by-crystal_walen,/en/fabric/8576999-clementine-sprigs-silver-sage-by-crystal_walen,clementine sprigs-silver sage,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 155 | 153,5532389,blue-white-camouflage-pattern-by-artpics,/en/fabric/5532389-blue-white-camouflage-pattern-by-artpics,Blue and White Camouflage pattern,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 156 | 154,9162947,sea-urchin-shells-neutral-by-jenniejoyce,/en/fabric/9162947-sea-urchin-shells-neutral-by-jenniejoyce,Sea Urchin Shells Neutral,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 157 | 155,6151839,watercolor-watermelon-by-katerinaizotova,/en/fabric/6151839-watercolor-watermelon-by-katerinaizotova,Watercolor watermelon,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 158 | 156,8691750,marbled-unicorn-pattern-by-raccoongirl,/en/fabric/8691750-marbled-unicorn-pattern-by-raccoongirl,Marbled Unicorn Pattern,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 159 | 157,9002092,ginkgo-leaves-spring-rose-spearmint-by-new_branch_studio,/en/fabric/9002092-ginkgo-leaves-spring-rose-spearmint-by-new_branch_studio,Ginkgo Leaves - Spring Rose & Spearmint,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 160 | 158,5525275,supernatural-inspired-baby-67-impala-tattoo-by-tag_graphics,/en/fabric/5525275-supernatural-inspired-baby-67-impala-tattoo-by-tag_graphics,Supernatural Inspired Baby 67 Impala Tattoo,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 161 | 159,8574791,soft-eucalyptus-watercolor-smaller-leaves-pattern-by-daily_miracles,/en/fabric/8574791-soft-eucalyptus-watercolor-smaller-leaves-pattern-by-daily_miracles,Soft Eucalyptus Watercolor Smaller Leaves Pattern,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 162 | 160,9880402,merbaby-spring-by-paperbird-_crafts,/en/fabric/9880402-merbaby-spring-by-paperbird-_crafts,Merbaby Spring,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 163 | 161,8985918,sunflowers-cream-7x7-by-indybloomdesign,/en/fabric/8985918-sunflowers-cream-7x7-by-indybloomdesign,Sunflowers and cream 7x7,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 164 | 162,9989393,black-lives-matter-by-fabric_rocks,/en/fabric/9989393-black-lives-matter-by-fabric_rocks,Black Lives Matter,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 165 | 163,10224539,tropical-birthday-by-amy_maccready,/en/fabric/10224539-tropical-birthday-by-amy_maccready,Tropical birthday,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 166 | 164,9915637,ramen-bowl-by-katerhees,/en/fabric/9915637-ramen-bowl-by-katerhees,Ramen Bowl,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 167 | 165,5527354,dusty-rose-by-shopcabin,/en/fabric/5527354-dusty-rose-by-shopcabin,Dusty Rose,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 168 | 166,2623046,black-vintage-kraken-octopus-pattern-by-inspirationz,/en/fabric/2623046-black-vintage-kraken-octopus-pattern-by-inspirationz,Black Vintage Kraken Octopus pattern,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 169 | 167,9765153,ditsy-modern-vintage-floral-dots-by-lucybaribeau,/en/fabric/9765153-ditsy-modern-vintage-floral-dots-by-lucybaribeau,ditsy modern vintage floral dots,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 170 | 168,8917171,earth-tone-eucalyptus-white-by-hipkiddesigns,/en/fabric/8917171-earth-tone-eucalyptus-white-by-hipkiddesigns,Earth Tone Eucalyptus // White,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 171 | 169,2425715,coffee-beans-by-punqd_designs,/en/fabric/2425715-coffee-beans-by-punqd_designs,Coffee beans,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 172 | 170,8923709,tropical-raccoon-by-introducingemy,/en/fabric/8923709-tropical-raccoon-by-introducingemy,Tropical Raccoon,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 173 | 171,6650987,love-nurse-whimsy-white-by-phyllisdobbs,/en/fabric/6650987-love-nurse-whimsy-white-by-phyllisdobbs,Love a Nurse Whimsy White,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 174 | 172,5825413,border-collie-florals-cute-pink-flowers-dog-florals-print-best-dog-designs-best-dog-prints-cute-borde-by-petfriendly,/en/fabric/5825413-border-collie-florals-cute-pink-flowers-dog-florals-print-best-dog-designs-best-dog-prints-cute-borde-by-petfriendly,border collie florals cute pink flowers dog florals print best dog designs best dog prints cute border collies fabrics,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 175 | 173,9860542,frenchie-scrubs-fabric-french-bulldogs-fabric-nurse-fabric-grey-by-petfriendly,/en/fabric/9860542-frenchie-scrubs-fabric-french-bulldogs-fabric-nurse-fabric-grey-by-petfriendly,"frenchie scrubs fabric - french bulldogs fabric, nurse fabric - grey","Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 176 | 174,8533844,golfers-multi-navy-green-grey-lad19-by-littlearrowdesign,/en/fabric/8533844-golfers-multi-navy-green-grey-lad19-by-littlearrowdesign,golfers multi - navy green grey - LAD19,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 177 | 175,6216794,faux-denim-solid-by-littlearrowdesign,/en/fabric/6216794-faux-denim-solid-by-littlearrowdesign,faux denim solid,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 178 | 176,7113891,canadian-flag-by-thinlinetextiles,/en/fabric/7113891-canadian-flag-by-thinlinetextiles,Canadian Flag,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 179 | 177,10305106,fishing-lures-red-blue-2-by-diseminger,/en/fabric/10305106-fishing-lures-red-blue-2-by-diseminger,fishing lures - red and blue 2,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 180 | 178,8812866,indigo-shibori-stripe-by-radianthomestudio,/en/fabric/8812866-indigo-shibori-stripe-by-radianthomestudio,indigo shibori stripe,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 181 | 179,7916132,love-peace-60s-by-bluecoin,/en/fabric/7916132-love-peace-60s-by-bluecoin,Love and Peace 60s,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 182 | 180,5924805,sew-vintage-by-petite-circus-sewing-machine-black-white-by-petite_circus,/en/fabric/5924805-sew-vintage-by-petite-circus-sewing-machine-black-white-by-petite_circus,Sew Vintage // by petite_circus // sewing machine black and white,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 183 | 181,2276467,flamingo-summer-colorful-tropical-birds-retro-girls-print-black-by-littlesmilemakers,/en/fabric/2276467-flamingo-summer-colorful-tropical-birds-retro-girls-print-black-by-littlesmilemakers,Flamingo summer colorful tropical birds retro girls print black,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 184 | 182,9865418,spring-garden-pattern-by-stolenpencil,/en/fabric/9865418-spring-garden-pattern-by-stolenpencil,Spring garden pattern,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 185 | 183,7756057,emerald-forest-keepers-fairy-woodland-creatures-by-kostolom3000,/en/fabric/7756057-emerald-forest-keepers-fairy-woodland-creatures-by-kostolom3000,Emerald forest keepers. Fairy woodland creatures.,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 186 | 184,3410289,feminism-fists-on-black-by-spacefem,/en/fabric/3410289-feminism-fists-on-black-by-spacefem,Feminism fists on black,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 187 | 185,7587372,olive-bloom-by-holli_zollinger,/en/fabric/7587372-olive-bloom-by-holli_zollinger,OLIVE BLOOM,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 188 | 186,2760166,chicken-boots-knit-fabric-regular-size-by-chickenboots,/en/fabric/2760166-chicken-boots-knit-fabric-regular-size-by-chickenboots,Chicken Boots Knit Fabric-Regular size,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 189 | 187,7492145,horse-coats-horse-breeds-horses-fabrics-tan-by-petfriendly,/en/fabric/7492145-horse-coats-horse-breeds-horses-fabrics-tan-by-petfriendly,horse coats horse breeds horses fabrics tan,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 190 | 188,777803,ditsy-spring-flower-pink-by-lucybaribeau,/en/fabric/777803-ditsy-spring-flower-pink-by-lucybaribeau,Ditsy spring flower in pink,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 191 | 189,8799908,mystic-potion-bottles-on-black-by-sugargrave,/en/fabric/8799908-mystic-potion-bottles-on-black-by-sugargrave,Mystic potion bottles on black,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 192 | 190,10017098,mellow-figs-large-scale-by-heidi-abeline,/en/fabric/10017098-mellow-figs-large-scale-by-heidi-abeline,Mellow Figs / Large scale,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 193 | 191,7333061,demon-slaying-nezuko-asa-no-ha-pattern-dashed-sashiko-lines-on-pink-by-atashi,/en/fabric/7333061-demon-slaying-nezuko-asa-no-ha-pattern-dashed-sashiko-lines-on-pink-by-atashi,Demon-Slaying Nezuko Asa-no-Ha Pattern Dashed Sashiko Lines on Pink,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 194 | 192,9025494,small-scale-greyhounds-dogwalk-turquoise-background-by-selmacardoso,/en/fabric/9025494-small-scale-greyhounds-dogwalk-turquoise-background-by-selmacardoso,Small scale // Greyhounds dogwalk // turquoise background,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 195 | 193,3102067,orbs-ogb1wh-large-by-tonyanewton,/en/fabric/3102067-orbs-ogb1wh-large-by-tonyanewton,Orbs - #OGB1WH (large),"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 196 | 194,2930396,starry-watercolor-dreams-grey-by-emilysanford,/en/fabric/2930396-starry-watercolor-dreams-grey-by-emilysanford,Starry Watercolor Dreams in Grey,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 197 | 195,7356711,pizza-shark-blue-by-littlearrowdesign,/en/fabric/7356711-pizza-shark-blue-by-littlearrowdesign,Pizza Shark - blue,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 198 | 196,7085277,beer-by-natalia_gonzalez,/en/fabric/7085277-beer-by-natalia_gonzalez,Beer,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 199 | 197,8091225,dark-floral-black-roses-on-black-moody-floral-jumbo-size-by-mlags,/en/fabric/8091225-dark-floral-black-roses-on-black-moody-floral-jumbo-size-by-mlags,Dark Floral Black roses on black moody floral JUMBO size,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 200 | 198,5715897,black-lives-matter-small-scale-blue-by-ashleysummersdesign,/en/fabric/5715897-black-lives-matter-small-scale-blue-by-ashleysummersdesign,Black Lives Matter Small Scale - Blue,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 201 | 199,7193136,thunderbird-kilim-watercolor-bohemian-tribal-kilim-arrows-aztec-boho-diamond-watercolor-fabric-by-liz_sawyer_design,/en/fabric/7193136-thunderbird-kilim-watercolor-bohemian-tribal-kilim-arrows-aztec-boho-diamond-watercolor-fabric-by-liz_sawyer_design,Thunderbird Kilim Watercolor // bohemian tribal kilim arrows aztec boho diamond watercolor fabric,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 202 | 200,2800727,fried-egg-bacon-pattern-breakfast-food-design-blue-background-by-kostolom3000,/en/fabric/2800727-fried-egg-bacon-pattern-breakfast-food-design-blue-background-by-kostolom3000,Fried egg and bacon pattern. Breakfast food design. Blue background.,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 203 | 201,4644387,colorado-state-flag-by-hooks_treasure_cove,/en/fabric/4644387-colorado-state-flag-by-hooks_treasure_cove,Colorado State Flag,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 204 | 202,7281016,dragon-scales-purple-blue-by-littlearrowdesign,/en/fabric/7281016-dragon-scales-purple-blue-by-littlearrowdesign,dragon scales - purple and blue,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 205 | 203,4671052,tiger-halloween-costume-pattern-by-furbuddy,/en/fabric/4671052-tiger-halloween-costume-pattern-by-furbuddy,Tiger Halloween Costume Pattern,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 206 | 204,9980371,cat-cafe-by-j9design,/en/fabric/9980371-cat-cafe-by-j9design,Cat Cafe,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 207 | 205,10214388,cranes-vintage-asian-style-by-bamokreativ,/en/fabric/10214388-cranes-vintage-asian-style-by-bamokreativ,cranes vintage asian style,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 208 | 206,5071085,painted-protea-floral-extra-large-version-by-micklyn,/en/fabric/5071085-painted-protea-floral-extra-large-version-by-micklyn,Painted Protea Floral Extra Large Version,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 209 | 207,1524515,green-marijuana-leaves-by-farrellart,/en/fabric/1524515-green-marijuana-leaves-by-farrellart,Green Marijuana Leaves,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 210 | 208,7140035,tattoo-dogs-teal-by-miss_fluff,/en/fabric/7140035-tattoo-dogs-teal-by-miss_fluff,Tattoo Dogs- Teal,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 211 | 209,8205159,alchemical-astrology-sepia-by-xoxotique,/en/fabric/8205159-alchemical-astrology-sepia-by-xoxotique,Alchemical Astrology Sepia,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 212 | 210,9855694,pho-indigo-by-scrummy,/en/fabric/9855694-pho-indigo-by-scrummy,PHO INDIGO,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 213 | 211,6275064,green-paddle-cactus-rose-by-ivieclothco,/en/fabric/6275064-green-paddle-cactus-rose-by-ivieclothco,green paddle cactus + rose,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 214 | 212,10264700,fragant-field-by-e-dri,/en/fabric/10264700-fragant-field-by-e-dri,A Fragant Field,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 215 | 213,8345014,dusty-rose-solid-pink-solid-dusty-pink-by-erin__kendal,/en/fabric/8345014-dusty-rose-solid-pink-solid-dusty-pink-by-erin__kendal,Dusty Rose Solid Pink Solid Dusty Pink,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 216 | 214,5300469,jesus-loves-me-by-sunshineandspoons,/en/fabric/5300469-jesus-loves-me-by-sunshineandspoons,Jesus Loves Me,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 217 | 215,8297993,watercolor-butterflies-by-camcreative,/en/fabric/8297993-watercolor-butterflies-by-camcreative,watercolor butterflies,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 218 | 216,2253187,horror-words-by-miss_motley,/en/fabric/2253187-horror-words-by-miss_motley,Horror words,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 219 | 217,9681832,teeny-tiny-koalas-tea-tree-blossoms-eucalyptus-by-micklyn,/en/fabric/9681832-teeny-tiny-koalas-tea-tree-blossoms-eucalyptus-by-micklyn,Teeny Tiny Koalas with Tea Tree Blossoms and Eucalyptus,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 220 | 218,2340522,peacock-3-large-by-glimmericks,/en/fabric/2340522-peacock-3-large-by-glimmericks,peacock - 3 large,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 221 | 219,4591552,books-by-trizzuto,/en/fabric/4591552-books-by-trizzuto,books!,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 222 | 220,7185474,riveting-rosie-by-fabric_rocks,/en/fabric/7185474-riveting-rosie-by-fabric_rocks,Riveting Rosie,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 223 | 221,7076550,large-scale-vintage-moroccan-dusty-blue-by-littlearrowdesign,/en/fabric/7076550-large-scale-vintage-moroccan-dusty-blue-by-littlearrowdesign,(large scale) vintage moroccan - dusty blue,"Fat Quarter 21"" x 18""",10.75,USD,2030-01-01 224 | -------------------------------------------------------------------------------- /supercharged/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codingforentrepreneurs/Supercharged-Web-Scraping-with-Asyncio/b460f62de4e6a43edfaf279e08ee02f2fcbe2a09/supercharged/__init__.py -------------------------------------------------------------------------------- /supercharged/conf.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | DB_CONNECTION_STR = "sqlite:///db.sqlite3" # //postgres:// -------------------------------------------------------------------------------- /supercharged/db.py: -------------------------------------------------------------------------------- 1 | from sqlalchemy import create_engine 2 | 3 | from .conf import DB_CONNECTION_STR 4 | 5 | conn = create_engine(DB_CONNECTION_STR) 6 | 7 | def verify_table_exists(table_name): 8 | return conn.dialect.has_table(conn, table_name) -------------------------------------------------------------------------------- /supercharged/logging.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import structlog # pip install structlog 3 | 4 | 5 | def set_arsenic_log_level(level = logging.WARNING): 6 | # Create logger 7 | logger = logging.getLogger('arsenic') 8 | 9 | # We need factory, to return application-wide logger 10 | def logger_factory(): 11 | return logger 12 | 13 | structlog.configure(logger_factory=logger_factory) 14 | logger.setLevel(level) -------------------------------------------------------------------------------- /supercharged/main.py: -------------------------------------------------------------------------------- 1 | import fire 2 | from .projects.spoonflower import run_spoonflower 3 | 4 | class Pipeline(): 5 | def __init__(self): 6 | self.spoonflower = run_spoonflower 7 | 8 | if __name__ == "__main__": 9 | fire.Fire(Pipeline) 10 | -------------------------------------------------------------------------------- /supercharged/projects/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codingforentrepreneurs/Supercharged-Web-Scraping-with-Asyncio/b460f62de4e6a43edfaf279e08ee02f2fcbe2a09/supercharged/projects/__init__.py -------------------------------------------------------------------------------- /supercharged/projects/spoonflower.py: -------------------------------------------------------------------------------- 1 | import fire 2 | import os 3 | import asyncio 4 | from arsenic import get_session, keys, browsers, services 5 | import pandas as pd 6 | from requests_html import HTML 7 | import itertools 8 | import re 9 | import time 10 | import pathlib 11 | from urllib.parse import urlparse 12 | import random 13 | 14 | import logging 15 | import structlog # pip install structlog 16 | 17 | 18 | from supercharged.logging import set_arsenic_log_level 19 | from supercharged.scrapers import scraper 20 | 21 | from supercharged.storage import df_from_sql, df_to_sql, list_to_sql 22 | 23 | 24 | 25 | # /en/fabric/7137786-genevieve-floral-by-crystal_walen 26 | async def extract_id_slug(url_path): 27 | path = url_path 28 | if path.startswith('http'): 29 | parsed_url = urlparse(path) 30 | path = parsed_url.path 31 | regex = r"^[^\s]+/(?P\d+)-(?P[\w_-]+)$" 32 | group = re.match(regex, path) 33 | if not group: 34 | return None, None, path 35 | return group['id'], group['slug'], path 36 | 37 | 38 | 39 | async def get_product_data(url, content): 40 | id_, slug_, path = await extract_id_slug(url) 41 | titleEl = content.find(".design-title", first=True) 42 | data = { 43 | 'id': id_, 44 | 'slug': slug_, 45 | 'path': path, 46 | } 47 | title = None 48 | if titleEl == None: 49 | return data 50 | title = titleEl.text 51 | data['title'] = title 52 | sizeEl = content.find("#fabric-size", first=True) 53 | size = None 54 | if sizeEl != None: 55 | size = sizeEl.text 56 | data['size'] = size 57 | price_parent_el = content.find('.b-item-price', first=True) 58 | price_el = price_parent_el.find('.visuallyhidden', first=True) 59 | for i in price_el.element.iterchildren(): 60 | attrs = dict(**i.attrib) 61 | try: 62 | del attrs['itemprop'] 63 | except: 64 | pass 65 | attrs_keys = list(attrs.keys()) 66 | data[i.attrib['itemprop']] = i.attrib[attrs_keys[0]] 67 | return data 68 | 69 | async def get_parsable_html(body_html_str): 70 | return HTML(html=body_html_str) 71 | 72 | async def get_links(html_r): 73 | fabric_links = [x for x in list(html_r.links) if x.startswith("/en/fabric")] 74 | datas = [] 75 | for path in fabric_links: 76 | id_, slug_, _ = await extract_id_slug(path) 77 | data = { 78 | "id": id_, 79 | "slug": slug_, 80 | "path": path, 81 | "scraped": 0 # True / False -> 1 / 0 82 | } 83 | datas.append(data) 84 | return datas 85 | 86 | async def spoonflower_scraper(url, i=-1, timeout=60, start=None): 87 | body = await scraper(url, i=i, timeout=timeout, start=start, body_delay=10) 88 | content = await get_parsable_html(body) 89 | links = await get_links(content) 90 | product_data = await get_product_data(url, content) 91 | if start != None: 92 | end = time.time() - start 93 | print(f'{i} took {end} seconds') 94 | # print(body) 95 | dataset = { 96 | "links": links, 97 | "product_data": product_data 98 | } 99 | return dataset 100 | 101 | async def run(urls, timeout=60, start=None): 102 | results = [] 103 | for i, url in enumerate(urls): 104 | results.append( 105 | asyncio.create_task(spoonflower_scraper(url, i=i, timeout=60, start=start)) 106 | ) 107 | list_of_links = await asyncio.gather(*results) 108 | return list_of_links 109 | 110 | def get_saved_urls(limit=5): 111 | links_df = df_from_sql('spoonflower_links') 112 | urls = [] 113 | scraped_ids = [] 114 | used_df = False 115 | if not links_df.empty: 116 | sub_links_df = links_df.copy() 117 | sub_links_df = sub_links_df[sub_links_df['scraped'] == 0] 118 | sub_links_df = sub_links_df.sample(limit) 119 | urls = [f"https://www.spoonflower.com{x}" for x in sub_links_df.path.tolist()] 120 | scraped_ids = sub_links_df.id.tolist() 121 | if len(urls) > 0: 122 | used_df = True 123 | return urls, scraped_ids, used_df 124 | 125 | def get_list_range(limit=10, is_random=True, random_max=150): 126 | urls = [] 127 | for i in range(limit): 128 | if is_random: 129 | page = random.randint(i+1, random_max) 130 | else: 131 | page = i + 1 132 | urls.append(f"https://www.spoonflower.com/en/shop?on=fabric&page_offset={page}") 133 | return urls 134 | 135 | def run_spoonflower(use_links=True, use_list_range=False, is_random=True, save_csv=False, limit=10): 136 | set_arsenic_log_level() 137 | start = time.time() 138 | urls = ['https://www.spoonflower.com/en/shop?on=fabric'] 139 | scraped_ids = [] 140 | used_df = False 141 | if use_links == True and use_list_range == False: 142 | urls, scraped_ids, used_df = get_saved_urls(limit=limit) 143 | if use_list_range == True: 144 | urls = get_list_range(limit=limit, is_random=is_random) 145 | results = asyncio.run(run(urls, start=start)) 146 | end = time.time() - start 147 | links = [x['links'] for x in results] # [[], [], []] 148 | links = itertools.chain.from_iterable(links) 149 | links = list(links) 150 | link_columns = ['id', 'slug', 'path', 'scraped'] 151 | list_to_sql(datas=links, 152 | table_name='spoonflower_links', 153 | columns=link_columns) 154 | product_data = [x['product_data'] for x in results] 155 | product_columns = ['id', 'slug', 'path', 'title', 'size', 'price', 'priceCurrency', 'priceValidUntil'] 156 | list_to_sql(datas=product_data, 157 | table_name='spoonflower_fabrics', 158 | columns=product_columns) 159 | if used_df: 160 | links_df = df_from_sql('spoonflower_links') 161 | link_cond = links_df['id'].isin(scraped_ids) 162 | links_df.loc[link_cond, 'scraped'] = 1 163 | df_to_sql(links_df, table_name='spoonflower_links') 164 | if save_csv: 165 | links_df = df_from_sql('spoonflower_links') 166 | links_df.to_csv('spoonflower_links.csv') 167 | fabrics_df = df_from_sql('spoonflower_fabrics') 168 | fabrics_df.to_csv('spoonflower_fabrics.csv') 169 | # return results -------------------------------------------------------------------------------- /supercharged/scrapers.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from arsenic import get_session, keys, browsers, services 3 | 4 | async def scraper(url, i=-1, timeout=60, start=None, body_delay=10): 5 | service = services.Chromedriver() 6 | browser = browsers.Chrome(chromeOptions={ 7 | 'args': ['--headless', '--disable-gpu'] 8 | }) 9 | async with get_session(service, browser) as session: 10 | try: 11 | await asyncio.wait_for(session.get(url), timeout=timeout) 12 | except asyncio.TimeoutError: 13 | return [] 14 | if body_delay > 0: 15 | await asyncio.sleep(body_delay) 16 | body = await session.get_page_source() 17 | return body -------------------------------------------------------------------------------- /supercharged/storage.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | import pandas as pd 3 | 4 | from .db import conn as db_conn, verify_table_exists 5 | 6 | 7 | def df_from_sql(table_name='spoonflower_links'): 8 | table_exists = verify_table_exists(table_name) 9 | if not table_exists: 10 | return pd.DataFrame() 11 | df = pd.read_sql_table(table_name, db_conn) 12 | return df 13 | 14 | def df_to_sql(df, table_name='spoonflower_links', if_exists='replace'): 15 | df.to_sql(table_name, db_conn, if_exists=if_exists, index=False) 16 | return df 17 | 18 | def list_to_sql( 19 | datas=[], 20 | table_name='spoonflower_links', 21 | columns=[], 22 | unique_col='id', 23 | keep='first'): 24 | if len(datas) == 0: 25 | ''' 26 | No data passed, returning empty dataframe. 27 | ''' 28 | return pd.DataFrame() 29 | new_df = pd.DataFrame(datas) 30 | og_df = df_from_sql(table_name=table_name) 31 | if og_df.empty: 32 | df = new_df.copy() 33 | else: 34 | df = pd.concat([og_df, new_df]) 35 | df.reset_index(inplace=True, drop=False) 36 | assert(len(columns)> 0) 37 | if not set(columns).issubset(df.columns): 38 | ''' 39 | Invalid columns. 40 | Dataframe does the columns passed 41 | in the arugment `columns` 42 | ''' 43 | return pd.DataFrame() 44 | df = df[columns] # select certain columns 45 | df = df.loc[~df[unique_col].duplicated(keep=keep)] # make unique 46 | df.dropna(inplace=True) 47 | df_to_sql(df, table_name) 48 | return df 49 | 50 | 51 | def store_links_as_df_pickle(datas=[], name='links.pkl'): 52 | new_df = pd.DataFrame(datas) 53 | og_df = pd.DataFrame([{'id': 0}]) 54 | if pathlib.Path(name).exists(): 55 | og_df = pd.read_pickle(name) # read_csv 56 | df = pd.concat([og_df, new_df]) 57 | df.reset_index(inplace=True, drop=False) 58 | df = df[['id', 'slug', 'path', 'scraped']] 59 | df = df.loc[~df.id.duplicated(keep='first')] 60 | # df.set_index('id', inplace=True, drop=True) 61 | df.dropna(inplace=True) 62 | df.to_pickle(name) 63 | return df 64 | --------------------------------------------------------------------------------