├── .gitignore ├── Crawl4ai.ipynb ├── LICENSE └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 110 | .pdm.toml 111 | .pdm-python 112 | .pdm-build/ 113 | 114 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 115 | __pypackages__/ 116 | 117 | # Celery stuff 118 | celerybeat-schedule 119 | celerybeat.pid 120 | 121 | # SageMath parsed files 122 | *.sage.py 123 | 124 | # Environments 125 | .env 126 | .venv 127 | env/ 128 | venv/ 129 | ENV/ 130 | env.bak/ 131 | venv.bak/ 132 | 133 | # Spyder project settings 134 | .spyderproject 135 | .spyproject 136 | 137 | # Rope project settings 138 | .ropeproject 139 | 140 | # mkdocs documentation 141 | /site 142 | 143 | # mypy 144 | .mypy_cache/ 145 | .dmypy.json 146 | dmypy.json 147 | 148 | # Pyre type checker 149 | .pyre/ 150 | 151 | # pytype static type analyzer 152 | .pytype/ 153 | 154 | # Cython debug symbols 155 | cython_debug/ 156 | 157 | # PyCharm 158 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 159 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 160 | # and can be added to the global gitignore or merged into this file. For a more nuclear 161 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 162 | #.idea/ 163 | -------------------------------------------------------------------------------- /Crawl4ai.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [] 7 | }, 8 | "kernelspec": { 9 | "name": "python3", 10 | "display_name": "Python 3" 11 | }, 12 | "language_info": { 13 | "name": "python" 14 | } 15 | }, 16 | "cells": [ 17 | { 18 | "cell_type": "code", 19 | "execution_count": 1, 20 | "metadata": { 21 | "colab": { 22 | "base_uri": "https://localhost:8080/", 23 | "height": 1000 24 | }, 25 | "id": "p6UTmH1J-G2t", 26 | "outputId": "fe10235a-ea23-4de4-a513-6dd473890fa1" 27 | }, 28 | "outputs": [ 29 | { 30 | "output_type": "stream", 31 | "name": "stdout", 32 | "text": [ 33 | "Collecting crawl4ai@ git+https://github.com/unclecode/crawl4ai.git\n", 34 | " Cloning https://github.com/unclecode/crawl4ai.git to /tmp/pip-install-9z736bx0/crawl4ai_95adca0fa9d24ece88c9691a106b42d2\n", 35 | " Running command git clone --filter=blob:none --quiet https://github.com/unclecode/crawl4ai.git /tmp/pip-install-9z736bx0/crawl4ai_95adca0fa9d24ece88c9691a106b42d2\n", 36 | " Resolved https://github.com/unclecode/crawl4ai.git to commit e5e6a34e8097cb5ff72c026b19859b5b98246378\n", 37 | " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", 38 | "Collecting aiohttp==3.9.5 (from crawl4ai@ git+https://github.com/unclecode/crawl4ai.git)\n", 39 | " Downloading aiohttp-3.9.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.5 kB)\n", 40 | "Collecting aiosqlite==0.20.0 (from crawl4ai@ git+https://github.com/unclecode/crawl4ai.git)\n", 41 | " Downloading aiosqlite-0.20.0-py3-none-any.whl.metadata (4.3 kB)\n", 42 | "Requirement already satisfied: beautifulsoup4==4.12.3 in /usr/local/lib/python3.10/dist-packages (from crawl4ai@ git+https://github.com/unclecode/crawl4ai.git) (4.12.3)\n", 43 | "Collecting fastapi==0.111.0 (from crawl4ai@ git+https://github.com/unclecode/crawl4ai.git)\n", 44 | " Downloading fastapi-0.111.0-py3-none-any.whl.metadata (25 kB)\n", 45 | "Collecting html2text==2024.2.26 (from crawl4ai@ git+https://github.com/unclecode/crawl4ai.git)\n", 46 | " Downloading html2text-2024.2.26.tar.gz (56 kB)\n", 47 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.5/56.5 kB\u001b[0m \u001b[31m2.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 48 | "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", 49 | "Collecting httpx==0.27.0 (from crawl4ai@ git+https://github.com/unclecode/crawl4ai.git)\n", 50 | " Downloading httpx-0.27.0-py3-none-any.whl.metadata (7.2 kB)\n", 51 | "Collecting litellm==1.40.17 (from crawl4ai@ git+https://github.com/unclecode/crawl4ai.git)\n", 52 | " Downloading litellm-1.40.17-py3-none-any.whl.metadata (30 kB)\n", 53 | "Collecting pydantic==2.7.4 (from crawl4ai@ git+https://github.com/unclecode/crawl4ai.git)\n", 54 | " Downloading pydantic-2.7.4-py3-none-any.whl.metadata (109 kB)\n", 55 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m109.4/109.4 kB\u001b[0m \u001b[31m623.2 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 56 | "\u001b[?25hCollecting python-dotenv==1.0.1 (from crawl4ai@ git+https://github.com/unclecode/crawl4ai.git)\n", 57 | " Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)\n", 58 | "Requirement already satisfied: requests==2.32.3 in /usr/local/lib/python3.10/dist-packages (from crawl4ai@ git+https://github.com/unclecode/crawl4ai.git) (2.32.3)\n", 59 | "Collecting rich==13.7.1 (from crawl4ai@ git+https://github.com/unclecode/crawl4ai.git)\n", 60 | " Downloading rich-13.7.1-py3-none-any.whl.metadata (18 kB)\n", 61 | "Collecting selenium==4.23.1 (from crawl4ai@ git+https://github.com/unclecode/crawl4ai.git)\n", 62 | " Downloading selenium-4.23.1-py3-none-any.whl.metadata (7.1 kB)\n", 63 | "Collecting uvicorn==0.30.1 (from crawl4ai@ git+https://github.com/unclecode/crawl4ai.git)\n", 64 | " Downloading uvicorn-0.30.1-py3-none-any.whl.metadata (6.3 kB)\n", 65 | "Collecting pillow==10.3.0 (from crawl4ai@ git+https://github.com/unclecode/crawl4ai.git)\n", 66 | " Downloading pillow-10.3.0-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (9.2 kB)\n", 67 | "Collecting slowapi==0.1.9 (from crawl4ai@ git+https://github.com/unclecode/crawl4ai.git)\n", 68 | " Downloading slowapi-0.1.9-py3-none-any.whl.metadata (3.0 kB)\n", 69 | "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp==3.9.5->crawl4ai@ git+https://github.com/unclecode/crawl4ai.git) (1.3.1)\n", 70 | "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp==3.9.5->crawl4ai@ git+https://github.com/unclecode/crawl4ai.git) (24.2.0)\n", 71 | "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp==3.9.5->crawl4ai@ git+https://github.com/unclecode/crawl4ai.git) (1.4.1)\n", 72 | "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp==3.9.5->crawl4ai@ git+https://github.com/unclecode/crawl4ai.git) (6.0.5)\n", 73 | "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp==3.9.5->crawl4ai@ git+https://github.com/unclecode/crawl4ai.git) (1.9.4)\n", 74 | "Requirement already satisfied: async-timeout<5.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp==3.9.5->crawl4ai@ git+https://github.com/unclecode/crawl4ai.git) (4.0.3)\n", 75 | "Requirement already satisfied: typing_extensions>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiosqlite==0.20.0->crawl4ai@ git+https://github.com/unclecode/crawl4ai.git) (4.12.2)\n", 76 | "Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.10/dist-packages (from beautifulsoup4==4.12.3->crawl4ai@ git+https://github.com/unclecode/crawl4ai.git) (2.6)\n", 77 | "Collecting starlette<0.38.0,>=0.37.2 (from fastapi==0.111.0->crawl4ai@ git+https://github.com/unclecode/crawl4ai.git)\n", 78 | " Downloading starlette-0.37.2-py3-none-any.whl.metadata (5.9 kB)\n", 79 | "Collecting fastapi-cli>=0.0.2 (from fastapi==0.111.0->crawl4ai@ git+https://github.com/unclecode/crawl4ai.git)\n", 80 | " Downloading fastapi_cli-0.0.5-py3-none-any.whl.metadata (7.0 kB)\n", 81 | "Requirement already satisfied: jinja2>=2.11.2 in /usr/local/lib/python3.10/dist-packages (from fastapi==0.111.0->crawl4ai@ git+https://github.com/unclecode/crawl4ai.git) (3.1.4)\n", 82 | "Collecting python-multipart>=0.0.7 (from fastapi==0.111.0->crawl4ai@ git+https://github.com/unclecode/crawl4ai.git)\n", 83 | " Downloading python_multipart-0.0.9-py3-none-any.whl.metadata (2.5 kB)\n", 84 | "Collecting ujson!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0,>=4.0.1 (from fastapi==0.111.0->crawl4ai@ git+https://github.com/unclecode/crawl4ai.git)\n", 85 | " Downloading ujson-5.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.3 kB)\n", 86 | "Collecting orjson>=3.2.1 (from fastapi==0.111.0->crawl4ai@ git+https://github.com/unclecode/crawl4ai.git)\n", 87 | " Downloading orjson-3.10.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (50 kB)\n", 88 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m50.4/50.4 kB\u001b[0m \u001b[31m3.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 89 | "\u001b[?25hCollecting email_validator>=2.0.0 (from fastapi==0.111.0->crawl4ai@ git+https://github.com/unclecode/crawl4ai.git)\n", 90 | " Downloading email_validator-2.2.0-py3-none-any.whl.metadata (25 kB)\n", 91 | "Requirement already satisfied: anyio in /usr/local/lib/python3.10/dist-packages (from httpx==0.27.0->crawl4ai@ git+https://github.com/unclecode/crawl4ai.git) (3.7.1)\n", 92 | "Requirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from httpx==0.27.0->crawl4ai@ git+https://github.com/unclecode/crawl4ai.git) (2024.8.30)\n", 93 | "Collecting httpcore==1.* (from httpx==0.27.0->crawl4ai@ git+https://github.com/unclecode/crawl4ai.git)\n", 94 | " Downloading httpcore-1.0.5-py3-none-any.whl.metadata (20 kB)\n", 95 | "Requirement already satisfied: idna in /usr/local/lib/python3.10/dist-packages (from httpx==0.27.0->crawl4ai@ git+https://github.com/unclecode/crawl4ai.git) (3.8)\n", 96 | "Requirement already satisfied: sniffio in /usr/local/lib/python3.10/dist-packages (from httpx==0.27.0->crawl4ai@ git+https://github.com/unclecode/crawl4ai.git) (1.3.1)\n", 97 | "Requirement already satisfied: click in /usr/local/lib/python3.10/dist-packages (from litellm==1.40.17->crawl4ai@ git+https://github.com/unclecode/crawl4ai.git) (8.1.7)\n", 98 | "Collecting ijson (from litellm==1.40.17->crawl4ai@ git+https://github.com/unclecode/crawl4ai.git)\n", 99 | " Downloading ijson-3.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (21 kB)\n", 100 | "Requirement already satisfied: importlib-metadata>=6.8.0 in /usr/local/lib/python3.10/dist-packages (from litellm==1.40.17->crawl4ai@ git+https://github.com/unclecode/crawl4ai.git) (8.4.0)\n", 101 | "Collecting openai>=1.27.0 (from litellm==1.40.17->crawl4ai@ git+https://github.com/unclecode/crawl4ai.git)\n", 102 | " Downloading openai-1.44.0-py3-none-any.whl.metadata (22 kB)\n", 103 | "Collecting tiktoken>=0.7.0 (from litellm==1.40.17->crawl4ai@ git+https://github.com/unclecode/crawl4ai.git)\n", 104 | " Downloading tiktoken-0.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)\n", 105 | "Requirement already satisfied: tokenizers in /usr/local/lib/python3.10/dist-packages (from litellm==1.40.17->crawl4ai@ git+https://github.com/unclecode/crawl4ai.git) (0.19.1)\n", 106 | "Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic==2.7.4->crawl4ai@ git+https://github.com/unclecode/crawl4ai.git) (0.7.0)\n", 107 | "Collecting pydantic-core==2.18.4 (from pydantic==2.7.4->crawl4ai@ git+https://github.com/unclecode/crawl4ai.git)\n", 108 | " Downloading pydantic_core-2.18.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.5 kB)\n", 109 | "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests==2.32.3->crawl4ai@ git+https://github.com/unclecode/crawl4ai.git) (3.3.2)\n", 110 | "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests==2.32.3->crawl4ai@ git+https://github.com/unclecode/crawl4ai.git) (2.0.7)\n", 111 | "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich==13.7.1->crawl4ai@ git+https://github.com/unclecode/crawl4ai.git) (3.0.0)\n", 112 | "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from rich==13.7.1->crawl4ai@ git+https://github.com/unclecode/crawl4ai.git) (2.16.1)\n", 113 | "Collecting trio~=0.17 (from selenium==4.23.1->crawl4ai@ git+https://github.com/unclecode/crawl4ai.git)\n", 114 | " Downloading trio-0.26.2-py3-none-any.whl.metadata (8.6 kB)\n", 115 | "Collecting trio-websocket~=0.9 (from selenium==4.23.1->crawl4ai@ git+https://github.com/unclecode/crawl4ai.git)\n", 116 | " Downloading trio_websocket-0.11.1-py3-none-any.whl.metadata (4.7 kB)\n", 117 | "Requirement already satisfied: websocket-client~=1.8 in /usr/local/lib/python3.10/dist-packages (from selenium==4.23.1->crawl4ai@ git+https://github.com/unclecode/crawl4ai.git) (1.8.0)\n", 118 | "Collecting limits>=2.3 (from slowapi==0.1.9->crawl4ai@ git+https://github.com/unclecode/crawl4ai.git)\n", 119 | " Downloading limits-3.13.0-py3-none-any.whl.metadata (7.2 kB)\n", 120 | "Collecting h11>=0.8 (from uvicorn==0.30.1->crawl4ai@ git+https://github.com/unclecode/crawl4ai.git)\n", 121 | " Downloading h11-0.14.0-py3-none-any.whl.metadata (8.2 kB)\n", 122 | "Collecting dnspython>=2.0.0 (from email_validator>=2.0.0->fastapi==0.111.0->crawl4ai@ git+https://github.com/unclecode/crawl4ai.git)\n", 123 | " Downloading dnspython-2.6.1-py3-none-any.whl.metadata (5.8 kB)\n", 124 | "Requirement already satisfied: typer>=0.12.3 in /usr/local/lib/python3.10/dist-packages (from fastapi-cli>=0.0.2->fastapi==0.111.0->crawl4ai@ git+https://github.com/unclecode/crawl4ai.git) (0.12.5)\n", 125 | "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.10/dist-packages (from importlib-metadata>=6.8.0->litellm==1.40.17->crawl4ai@ git+https://github.com/unclecode/crawl4ai.git) (3.20.1)\n", 126 | "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2>=2.11.2->fastapi==0.111.0->crawl4ai@ git+https://github.com/unclecode/crawl4ai.git) (2.1.5)\n", 127 | "Collecting deprecated>=1.2 (from limits>=2.3->slowapi==0.1.9->crawl4ai@ git+https://github.com/unclecode/crawl4ai.git)\n", 128 | " Downloading Deprecated-1.2.14-py2.py3-none-any.whl.metadata (5.4 kB)\n", 129 | "Requirement already satisfied: importlib-resources>=1.3 in /usr/local/lib/python3.10/dist-packages (from limits>=2.3->slowapi==0.1.9->crawl4ai@ git+https://github.com/unclecode/crawl4ai.git) (6.4.4)\n", 130 | "Requirement already satisfied: packaging<25,>=21 in /usr/local/lib/python3.10/dist-packages (from limits>=2.3->slowapi==0.1.9->crawl4ai@ git+https://github.com/unclecode/crawl4ai.git) (24.1)\n", 131 | "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py>=2.2.0->rich==13.7.1->crawl4ai@ git+https://github.com/unclecode/crawl4ai.git) (0.1.2)\n", 132 | "Requirement already satisfied: distro<2,>=1.7.0 in /usr/lib/python3/dist-packages (from openai>=1.27.0->litellm==1.40.17->crawl4ai@ git+https://github.com/unclecode/crawl4ai.git) (1.7.0)\n", 133 | "Collecting jiter<1,>=0.4.0 (from openai>=1.27.0->litellm==1.40.17->crawl4ai@ git+https://github.com/unclecode/crawl4ai.git)\n", 134 | " Downloading jiter-0.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.6 kB)\n", 135 | "Requirement already satisfied: tqdm>4 in /usr/local/lib/python3.10/dist-packages (from openai>=1.27.0->litellm==1.40.17->crawl4ai@ git+https://github.com/unclecode/crawl4ai.git) (4.66.5)\n", 136 | "Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio->httpx==0.27.0->crawl4ai@ git+https://github.com/unclecode/crawl4ai.git) (1.2.2)\n", 137 | "Requirement already satisfied: regex>=2022.1.18 in /usr/local/lib/python3.10/dist-packages (from tiktoken>=0.7.0->litellm==1.40.17->crawl4ai@ git+https://github.com/unclecode/crawl4ai.git) (2024.5.15)\n", 138 | "Requirement already satisfied: sortedcontainers in /usr/local/lib/python3.10/dist-packages (from trio~=0.17->selenium==4.23.1->crawl4ai@ git+https://github.com/unclecode/crawl4ai.git) (2.4.0)\n", 139 | "Collecting outcome (from trio~=0.17->selenium==4.23.1->crawl4ai@ git+https://github.com/unclecode/crawl4ai.git)\n", 140 | " Downloading outcome-1.3.0.post0-py2.py3-none-any.whl.metadata (2.6 kB)\n", 141 | "Collecting wsproto>=0.14 (from trio-websocket~=0.9->selenium==4.23.1->crawl4ai@ git+https://github.com/unclecode/crawl4ai.git)\n", 142 | " Downloading wsproto-1.2.0-py3-none-any.whl.metadata (5.6 kB)\n", 143 | "Requirement already satisfied: pysocks!=1.5.7,<2.0,>=1.5.6 in /usr/local/lib/python3.10/dist-packages (from urllib3[socks]<3,>=1.26->selenium==4.23.1->crawl4ai@ git+https://github.com/unclecode/crawl4ai.git) (1.7.1)\n", 144 | "Collecting httptools>=0.5.0 (from uvicorn[standard]>=0.12.0->fastapi==0.111.0->crawl4ai@ git+https://github.com/unclecode/crawl4ai.git)\n", 145 | " Downloading httptools-0.6.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.6 kB)\n", 146 | "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from uvicorn[standard]>=0.12.0->fastapi==0.111.0->crawl4ai@ git+https://github.com/unclecode/crawl4ai.git) (6.0.2)\n", 147 | "Collecting uvloop!=0.15.0,!=0.15.1,>=0.14.0 (from uvicorn[standard]>=0.12.0->fastapi==0.111.0->crawl4ai@ git+https://github.com/unclecode/crawl4ai.git)\n", 148 | " Downloading uvloop-0.20.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.9 kB)\n", 149 | "Collecting watchfiles>=0.13 (from uvicorn[standard]>=0.12.0->fastapi==0.111.0->crawl4ai@ git+https://github.com/unclecode/crawl4ai.git)\n", 150 | " Downloading watchfiles-0.24.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.9 kB)\n", 151 | "Collecting websockets>=10.4 (from uvicorn[standard]>=0.12.0->fastapi==0.111.0->crawl4ai@ git+https://github.com/unclecode/crawl4ai.git)\n", 152 | " Downloading websockets-13.0.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)\n", 153 | "Requirement already satisfied: huggingface-hub<1.0,>=0.16.4 in /usr/local/lib/python3.10/dist-packages (from tokenizers->litellm==1.40.17->crawl4ai@ git+https://github.com/unclecode/crawl4ai.git) (0.24.6)\n", 154 | "Requirement already satisfied: wrapt<2,>=1.10 in /usr/local/lib/python3.10/dist-packages (from deprecated>=1.2->limits>=2.3->slowapi==0.1.9->crawl4ai@ git+https://github.com/unclecode/crawl4ai.git) (1.16.0)\n", 155 | "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.16.4->tokenizers->litellm==1.40.17->crawl4ai@ git+https://github.com/unclecode/crawl4ai.git) (3.15.4)\n", 156 | "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.16.4->tokenizers->litellm==1.40.17->crawl4ai@ git+https://github.com/unclecode/crawl4ai.git) (2024.6.1)\n", 157 | "Requirement already satisfied: shellingham>=1.3.0 in /usr/local/lib/python3.10/dist-packages (from typer>=0.12.3->fastapi-cli>=0.0.2->fastapi==0.111.0->crawl4ai@ git+https://github.com/unclecode/crawl4ai.git) (1.5.4)\n", 158 | "Downloading aiohttp-3.9.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)\n", 159 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m29.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 160 | "\u001b[?25hDownloading aiosqlite-0.20.0-py3-none-any.whl (15 kB)\n", 161 | "Downloading fastapi-0.111.0-py3-none-any.whl (91 kB)\n", 162 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m92.0/92.0 kB\u001b[0m \u001b[31m7.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 163 | "\u001b[?25hDownloading httpx-0.27.0-py3-none-any.whl (75 kB)\n", 164 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m75.6/75.6 kB\u001b[0m \u001b[31m4.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 165 | "\u001b[?25hDownloading litellm-1.40.17-py3-none-any.whl (6.3 MB)\n", 166 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.3/6.3 MB\u001b[0m \u001b[31m53.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 167 | "\u001b[?25hDownloading pillow-10.3.0-cp310-cp310-manylinux_2_28_x86_64.whl (4.5 MB)\n", 168 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.5/4.5 MB\u001b[0m \u001b[31m50.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 169 | "\u001b[?25hDownloading pydantic-2.7.4-py3-none-any.whl (409 kB)\n", 170 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m409.0/409.0 kB\u001b[0m \u001b[31m18.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 171 | "\u001b[?25hDownloading python_dotenv-1.0.1-py3-none-any.whl (19 kB)\n", 172 | "Downloading rich-13.7.1-py3-none-any.whl (240 kB)\n", 173 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m240.7/240.7 kB\u001b[0m \u001b[31m13.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 174 | "\u001b[?25hDownloading selenium-4.23.1-py3-none-any.whl (9.4 MB)\n", 175 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9.4/9.4 MB\u001b[0m \u001b[31m51.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 176 | "\u001b[?25hDownloading slowapi-0.1.9-py3-none-any.whl (14 kB)\n", 177 | "Downloading uvicorn-0.30.1-py3-none-any.whl (62 kB)\n", 178 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.4/62.4 kB\u001b[0m \u001b[31m4.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 179 | "\u001b[?25hDownloading httpcore-1.0.5-py3-none-any.whl (77 kB)\n", 180 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m77.9/77.9 kB\u001b[0m \u001b[31m5.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 181 | "\u001b[?25hDownloading pydantic_core-2.18.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.0 MB)\n", 182 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m39.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 183 | "\u001b[?25hDownloading email_validator-2.2.0-py3-none-any.whl (33 kB)\n", 184 | "Downloading fastapi_cli-0.0.5-py3-none-any.whl (9.5 kB)\n", 185 | "Downloading h11-0.14.0-py3-none-any.whl (58 kB)\n", 186 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m4.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 187 | "\u001b[?25hDownloading limits-3.13.0-py3-none-any.whl (45 kB)\n", 188 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m45.5/45.5 kB\u001b[0m \u001b[31m3.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 189 | "\u001b[?25hDownloading openai-1.44.0-py3-none-any.whl (367 kB)\n", 190 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m367.8/367.8 kB\u001b[0m \u001b[31m23.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 191 | "\u001b[?25hDownloading orjson-3.10.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (141 kB)\n", 192 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m141.9/141.9 kB\u001b[0m \u001b[31m10.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 193 | "\u001b[?25hDownloading python_multipart-0.0.9-py3-none-any.whl (22 kB)\n", 194 | "Downloading starlette-0.37.2-py3-none-any.whl (71 kB)\n", 195 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m71.9/71.9 kB\u001b[0m \u001b[31m4.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 196 | "\u001b[?25hDownloading tiktoken-0.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.1 MB)\n", 197 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m37.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 198 | "\u001b[?25hDownloading trio-0.26.2-py3-none-any.whl (475 kB)\n", 199 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m476.0/476.0 kB\u001b[0m \u001b[31m28.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 200 | "\u001b[?25hDownloading trio_websocket-0.11.1-py3-none-any.whl (17 kB)\n", 201 | "Downloading ujson-5.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (53 kB)\n", 202 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.6/53.6 kB\u001b[0m \u001b[31m3.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 203 | "\u001b[?25hDownloading ijson-3.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (114 kB)\n", 204 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m114.5/114.5 kB\u001b[0m \u001b[31m7.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 205 | "\u001b[?25hDownloading Deprecated-1.2.14-py2.py3-none-any.whl (9.6 kB)\n", 206 | "Downloading dnspython-2.6.1-py3-none-any.whl (307 kB)\n", 207 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m307.7/307.7 kB\u001b[0m \u001b[31m19.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 208 | "\u001b[?25hDownloading httptools-0.6.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (341 kB)\n", 209 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m341.4/341.4 kB\u001b[0m \u001b[31m20.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 210 | "\u001b[?25hDownloading jiter-0.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (318 kB)\n", 211 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m318.9/318.9 kB\u001b[0m \u001b[31m20.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 212 | "\u001b[?25hDownloading uvloop-0.20.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.4 MB)\n", 213 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.4/3.4 MB\u001b[0m \u001b[31m67.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 214 | "\u001b[?25hDownloading watchfiles-0.24.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (425 kB)\n", 215 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m425.7/425.7 kB\u001b[0m \u001b[31m22.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 216 | "\u001b[?25hDownloading websockets-13.0.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (157 kB)\n", 217 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m157.3/157.3 kB\u001b[0m \u001b[31m9.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 218 | "\u001b[?25hDownloading wsproto-1.2.0-py3-none-any.whl (24 kB)\n", 219 | "Downloading outcome-1.3.0.post0-py2.py3-none-any.whl (10 kB)\n", 220 | "Building wheels for collected packages: crawl4ai, html2text\n", 221 | " Building wheel for crawl4ai (setup.py) ... \u001b[?25l\u001b[?25hdone\n", 222 | " Created wheel for crawl4ai: filename=Crawl4AI-0.2.77-py3-none-any.whl size=47936 sha256=68a22f6742f978ce8d3726f1a5050821f75cbe3c8a232033f92c8c0b91a7fc41\n", 223 | " Stored in directory: /tmp/pip-ephem-wheel-cache-l_k1lpkm/wheels/99/72/3b/2fedf5c14c27671cb743e96a3af3f438545902bf19a3e5823f\n", 224 | " Building wheel for html2text (setup.py) ... \u001b[?25l\u001b[?25hdone\n", 225 | " Created wheel for html2text: filename=html2text-2024.2.26-py3-none-any.whl size=33111 sha256=7fea630022d71adc3c75d1c98076f10b2ff1ac4d0536d8586a00b5e29b678020\n", 226 | " Stored in directory: /root/.cache/pip/wheels/f3/96/6d/a7eba8f80d31cbd188a2787b81514d82fc5ae6943c44777659\n", 227 | "Successfully built crawl4ai html2text\n", 228 | "Installing collected packages: ijson, websockets, uvloop, ujson, python-multipart, python-dotenv, pydantic-core, pillow, outcome, orjson, jiter, httptools, html2text, h11, dnspython, deprecated, aiosqlite, wsproto, watchfiles, uvicorn, trio, tiktoken, starlette, rich, pydantic, limits, httpcore, email_validator, aiohttp, trio-websocket, slowapi, httpx, selenium, openai, fastapi-cli, litellm, fastapi, crawl4ai\n", 229 | " Attempting uninstall: pydantic-core\n", 230 | " Found existing installation: pydantic_core 2.20.1\n", 231 | " Uninstalling pydantic_core-2.20.1:\n", 232 | " Successfully uninstalled pydantic_core-2.20.1\n", 233 | " Attempting uninstall: pillow\n", 234 | " Found existing installation: Pillow 9.4.0\n", 235 | " Uninstalling Pillow-9.4.0:\n", 236 | " Successfully uninstalled Pillow-9.4.0\n", 237 | " Attempting uninstall: rich\n", 238 | " Found existing installation: rich 13.8.0\n", 239 | " Uninstalling rich-13.8.0:\n", 240 | " Successfully uninstalled rich-13.8.0\n", 241 | " Attempting uninstall: pydantic\n", 242 | " Found existing installation: pydantic 2.8.2\n", 243 | " Uninstalling pydantic-2.8.2:\n", 244 | " Successfully uninstalled pydantic-2.8.2\n", 245 | " Attempting uninstall: aiohttp\n", 246 | " Found existing installation: aiohttp 3.10.5\n", 247 | " Uninstalling aiohttp-3.10.5:\n", 248 | " Successfully uninstalled aiohttp-3.10.5\n", 249 | "Successfully installed aiohttp-3.9.5 aiosqlite-0.20.0 crawl4ai-0.2.77 deprecated-1.2.14 dnspython-2.6.1 email_validator-2.2.0 fastapi-0.111.0 fastapi-cli-0.0.5 h11-0.14.0 html2text-2024.2.26 httpcore-1.0.5 httptools-0.6.1 httpx-0.27.0 ijson-3.3.0 jiter-0.5.0 limits-3.13.0 litellm-1.40.17 openai-1.44.0 orjson-3.10.7 outcome-1.3.0.post0 pillow-10.3.0 pydantic-2.7.4 pydantic-core-2.18.4 python-dotenv-1.0.1 python-multipart-0.0.9 rich-13.7.1 selenium-4.23.1 slowapi-0.1.9 starlette-0.37.2 tiktoken-0.7.0 trio-0.26.2 trio-websocket-0.11.1 ujson-5.10.0 uvicorn-0.30.1 uvloop-0.20.0 watchfiles-0.24.0 websockets-13.0.1 wsproto-1.2.0\n" 250 | ] 251 | }, 252 | { 253 | "output_type": "display_data", 254 | "data": { 255 | "application/vnd.colab-display-data+json": { 256 | "pip_warning": { 257 | "packages": [ 258 | "PIL" 259 | ] 260 | }, 261 | "id": "9be286f96740471b9548d70454e33157" 262 | } 263 | }, 264 | "metadata": {} 265 | } 266 | ], 267 | "source": [ 268 | "!pip install \"crawl4ai @ git+https://github.com/unclecode/crawl4ai.git\"" 269 | ] 270 | }, 271 | { 272 | "cell_type": "code", 273 | "source": [ 274 | "from crawl4ai import WebCrawler" 275 | ], 276 | "metadata": { 277 | "id": "CC7THDDy-fOy" 278 | }, 279 | "execution_count": 1, 280 | "outputs": [] 281 | }, 282 | { 283 | "cell_type": "code", 284 | "source": [ 285 | "# Create an instance of WebCrawler\n", 286 | "crawler = WebCrawler()" 287 | ], 288 | "metadata": { 289 | "colab": { 290 | "base_uri": "https://localhost:8080/" 291 | }, 292 | "id": "vwGzzA_a-88g", 293 | "outputId": "9ee16bd9-4439-4fda-b473-7f70fcb86796" 294 | }, 295 | "execution_count": 2, 296 | "outputs": [ 297 | { 298 | "output_type": "stream", 299 | "name": "stdout", 300 | "text": [ 301 | "[LOG] 🚀 Initializing LocalSeleniumCrawlerStrategy\n" 302 | ] 303 | } 304 | ] 305 | }, 306 | { 307 | "cell_type": "code", 308 | "source": [ 309 | "# Warm up the crawler (load necessary models)\n", 310 | "crawler.warmup()" 311 | ], 312 | "metadata": { 313 | "colab": { 314 | "base_uri": "https://localhost:8080/" 315 | }, 316 | "id": "DYWQyc-C-_v8", 317 | "outputId": "41544d56-6fa1-4a2d-9ec4-0693b68817ec" 318 | }, 319 | "execution_count": 3, 320 | "outputs": [ 321 | { 322 | "output_type": "stream", 323 | "name": "stdout", 324 | "text": [ 325 | "[LOG] 🌤️ Warming up the WebCrawler\n", 326 | "[LOG] 🌞 WebCrawler is ready to crawl\n" 327 | ] 328 | } 329 | ] 330 | }, 331 | { 332 | "cell_type": "code", 333 | "source": [ 334 | "# Run the crawler on a URL\n", 335 | "result = crawler.run(\n", 336 | " url = \"https://www.eu-startups.com/directory/\"\n", 337 | ")\n", 338 | "\n", 339 | "# Print the extracted content\n", 340 | "print(result.markdown)" 341 | ], 342 | "metadata": { 343 | "colab": { 344 | "base_uri": "https://localhost:8080/" 345 | }, 346 | "id": "p5K70HHj_FSU", 347 | "outputId": "1585a91f-9514-4972-c2b6-047c2959d8f5" 348 | }, 349 | "execution_count": 5, 350 | "outputs": [ 351 | { 352 | "output_type": "stream", 353 | "name": "stdout", 354 | "text": [ 355 | "[LOG] 🚀 Crawling done for https://www.eu-startups.com/directory/, success: True, time taken: 10.05967903137207 seconds\n", 356 | "[LOG] 🚀 Content extracted for https://www.eu-startups.com/directory/, success: True, time taken: 0.36144351959228516 seconds\n", 357 | "[LOG] 🔥 Extracting semantic blocks for https://www.eu-startups.com/directory/, Strategy: NoExtractionStrategy\n", 358 | "[LOG] 🚀 Extraction done for https://www.eu-startups.com/directory/, time taken: 0.3694131374359131 seconds.\n", 359 | "Join 81,000 Founders & Investors who already receive our weekly EU-Startups\n", 360 | "Newsletter!\n", 361 | "\n", 362 | "COUNT ME IN\n", 363 | "\n", 364 | "No, Thanks\n", 365 | "\n", 366 | "Sign in\n", 367 | "\n", 368 | " * Magazine\n", 369 | " * Interviews\n", 370 | " * Re-/Launch\n", 371 | " * Funding\n", 372 | " * Acquisitions\n", 373 | " * Other Stuff\n", 374 | " * Know-How\n", 375 | " * Summit\n", 376 | " * Event Overview\n", 377 | " * Get Tickets\n", 378 | " * Sponsor\n", 379 | " * Exhibit\n", 380 | " * Job Board\n", 381 | " * Job Board\n", 382 | " * Post a Job\n", 383 | " * Insights\n", 384 | " * Startup Sourcing\n", 385 | " * Premium Reports\n", 386 | " * Podcast\n", 387 | " * Database\n", 388 | " * Startup Database\n", 389 | " * Investor Database\n", 390 | " * About us\n", 391 | " * Mission / Team\n", 392 | " * Advertising\n", 393 | " * Our Newsletter\n", 394 | " * –CLUB–\n", 395 | " * Register Now\n", 396 | " * Log In\n", 397 | " * CLUB Benefits\n", 398 | "\n", 399 | "__\n", 400 | "\n", 401 | "Sign in\n", 402 | "\n", 403 | "Welcome!Log into your account\n", 404 | "\n", 405 | "your username\n", 406 | "\n", 407 | "your password\n", 408 | "\n", 409 | "Forgot your password?\n", 410 | "\n", 411 | "Password recovery\n", 412 | "\n", 413 | "Recover your password\n", 414 | "\n", 415 | "your email\n", 416 | "\n", 417 | "Search\n", 418 | "\n", 419 | "![](\\\\\"https://www.eu-startups.com/wp-content/uploads/2024/08/1-3.png\\\\\"/)\n", 420 | "\n", 421 | "![\\\\\"Logo\\\\\"](\\\\\"https://www.eu-startups.com/wp-\n", 422 | "content/uploads/2023/02/EU_Startups_Logo_Transparent_White-1.png\\\\\")\n", 423 | "\n", 424 | "![\\\\\"Logo\\\\\"](\\\\\"https://www.eu-startups.com/wp-\n", 425 | "content/uploads/2023/02/EU_Startups_Logo_Transparent_White-1.png\\\\\")\n", 426 | "\n", 427 | "![\\\\\"Logo\\\\\"](\\\\\"https://www.eu-startups.com/wp-content/uploads/2022/08/EU-\n", 428 | "Startups-Logo.png\\\\\")\n", 429 | "\n", 430 | "![](\\\\\"https://www.eu-startups.com/wp-content/uploads/2024/08/1-3.png\\\\\"/)\n", 431 | "\n", 432 | " * Magazine\n", 433 | "\n", 434 | " * Interviews\n", 435 | "\n", 436 | " * Re-/Launch\n", 437 | "\n", 438 | " * Funding\n", 439 | "\n", 440 | " * Acquisitions\n", 441 | "\n", 442 | " * Other Stuff\n", 443 | "\n", 444 | " * Know-How\n", 445 | "\n", 446 | " * Summit\n", 447 | "\n", 448 | " * Event Overview\n", 449 | "\n", 450 | " * Get Tickets\n", 451 | "\n", 452 | " * Sponsor\n", 453 | "\n", 454 | " * Exhibit\n", 455 | "\n", 456 | " * Job Board\n", 457 | "\n", 458 | " * Job Board\n", 459 | "\n", 460 | " * Post a Job\n", 461 | "\n", 462 | " * Insights\n", 463 | "\n", 464 | " * Startup Sourcing\n", 465 | "\n", 466 | " * Premium Reports\n", 467 | "\n", 468 | " * Podcast\n", 469 | "\n", 470 | " * Database\n", 471 | "\n", 472 | " * Startup Database\n", 473 | "\n", 474 | " * Investor Database\n", 475 | "\n", 476 | " * About us\n", 477 | "\n", 478 | " * Mission / Team\n", 479 | "\n", 480 | " * Advertising\n", 481 | "\n", 482 | " * Our Newsletter\n", 483 | "\n", 484 | " * –CLUB–\n", 485 | "\n", 486 | " * Register Now\n", 487 | "\n", 488 | " * Log In\n", 489 | "\n", 490 | " * CLUB Benefits\n", 491 | "\n", 492 | "Search\n", 493 | "\n", 494 | "![\\\\\"Logo\\\\\"](\\\\\"https://www.eu-startups.com/wp-content/uploads/2022/08/EU-\n", 495 | "Startups-Logo.png\\\\\")\n", 496 | "\n", 497 | " * Magazine\n", 498 | "\n", 499 | " * Interviews\n", 500 | "\n", 501 | " * Re-/Launch\n", 502 | "\n", 503 | " * Funding\n", 504 | "\n", 505 | " * Acquisitions\n", 506 | "\n", 507 | " * Other Stuff\n", 508 | "\n", 509 | " * Know-How\n", 510 | "\n", 511 | " * Summit\n", 512 | "\n", 513 | " * Event Overview\n", 514 | "\n", 515 | " * Get Tickets\n", 516 | "\n", 517 | " * Sponsor\n", 518 | "\n", 519 | " * Exhibit\n", 520 | "\n", 521 | " * Job Board\n", 522 | "\n", 523 | " * Job Board\n", 524 | "\n", 525 | " * Post a Job\n", 526 | "\n", 527 | " * Insights\n", 528 | "\n", 529 | " * Startup Sourcing\n", 530 | "\n", 531 | " * Premium Reports\n", 532 | "\n", 533 | " * Podcast\n", 534 | "\n", 535 | " * Database\n", 536 | "\n", 537 | " * Startup Database\n", 538 | "\n", 539 | " * Investor Database\n", 540 | "\n", 541 | " * About us\n", 542 | "\n", 543 | " * Mission / Team\n", 544 | "\n", 545 | " * Advertising\n", 546 | "\n", 547 | " * Our Newsletter\n", 548 | "\n", 549 | " * –CLUB–\n", 550 | "\n", 551 | " * Register Now\n", 552 | "\n", 553 | " * Log In\n", 554 | "\n", 555 | " * CLUB Benefits\n", 556 | "\n", 557 | "Search\n", 558 | "\n", 559 | "Home Startup Database\n", 560 | "\n", 561 | "# Startup Database\n", 562 | "\n", 563 | "Keywords:\n", 564 | "\n", 565 | "Advanced Search\n", 566 | "\n", 567 | "Add Listing\n", 568 | "\n", 569 | "Sorted by Country:\n", 570 | "\n", 571 | " * Austria (723) \n", 572 | " * Belgium (662) \n", 573 | " * Bulgaria (243) \n", 574 | " * Croatia (198) \n", 575 | " * Cyprus (271) \n", 576 | " * Czechia (299) \n", 577 | " * Denmark (756) \n", 578 | " * Estonia (722) \n", 579 | " * Finland (564) \n", 580 | " * France (2835) \n", 581 | " * Germany (4398) \n", 582 | " * Greece (276) \n", 583 | " * Hungary (315) \n", 584 | " * Ireland (825) \n", 585 | " * Italy (1414) \n", 586 | " * Latvia (242) \n", 587 | " * Lithuania (275) \n", 588 | " * Luxembourg (214) \n", 589 | " * Malta (146) \n", 590 | " * Netherlands (1762) \n", 591 | " * Norway (515) \n", 592 | " * Poland (699) \n", 593 | " * Portugal (589) \n", 594 | " * Romania (385) \n", 595 | " * Slovakia (162) \n", 596 | " * Slovenia (201) \n", 597 | " * Spain (2653) \n", 598 | " * Sweden (1028) \n", 599 | " * Switzerland (1068) \n", 600 | " * UK (6726) \n", 601 | "\n", 602 | "Sort By:Default Business Name Total Funding Founded\n", 603 | "\n", 604 | "### Infrafon GmbH\n", 605 | "\n", 606 | "![\\\\\"Infrafon](\\\\\"https://www.eu-startups.com/wp-\n", 607 | "content/uploads/2024/09/dmea-150x174.jpg\\\\\")\n", 608 | "\n", 609 | "Business Name:\n", 610 | "\n", 611 | "Infrafon GmbH\n", 612 | "\n", 613 | "Category:\n", 614 | "\n", 615 | "Germany\n", 616 | "\n", 617 | "Based in:\n", 618 | "\n", 619 | "Freiburg\n", 620 | "\n", 621 | "Tags:\n", 622 | "\n", 623 | "edge Digitalization, Security, Workmanagement\n", 624 | "\n", 625 | "Founded:\n", 626 | "\n", 627 | "2021\n", 628 | "\n", 629 | "### Cimphony\n", 630 | "\n", 631 | "![\\\\\"Cimphony\\\\\"](\\\\\"https://www.eu-startups.com/wp-\n", 632 | "content/uploads/2024/09/agNsxwT_400x400-150x150.jpg\\\\\")\n", 633 | "\n", 634 | "Business Name:\n", 635 | "\n", 636 | "Cimphony\n", 637 | "\n", 638 | "Category:\n", 639 | "\n", 640 | "Austria\n", 641 | "\n", 642 | "Based in:\n", 643 | "\n", 644 | "Thal\n", 645 | "\n", 646 | "Tags:\n", 647 | "\n", 648 | "SaaS, AI,Bussines\n", 649 | "\n", 650 | "Founded:\n", 651 | "\n", 652 | "2024\n", 653 | "\n", 654 | "### Phi Wallet\n", 655 | "\n", 656 | "![\\\\\"Phi](\\\\\"https://www.eu-startups.com/wp-\n", 657 | "content/uploads/2024/09/Screenshot-2024-09-03-at-12.31.24-150x88.png\\\\\")\n", 658 | "\n", 659 | "Business Name:\n", 660 | "\n", 661 | "Phi Wallet\n", 662 | "\n", 663 | "Category:\n", 664 | "\n", 665 | "Portugal\n", 666 | "\n", 667 | "Based in:\n", 668 | "\n", 669 | "Lisbon\n", 670 | "\n", 671 | "Tags:\n", 672 | "\n", 673 | "Financial Services\n", 674 | "\n", 675 | "Founded:\n", 676 | "\n", 677 | "2021\n", 678 | "\n", 679 | "### Calensync.live\n", 680 | "\n", 681 | "![\\\\\"Calensync.live\\\\\"](\\\\\"https://www.eu-startups.com/wp-\n", 682 | "content/uploads/2024/09/calensync_226548.png\\\\\")\n", 683 | "\n", 684 | "Business Name:\n", 685 | "\n", 686 | "Calensync.live\n", 687 | "\n", 688 | "Category:\n", 689 | "\n", 690 | "Estonia\n", 691 | "\n", 692 | "Based in:\n", 693 | "\n", 694 | "Tallinn, Estonia\n", 695 | "\n", 696 | "Tags:\n", 697 | "\n", 698 | "Entrepreneurs, freelancers, directors, contractors\n", 699 | "\n", 700 | "Founded:\n", 701 | "\n", 702 | "2023\n", 703 | "\n", 704 | "### VidToBlogs\n", 705 | "\n", 706 | "![\\\\\"VidToBlogs\\\\\"](\\\\\"https://www.eu-startups.com/wp-\n", 707 | "content/uploads/2024/09/mfQq88js-150x84.jpg\\\\\")\n", 708 | "\n", 709 | "Business Name:\n", 710 | "\n", 711 | "VidToBlogs\n", 712 | "\n", 713 | "Category:\n", 714 | "\n", 715 | "Estonia\n", 716 | "\n", 717 | "Based in:\n", 718 | "\n", 719 | "Tallinn\n", 720 | "\n", 721 | "Tags:\n", 722 | "\n", 723 | "Video, Blog, Text Generation,\n", 724 | "\n", 725 | "Founded:\n", 726 | "\n", 727 | "2024\n", 728 | "\n", 729 | "### WAPlus – WhatsApp CRM\n", 730 | "\n", 731 | "![\\\\\"WhatsApp](\\\\\"https://www.eu-startups.com/wp-\n", 732 | "content/uploads/2024/09/logo-1-150x150.png\\\\\")\n", 733 | "\n", 734 | "Business Name:\n", 735 | "\n", 736 | "WAPlus – WhatsApp CRM\n", 737 | "\n", 738 | "Category:\n", 739 | "\n", 740 | "Spain\n", 741 | "\n", 742 | "Based in:\n", 743 | "\n", 744 | "Barcelona\n", 745 | "\n", 746 | "Tags:\n", 747 | "\n", 748 | "Schedule Message, Auto Reply, CRM Integration, AI ChatBot, AI Translator\n", 749 | "\n", 750 | "Founded:\n", 751 | "\n", 752 | "2022\n", 753 | "\n", 754 | "### Bricks\n", 755 | "\n", 756 | "![\\\\\"Bricks\\\\\"](\\\\\"https://www.eu-startups.com/wp-\n", 757 | "content/uploads/2024/09/bricks-og-image-150x79.png\\\\\")\n", 758 | "\n", 759 | "Business Name:\n", 760 | "\n", 761 | "Bricks\n", 762 | "\n", 763 | "Category:\n", 764 | "\n", 765 | "UK\n", 766 | "\n", 767 | "Based in:\n", 768 | "\n", 769 | "London\n", 770 | "\n", 771 | "Tags:\n", 772 | "\n", 773 | "productivity, collaboration tools, spreadsheets, presentations, slide decks,\n", 774 | "docs, wiki\n", 775 | "\n", 776 | "Founded:\n", 777 | "\n", 778 | "2024\n", 779 | "\n", 780 | "### Ask Bart\n", 781 | "\n", 782 | "![\\\\\"Ask](\\\\\"https://www.eu-startups.com/wp-content/uploads/2024/09/Askbart-\n", 783 | "rectangular-logo-150x39.png\\\\\")\n", 784 | "\n", 785 | "Business Name:\n", 786 | "\n", 787 | "Ask Bart\n", 788 | "\n", 789 | "Category:\n", 790 | "\n", 791 | "UK\n", 792 | "\n", 793 | "Based in:\n", 794 | "\n", 795 | "Worthing\n", 796 | "\n", 797 | "Tags:\n", 798 | "\n", 799 | "care homes, health care\n", 800 | "\n", 801 | "Founded:\n", 802 | "\n", 803 | "2024\n", 804 | "\n", 805 | "### Emble\n", 806 | "\n", 807 | "![\\\\\"Emble\\\\\"](\\\\\"https://www.eu-startups.com/wp-\n", 808 | "content/uploads/2024/09/Emble-avatar-white-on-black-solid_-150x150.png\\\\\")\n", 809 | "\n", 810 | "Business Name:\n", 811 | "\n", 812 | "Emble\n", 813 | "\n", 814 | "Category:\n", 815 | "\n", 816 | "UK\n", 817 | "\n", 818 | "Based in:\n", 819 | "\n", 820 | "London, United Kingdom\n", 821 | "\n", 822 | "Tags:\n", 823 | "\n", 824 | "User Research, User Testing, Product Management, SaaS Tools,\n", 825 | "\n", 826 | "Founded:\n", 827 | "\n", 828 | "2024\n", 829 | "\n", 830 | "### TinkTide\n", 831 | "\n", 832 | "![\\\\\"TinkTide\\\\\"](\\\\\"https://www.eu-startups.com/wp-\n", 833 | "content/uploads/2024/09/workflow4-150x83.png\\\\\")\n", 834 | "\n", 835 | "Business Name:\n", 836 | "\n", 837 | "TinkTide\n", 838 | "\n", 839 | "Category:\n", 840 | "\n", 841 | "Croatia\n", 842 | "\n", 843 | "Based in:\n", 844 | "\n", 845 | "Zagreb\n", 846 | "\n", 847 | "Tags:\n", 848 | "\n", 849 | "entrepreneur, small business, startup, market research, business idea,\n", 850 | "business plan, pitch deck\n", 851 | "\n", 852 | "Founded:\n", 853 | "\n", 854 | "2024\n", 855 | "\n", 856 | "### Tatanka.nl\n", 857 | "\n", 858 | "![\\\\\"Tatanka.nl\\\\\"](\\\\\"https://www.eu-startups.com/wp-\n", 859 | "content/uploads/2024/09/logo-150x150.png\\\\\")\n", 860 | "\n", 861 | "Business Name:\n", 862 | "\n", 863 | "Tatanka.nl\n", 864 | "\n", 865 | "Category:\n", 866 | "\n", 867 | "Netherlands\n", 868 | "\n", 869 | "Based in:\n", 870 | "\n", 871 | "Amsterdam\n", 872 | "\n", 873 | "Tags:\n", 874 | "\n", 875 | "natural supplements, herbal supplements, mood enhancers, energy boosters,\n", 876 | "relaxation aids, responsible use\n", 877 | "\n", 878 | "Founded:\n", 879 | "\n", 880 | "2023\n", 881 | "\n", 882 | "Next →\n", 883 | "\n", 884 | "#### STARTUP NEWS – By Country\n", 885 | "\n", 886 | " * ![](\\\\\"/wp-content/uploads/icons/Austria-Startups.png\\\\\"/)Austria\n", 887 | " * ![](\\\\\"/wp-content/uploads/icons/Belgium-Startups.png\\\\\"/)Belgium\n", 888 | " * ![](\\\\\"/wp-content/uploads/icons/Bulgaria-Startups.png\\\\\"/)Bulgaria\n", 889 | " * ![](\\\\\"/wp-content/uploads/icons/Croatia-Startups.png\\\\\"/)Croatia\n", 890 | " * ![](\\\\\"/wp-content/uploads/icons/Cyprus-Startups.png\\\\\"/)Cyprus\n", 891 | " * ![](\\\\\"/wp-content/uploads/icons/Czechia-Startups.png\\\\\"/)Czechia\n", 892 | " * ![](\\\\\"/wp-content/uploads/icons/Denmark-Startups.png\\\\\"/)Denmark\n", 893 | " * ![](\\\\\"/wp-content/uploads/icons/Estonia-Startups.png\\\\\"/)Estonia\n", 894 | " * ![](\\\\\"/wp-content/uploads/icons/Finland-Startups.png\\\\\"/)Finland\n", 895 | " * ![](\\\\\"/wp-content/uploads/icons/France-Startups.png\\\\\"/)France\n", 896 | " * ![](\\\\\"/wp-content/uploads/icons/Germany-Startups.png\\\\\"/)Germany\n", 897 | " * ![](\\\\\"/wp-content/uploads/icons/Greece-Startups.png\\\\\"/)Greece\n", 898 | " * ![](\\\\\"/wp-content/uploads/icons/Hungary-Startups.png\\\\\"/)Hungary\n", 899 | " * ![](\\\\\"/wp-content/uploads/icons/Ireland-Startups.png\\\\\"/)Ireland\n", 900 | " * ![](\\\\\"/wp-content/uploads/icons/Italy-Startups.png\\\\\"/)Italy\n", 901 | " * ![](\\\\\"/wp-content/uploads/icons/Latvia-Startups.png\\\\\"/)Latvia\n", 902 | " * ![](\\\\\"/wp-content/uploads/icons/Lithuania-Startups.png\\\\\"/)Lithuania\n", 903 | " * ![](\\\\\"/wp-content/uploads/icons/Luxembourg-Startups.png\\\\\"/)Luxembourg\n", 904 | " * ![](\\\\\"/wp-content/uploads/icons/Malta-Startups.png\\\\\"/)Malta\n", 905 | " * ![](\\\\\"/wp-content/uploads/icons/Netherlands-Startups.png\\\\\"/)Netherlands\n", 906 | " * ![](\\\\\"/wp-content/uploads/icons/Norway-Startups.png\\\\\"/)Norway\n", 907 | " * ![](\\\\\"/wp-content/uploads/icons/Poland-Startups.png\\\\\"/)Poland\n", 908 | " * ![](\\\\\"/wp-content/uploads/icons/Portugal-Startups.png\\\\\"/)Portugal\n", 909 | " * ![](\\\\\"/wp-content/uploads/icons/Romania-Startups.png\\\\\"/)Romania\n", 910 | " * ![](\\\\\"/wp-content/uploads/icons/Slovakia-Startups.png\\\\\"/)Slovakia\n", 911 | " * ![](\\\\\"/wp-content/uploads/icons/Slovenia-Startups.png\\\\\"/)Slovenia\n", 912 | " * ![](\\\\\"/wp-content/uploads/icons/Spain-Startups.png\\\\\"/)Spain\n", 913 | " * ![](\\\\\"/wp-content/uploads/icons/Sweden-Startups.png\\\\\"/)Sweden\n", 914 | " * ![](\\\\\"/wp-content/uploads/icons/Switzerland-Startups.png\\\\\"/)Switzerland\n", 915 | " * ![](\\\\\"/wp-content/uploads/icons/UK-Startups.png\\\\\"/)UK\n", 916 | "\n", 917 | "#### Receive our weekly Newsletter\n", 918 | "\n", 919 | "Newsletter Form (#1)\n", 920 | "\n", 921 | "First Name\n", 922 | "\n", 923 | "Email\n", 924 | "\n", 925 | "Subscribe Now\n", 926 | "\n", 927 | "#### Our Channels\n", 928 | "\n", 929 | "![](\\\\\"https://www.eu-startups.com/wp-\n", 930 | "content/uploads/2024/06/LinkedIn-2.png\\\\\"/) ![](\\\\\"https://www.eu-\n", 931 | "startups.com/wp-\n", 932 | "content/uploads/2024/06/Facebook-1-1.png\\\\\"/)![](\\\\\"https://www.eu-\n", 933 | "startups.com/wp-\n", 934 | "content/uploads/2024/06/Instagram-1.png\\\\\"/)![](\\\\\"https://www.eu-\n", 935 | "startups.com/wp-\n", 936 | "content/uploads/2024/06/Newsletter-1.png\\\\\"/)![](\\\\\"https://www.eu-\n", 937 | "startups.com/wp-content/uploads/2024/06/Twitter-1.png\\\\\"/)\n", 938 | "\n", 939 | "#### Latest Jobs\n", 940 | "\n", 941 | "Event Manager Barcelona, Spain\n", 942 | "\n", 943 | "Data Engineer Watermael-Boitsfort, Belgium\n", 944 | "\n", 945 | "Innovatie Manager Amsterdam, Netherlands\n", 946 | "\n", 947 | "Traineeship Pensions Rotterdam, Netherlands\n", 948 | "\n", 949 | "Field Sales Representative Zürich, Switzerland\n", 950 | "\n", 951 | "View All\n", 952 | "\n", 953 | "#### Advertising\n", 954 | "\n", 955 | "![](\\\\\"https://www.eu-startups.com/wp-content/uploads/2023/05/Sourcing-\n", 956 | "square.png\\\\\"/)\n", 957 | "\n", 958 | "![\\\\\"Logo\\\\\"](\\\\\"https://www.eu-startups.com/wp-\n", 959 | "content/uploads/2023/02/EU_Startups_Logo_Transparent_White-1.png\\\\\")\n", 960 | "\n", 961 | "### ABOUT US\n", 962 | "\n", 963 | "EU-Startups.com is the leading online magazine about startups in Europe. Learn\n", 964 | "more about us and our advertising options.\n", 965 | "\n", 966 | "### FOLLOW US\n", 967 | "\n", 968 | "Facebook\n", 969 | "\n", 970 | "Instagram\n", 971 | "\n", 972 | "Linkedin\n", 973 | "\n", 974 | "RSS\n", 975 | "\n", 976 | "Twitter\n", 977 | "\n", 978 | "© Menlo Media S.L. - All rights reserved.\n", 979 | "\n", 980 | "Notifications\n", 981 | "\n", 982 | "\n" 983 | ] 984 | } 985 | ] 986 | }, 987 | { 988 | "cell_type": "code", 989 | "source": [], 990 | "metadata": { 991 | "id": "stieeRDh_PpG" 992 | }, 993 | "execution_count": null, 994 | "outputs": [] 995 | } 996 | ] 997 | } -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 AI Anytime 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Crawl-Websites-to-Markdown 2 | Crawl Websites to Markdown. 3 | --------------------------------------------------------------------------------