├── docs ├── requirements.txt ├── asyncio.rst ├── websockets.rst ├── impersonate │ ├── _index.rst │ ├── fingerprint.rst │ ├── targets.rst │ ├── faq.rst │ ├── psk.rst │ └── customize.rst ├── pro.rst ├── Makefile ├── make.bat ├── conf.py ├── dev.rst ├── vs-requests.rst ├── community.rst ├── cookies.rst ├── advanced.rst ├── index.rst ├── changelog.rst ├── api.rst └── faq.rst ├── examples ├── scrapy_integration.py ├── requests_like.py ├── async_session.py ├── websockets │ ├── short_running.py │ ├── long_running_async.py │ └── long_running.py ├── curl_like.py ├── custom_response_class.py ├── upload.py ├── impersonate.py └── stream.py ├── curl_cffi ├── py.typed ├── __version__.py ├── requests │ ├── errors.py │ ├── __init__.py │ ├── exceptions.py │ └── models.py ├── utils.py ├── cli.py └── __init__.py ├── FUNDING.yml ├── assets ├── scrapfly.png ├── thordata.png ├── yescaptcha.png └── hypersolutions.png ├── MANIFEST.in ├── benchmark ├── requirements.txt ├── hardware.txt ├── single_worker.csv ├── server.py ├── multiple_workers.csv ├── ws_bench_1_server.py ├── benchmark.py ├── ws_bench_utils.py ├── README.md └── ws_bench_1_client.py ├── ffi ├── shim.h ├── shim.c └── cdef.c ├── tests ├── integration │ ├── test_real_world.py │ ├── test_response_class.py │ ├── test_fingerprints.py │ └── test_httpbin.py ├── unittest │ ├── test_cli.py │ ├── test_smoke.py │ ├── test_async.py │ ├── test_headers.py │ ├── test_cookies.py │ ├── test_websockets.py │ ├── test_upload.py │ ├── test_impersonate.py │ └── test_curl.py └── threads │ ├── test_eventlet.py │ └── test_gevent.py ├── .github ├── ISSUE_TEMPLATE │ ├── fingerprint_report.md │ ├── question.md │ ├── feature_request.md │ └── bug_report.md └── workflows │ └── build-and-test.yaml ├── scripts ├── download_curl.sh ├── bump_version.sh ├── generate_consts.py └── build.py ├── .gitignore ├── .readthedocs.yaml ├── setup.py ├── LICENSE ├── Makefile ├── libs.json └── pyproject.toml /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx 2 | -------------------------------------------------------------------------------- /examples/scrapy_integration.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /curl_cffi/py.typed: -------------------------------------------------------------------------------- 1 | # Marker file for PEP 561. 
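Note that ``examples/scrapy_integration.py`` above is an empty placeholder. For orientation only, a minimal Scrapy downloader middleware built on curl_cffi could look like the sketch below. This is a hypothetical illustration, not the contents of that file (the class name and wiring are assumptions); for maintained integrations, see the middlewares listed in docs/community.rst further down.

# Hypothetical sketch -- not part of this repo.
from curl_cffi import requests as curl_requests
from scrapy.http import HtmlResponse


class CurlCffiDownloaderMiddleware:
    """Fetch pages with curl_cffi so requests carry a browser TLS fingerprint."""

    def process_request(self, request, spider):
        r = curl_requests.get(request.url, impersonate="chrome")
        # Returning a Response here short-circuits Scrapy's default downloader.
        return HtmlResponse(
            url=request.url,
            status=r.status_code,
            body=r.content,
            encoding="utf-8",
            request=request,
        )

To enable such a middleware, you would register the class under ``DOWNLOADER_MIDDLEWARES`` in your Scrapy settings.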
-------------------------------------------------------------------------------- /docs/asyncio.rst: -------------------------------------------------------------------------------- 1 | Asyncio 2 | ======= 3 | 4 | TODO 5 | -------------------------------------------------------------------------------- /FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: lexiforest 2 | buy_me_a_coffee: yifei 3 | -------------------------------------------------------------------------------- /docs/websockets.rst: -------------------------------------------------------------------------------- 1 | WebSockets 2 | ========== 3 | 4 | TODO 5 | 6 | -------------------------------------------------------------------------------- /assets/scrapfly.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lexiforest/curl_cffi/HEAD/assets/scrapfly.png -------------------------------------------------------------------------------- /assets/thordata.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lexiforest/curl_cffi/HEAD/assets/thordata.png -------------------------------------------------------------------------------- /assets/yescaptcha.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lexiforest/curl_cffi/HEAD/assets/yescaptcha.png -------------------------------------------------------------------------------- /assets/hypersolutions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lexiforest/curl_cffi/HEAD/assets/hypersolutions.png -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include tests * 2 | 3 | include ffi/* 4 | include include/curl/* 5 | include scripts/build.py 6 | include Makefile 7 | include libs.json 8 | -------------------------------------------------------------------------------- /benchmark/requirements.txt: -------------------------------------------------------------------------------- 1 | pandas 2 | starlette 3 | uvicorn 4 | requests 5 | httpx 6 | aiohttp 7 | pycurl 8 | tls-client 9 | gunicorn 10 | uvloop 11 | -------------------------------------------------------------------------------- /ffi/shim.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #define CURL_STATICLIB 5 | #include "curl/curl.h" 6 | 7 | int _curl_easy_setopt(void* curl, int option, void* param); 8 | -------------------------------------------------------------------------------- /tests/integration/test_real_world.py: -------------------------------------------------------------------------------- 1 | from curl_cffi import requests 2 | 3 | 4 | def test_post_with_no_body(): 5 | r = requests.post( 6 | "https://shopee.co.id/api/v2/authentication/get_active_login_page" 7 | ) 8 | assert r.status_code == 200 9 | -------------------------------------------------------------------------------- /curl_cffi/__version__.py: -------------------------------------------------------------------------------- 1 | from importlib import metadata 2 | 3 | from .curl import Curl 4 | 5 | __title__ = "curl_cffi" 6 | __description__ = metadata.metadata("curl_cffi")["Summary"] 7 | __version__ = 
metadata.version("curl_cffi") 8 | __curl_version__ = Curl().version().decode() 9 | -------------------------------------------------------------------------------- /curl_cffi/requests/errors.py: -------------------------------------------------------------------------------- 1 | # for compatibility with 0.5.x 2 | 3 | __all__ = ["CurlError", "RequestsError", "CookieConflict", "SessionClosed"] 4 | 5 | from ..curl import CurlError 6 | from .exceptions import CookieConflict, SessionClosed 7 | from .exceptions import RequestException as RequestsError 8 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/fingerprint_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Fingerprint report 3 | about: Report a new fingerprint not supported by curl_cffi 4 | title: "New Fingerprint: " 5 | labels: "" 6 | --- 7 | 8 | Please report in the [curl-impersonate](https://github.com/lexiforest/curl-impersonate) repo. 9 | 10 | -------------------------------------------------------------------------------- /benchmark/hardware.txt: -------------------------------------------------------------------------------- 1 | Benchmarks run on: 2 | 3 | macOS 4 | 5 | Processor Name: 6-Core Intel Core i5 6 | Processor Speed: 3.3 GHz 7 | Number of Processors: 1 8 | Total Number of Cores: 6 9 | L2 Cache (per Core): 256 KB 10 | L3 Cache: 12 MB 11 | Hyper-Threading Technology: Enabled 12 | Memory: 32 GB 13 | -------------------------------------------------------------------------------- /scripts/download_curl.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | CURL_VERSION=curl-8_15_0 4 | 5 | curl -L https://github.com/curl/curl/archive/${CURL_VERSION}.zip -o curl.zip 6 | unzip -q -o curl.zip 7 | mv curl-${CURL_VERSION} ${CURL_VERSION} 8 | 9 | cd ${CURL_VERSION} 10 | 11 | patchfile=../../curl-impersonate/patches/curl.patch 12 | patch -p1 < $patchfile 13 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | .pytest_cache 3 | Release 4 | *.pyc 5 | *.pyd 6 | *.so 7 | *.o 8 | **/__pycache__ 9 | .venv 10 | 11 | build/ 12 | dist/ 13 | wheelhouse/ 14 | curl.egg-info/ 15 | curl_cffi.egg-info/ 16 | # curl_cffi/const.py 17 | curl-*/ 18 | *.tar.xz 19 | *.tar.gz 20 | .preprocessed 21 | include/ 22 | .DS_Store 23 | 24 | .mypy_cache/ 25 | .ruff_cache/ 26 | .aider* 27 | -------------------------------------------------------------------------------- /scripts/bump_version.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | VERSION=$1 4 | UPSTREAM_VERSION=$2 5 | 6 | # Makefile 7 | gsed "s/^VERSION := .*/VERSION := ${UPSTREAM_VERSION}/g" -i Makefile 8 | 9 | # pyproject.toml 10 | gsed "s/^version = .*/version = \"${VERSION}\"/g" -i pyproject.toml 11 | 12 | # build.py 13 | gsed "s/^__version__ = .*/__version__ = \"${UPSTREAM_VERSION}\"/g" -i scripts/build.py 14 | -------------------------------------------------------------------------------- /curl_cffi/utils.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | 4 | class CurlCffiWarning(UserWarning, RuntimeWarning): 5 | pass 6 | 7 | 8 | def config_warnings(on: bool = False): 9 | if on: 10 | warnings.simplefilter("default", category=CurlCffiWarning) 11 | else: 12 | 
warnings.simplefilter("ignore", category=CurlCffiWarning)
13 | 
14 | 
15 | def is_pro():
16 |     return False
17 | 
--------------------------------------------------------------------------------
/tests/unittest/test_cli.py:
--------------------------------------------------------------------------------
1 | import subprocess
2 | 
3 | 
4 | def test_cli(server):
5 |     """Test that the curl-cffi CLI can perform basic GET requests."""
6 |     result = subprocess.check_output(
7 |         f"curl-cffi {server.url}",
8 |         shell=True,
9 |         text=True,
10 |         timeout=30,
11 |     )
12 |     # Should look like an HTTP response:
13 |     assert "Hello, world!" in result
14 | 
--------------------------------------------------------------------------------
/docs/impersonate/_index.rst:
--------------------------------------------------------------------------------
1 | Impersonate Guide
2 | =================
3 | 
4 | 
5 | You probably came across ``curl_cffi`` for its ability to impersonate browsers. Here is
6 | a tutorial on how to better impersonate using ``curl_cffi``.
7 | 
8 | 
9 | .. toctree::
10 |    :maxdepth: 2
11 |    :caption: Contents:
12 |    :glob:
13 | 
14 |    fingerprint
15 |    targets
16 |    customize
17 |    psk
18 |    faq
19 | 
20 | 
21 | 
--------------------------------------------------------------------------------
/docs/pro.rst:
--------------------------------------------------------------------------------
1 | curl_cffi pro version
2 | *********************
3 | 
4 | We offer a pro version for professional users of curl_cffi.
5 | 
6 | Feature Matrix
7 | ==============
8 | 
9 | ============ ===== ===== ===================
10 | Feature      http2 http3 Fingerprint updates
11 | ============ ===== ===== ===================
12 | Open Source  ✅    ❌    Major ones
13 | Pro version  ✅    ✅    Weekly
14 | ============ ===== ===== ===================
15 | 
--------------------------------------------------------------------------------
/examples/requests_like.py:
--------------------------------------------------------------------------------
1 | import curl_cffi
2 | 
3 | r = curl_cffi.get("https://tls.browserleaks.com/json")
4 | print("No impersonation", r.json())
5 | 
6 | 
7 | r = curl_cffi.get("https://tls.browserleaks.com/json", impersonate="chrome101")
8 | print("With impersonation", r.json())
9 | 
10 | 
11 | s = curl_cffi.Session(impersonate="chrome110")
12 | r = s.get("https://tls.browserleaks.com/json")
13 | print("With impersonation", r.json())
14 | 
--------------------------------------------------------------------------------
/examples/async_session.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | 
3 | import curl_cffi
4 | 
5 | 
6 | async def main():
7 |     async with curl_cffi.AsyncSession() as s:
8 |         r = await s.get("https://httpbin.org/headers")
9 |         print(r.text)
10 | 
11 |         r = await s.get("https://httpbin.org/stream/20", stream=True)
12 |         async for chunk in r.aiter_content():
13 |             print(chunk)
14 | 
15 | 
16 | if __name__ == "__main__":
17 |     asyncio.run(main())
18 | 
--------------------------------------------------------------------------------
/examples/websockets/short_running.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | 
3 | import curl_cffi
4 | 
5 | URL = "ws://echo.websocket.events"
6 | 
7 | 
8 | ws = curl_cffi.WebSocket().connect(URL)
9 | ws.send(b"Foo")
10 | reply = ws.recv()
11 | print(reply)
12 | 
13 | 
14 | async def async_examples():
15 |     async with curl_cffi.AsyncSession() as s:
16 |         ws = await s.ws_connect(URL)
17 |         await ws.send(b"Bar")
18 |         reply = await ws.recv()
19 |         print(reply)
20 | 
21 | 
22 | asyncio.run(async_examples())
--------------------------------------------------------------------------------
/benchmark/single_worker.csv:
--------------------------------------------------------------------------------
1 | name,size,duration
2 | requests,1k,1.6857
3 | httpx_sync,1k,1.0149
4 | tls_client,1k,0.7551
5 | curl_cffi_sync,1k,0.6563
6 | curl_cffi_raw,1k,0.4655
7 | pycurl,1k,0.4502
8 | requests,20k,1.6896
9 | httpx_sync,20k,1.0015
10 | tls_client,20k,2.0465
11 | curl_cffi_sync,20k,0.6723
12 | curl_cffi_raw,20k,0.4764
13 | pycurl,20k,0.4569
14 | requests,200k,1.8377
15 | httpx_sync,200k,1.1493
16 | tls_client,200k,20.2690
17 | curl_cffi_sync,200k,1.5508
18 | curl_cffi_raw,200k,1.3124
19 | pycurl,200k,1.0680
20 | 
--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
1 | # Required
2 | version: 2
3 | 
4 | # Set the OS, Python version and other tools you might need
5 | build:
6 |   os: ubuntu-22.04
7 |   tools:
8 |     python: "3.11"
9 |   apt_packages:
10 |     - build-essential
11 |     - libtool
12 |   jobs:
13 |     pre_install:
14 |       - make preprocess
15 |       - python -m pip install -e .
16 | 
17 | # Optionally declare the Python requirements required to build your docs
18 | python:
19 |   install:
20 |     - requirements: docs/requirements.txt
21 | 
22 | sphinx:
23 |   configuration: docs/conf.py
24 | 
25 | formats:
26 |   - pdf
27 |   - epub
28 | 
--------------------------------------------------------------------------------
/examples/websockets/long_running_async.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | 
3 | import curl_cffi
4 | 
5 | 
6 | async def main():
7 |     async with curl_cffi.AsyncSession() as s:
8 |         ws = await s.ws_connect("wss://api.gemini.com/v1/marketdata/BTCUSD")
9 |         print(
10 |             "For websockets, you need to set the $wss_proxy environment variable!\n"
11 |             "$https_proxy will not work!"
12 |         )
13 |         print(">>> Websocket open!")
14 | 
15 |         async for message in ws:
16 |             print(message)
17 | 
18 |         print("<<< Websocket closed!")
19 | 
20 | 
21 | asyncio.run(main())
22 | 
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup
2 | from wheel.bdist_wheel import bdist_wheel
3 | 
4 | 
5 | class bdist_wheel_abi3(bdist_wheel):
6 |     def get_tag(self):
7 |         python, abi, plat = super().get_tag()
8 | 
9 |         if python.startswith("cp"):
10 |             # on CPython, our wheels are abi3 and compatible back to 3.9
11 |             return "cp39", "abi3", plat
12 | 
13 |         return python, abi, plat
14 | 
15 | 
16 | setup(
17 |     # this option is only valid in setup.py
18 |     cffi_modules=["scripts/build.py:ffibuilder"],
19 |     cmdclass={
20 |         "bdist_wheel": bdist_wheel_abi3,
21 |     },
22 | )
23 | 
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/question.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Ask a question
3 | about: Asking how to use a feature, or when you are not sure if it's a bug
4 | title: ""
5 | labels: question
6 | 
7 | ---
8 | 
9 | **The question**
10 | 
11 | - What feature do you find confusing?
12 | - Which site does not work, and you don't have a clue why?
13 | 
14 | **Documentation suggestion**
15 | 
16 | If the documentation is missing or confusing, add your suggestion here.
17 | 18 | **Versions** 19 | 20 | If it's related to a specific environment, paste your env info here. 21 | 22 | - OS: [e.g. linux x64] 23 | - curl_cffi version [e.g. 0.5.7] 24 | - `pip freeze` dump 25 | -------------------------------------------------------------------------------- /benchmark/server.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from starlette.applications import Starlette 4 | from starlette.responses import PlainTextResponse 5 | from starlette.routing import Route 6 | 7 | random_1k = os.urandom(1 * 1024) 8 | random_20k = os.urandom(20 * 1024) 9 | random_200k = os.urandom(200 * 1024) 10 | 11 | 12 | app = Starlette( 13 | routes=[ 14 | Route("/1k", lambda r: PlainTextResponse(random_1k)), 15 | Route("/20k", lambda r: PlainTextResponse(random_20k)), 16 | Route("/200k", lambda r: PlainTextResponse(random_200k)), 17 | ], 18 | ) 19 | 20 | # Run: 21 | # gunicorn benchmark.server:app -b 127.0.0.1:8000 -n benchmark -w 8 -k \ 22 | # uvicorn.workers.UvicornWorker 23 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea or feature for this project 4 | title: "" 5 | labels: enhancement 6 | 7 | --- 8 | 9 | **Is your feature request related to a problem? Please describe.** 10 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 11 | 12 | **Describe the solution you'd like** 13 | A clear and concise description of what you want to happen. 14 | 15 | **Describe alternatives you've considered** 16 | A clear and concise description of any alternative solutions or features you've considered. 17 | 18 | **Additional context** 19 | Add any other context or screenshots about the feature request here. 
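As an aside on the benchmark files shown above: the Starlette app in benchmark/server.py exposes ``/1k``, ``/20k`` and ``/200k`` endpoints, so a minimal hand-rolled loop to sanity-check it might look like the sketch below. This is an illustrative snippet, not the official benchmark.py driver; the base URL assumes the gunicorn command quoted at the bottom of server.py.

# Illustrative sketch: time 100 GETs per payload size against benchmark/server.py.
import time

import curl_cffi

BASE = "http://127.0.0.1:8000"  # assumed from the gunicorn command in server.py

for size in ("1k", "20k", "200k"):
    start = time.time()
    for _ in range(100):
        curl_cffi.get(f"{BASE}/{size}")
    print(f"{size}: {time.time() - start:.4f}s for 100 requests")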
20 | -------------------------------------------------------------------------------- /tests/unittest/test_smoke.py: -------------------------------------------------------------------------------- 1 | # Simple smoke test to real world websites 2 | from curl_cffi import requests 3 | 4 | URLS = [ 5 | "https://www.google.com", 6 | "https://www.apple.com", 7 | ] 8 | 9 | 10 | def test_without_impersonate(): 11 | for url in URLS: 12 | r = requests.get(url) 13 | assert r.status_code == 200 14 | 15 | 16 | def test_with_impersonate(): 17 | for url in URLS: 18 | r = requests.get(url, impersonate="chrome") 19 | assert r.status_code == 200 20 | 21 | 22 | async def test_async(): 23 | async with requests.AsyncSession() as s: 24 | for url in URLS: 25 | r = await s.get(url) 26 | assert r.status_code == 200 27 | -------------------------------------------------------------------------------- /examples/curl_like.py: -------------------------------------------------------------------------------- 1 | from io import BytesIO 2 | 3 | from curl_cffi import Curl, CurlOpt 4 | 5 | buffer = BytesIO() 6 | c = Curl() 7 | c.setopt(CurlOpt.CUSTOMREQUEST, b"GET") 8 | c.setopt(CurlOpt.URL, b"https://tls.browserleaks.com/json") 9 | c.setopt(CurlOpt.WRITEDATA, buffer) 10 | c.perform() 11 | body = buffer.getvalue() 12 | print("NO impersonate:") 13 | print(body.decode()) 14 | print("") 15 | 16 | 17 | buffer = BytesIO() 18 | c.setopt(CurlOpt.WRITEDATA, buffer) 19 | c.setopt(CurlOpt.URL, b"https://tls.browserleaks.com/json") 20 | c.impersonate("chrome110") 21 | c.setopt(CurlOpt.HTTPHEADER, [b"User-Agent: Curl/impersonate"]) 22 | c.perform() 23 | body = buffer.getvalue() 24 | print("with impersonate:") 25 | print(body.decode()) 26 | c.close() 27 | -------------------------------------------------------------------------------- /benchmark/multiple_workers.csv: -------------------------------------------------------------------------------- 1 | name,size,duration 2 | requests,1k,1.0432 3 | httpx_sync,1k,0.7141 4 | tls_client,1k,0.2622 5 | curl_cffi_sync,1k,0.3528 6 | curl_cffi_raw,1k,0.1410 7 | pycurl,1k,0.1293 8 | aiohttp,1k,0.2924 9 | httpx_async,1k,1.7600 10 | curl_cffi_async,1k,0.3095 11 | requests,20k,1.0526 12 | httpx_sync,20k,0.6814 13 | tls_client,20k,1.5532 14 | curl_cffi_sync,20k,0.3530 15 | curl_cffi_raw,20k,0.1350 16 | pycurl,20k,0.0941 17 | aiohttp,20k,0.2929 18 | httpx_async,20k,1.6954 19 | curl_cffi_async,20k,0.3355 20 | requests,200k,1.4353 21 | httpx_sync,200k,1.3627 22 | tls_client,200k,15.2174 23 | curl_cffi_sync,200k,1.3735 24 | curl_cffi_raw,200k,1.0463 25 | pycurl,200k,1.0445 26 | aiohttp,200k,0.4401 27 | httpx_async,200k,3.4437 28 | curl_cffi_async,200k,0.8381 29 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | dev: 16 | python -m http.server --dir build/html/ --bind 0.0.0.0 17 | 18 | .PHONY: dev help Makefile 19 | 20 | # Catch-all target: route all unknown targets to Sphinx using the new 21 | # "make mode" option. 
$(O) is meant as a shortcut for $(SPHINXOPTS). 22 | %: Makefile 23 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 24 | -------------------------------------------------------------------------------- /tests/unittest/test_async.py: -------------------------------------------------------------------------------- 1 | from curl_cffi import AsyncCurl, Curl, CurlOpt 2 | 3 | 4 | async def test_init(server): 5 | ac = AsyncCurl() # noqa F841 6 | 7 | 8 | async def test_add_handle(server): 9 | ac = AsyncCurl() 10 | c = Curl() 11 | c.setopt(CurlOpt.URL, "http://example.com") 12 | c.setopt(CurlOpt.WRITEFUNCTION, lambda x: len(x)) 13 | fut = ac.add_handle(c) 14 | await fut 15 | 16 | 17 | async def test_socket_action(server): 18 | ac = AsyncCurl() 19 | running = ac.socket_action(-1, 0) 20 | # assert running == 0 21 | c = Curl() 22 | c.setopt(CurlOpt.URL, "http://example.com") 23 | c.setopt(CurlOpt.WRITEFUNCTION, lambda x: len(x)) 24 | fut = ac.add_handle(c) 25 | await fut 26 | running = ac.socket_action(-1, 0) # noqa F841 27 | # assert running == 1 28 | 29 | 30 | async def test_process_data(server): ... 31 | -------------------------------------------------------------------------------- /tests/integration/test_response_class.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from curl_cffi import requests 3 | 4 | 5 | def test_default_response(): 6 | response = requests.get("http://example.com") 7 | assert type(response) is requests.Response 8 | print(response.status_code) 9 | 10 | 11 | class CustomResponse(requests.Response): 12 | @property 13 | def status(self): 14 | return self.status_code 15 | 16 | 17 | def test_custom_response(): 18 | session = requests.Session(response_class=CustomResponse) 19 | response = session.get("http://example.com") 20 | assert isinstance(response, CustomResponse) 21 | assert hasattr(response, "status") 22 | print(response.status) 23 | 24 | 25 | class WrongTypeResponse: 26 | pass 27 | 28 | 29 | def test_wrong_type_custom_response(): 30 | with pytest.raises(TypeError): 31 | requests.Session(response_class=WrongTypeResponse) 32 | -------------------------------------------------------------------------------- /tests/threads/test_eventlet.py: -------------------------------------------------------------------------------- 1 | import eventlet 2 | 3 | eventlet.monkey_patch() 4 | 5 | import threading # noqa: E402 6 | import time # noqa: E402 7 | 8 | from curl_cffi import requests # noqa: E402 9 | 10 | 11 | def delay(): 12 | requests.get("http://192.168.64.5:8080/delay/2", thread="eventlet") 13 | 14 | 15 | def delay_not_working(): 16 | requests.get("http://192.168.64.5:8080/delay/2") 17 | 18 | 19 | def test_gevent_parallel(fn): 20 | start = time.time() 21 | threads = [] 22 | for _ in range(6): 23 | t = threading.Thread(target=fn) 24 | threads.append(t) 25 | t.start() 26 | for t in threads: 27 | t.join() 28 | # if no thread, the time should be 12 29 | print(time.time() - start) 30 | # assert time.time() - start < 3 31 | 32 | 33 | if __name__ == "__main__": 34 | test_gevent_parallel(delay_not_working) 35 | test_gevent_parallel(delay) 36 | -------------------------------------------------------------------------------- /tests/threads/test_gevent.py: -------------------------------------------------------------------------------- 1 | from gevent import monkey 2 | 3 | monkey.patch_all() 4 | 5 | import threading # noqa: E402 6 | import time # noqa: E402 7 | 8 | from curl_cffi import requests # noqa: E402 9 | 
10 | 
11 | def delay():
12 |     requests.get("http://192.168.64.5:8080/delay/2", thread="gevent")
13 | 
14 | 
15 | def delay_not_working():
16 |     requests.get("http://192.168.64.5:8080/delay/2")
17 | 
18 | 
19 | def test_gevent_parallel(fn):
20 |     start = time.time()
21 |     threads = []
22 |     for _ in range(6):
23 |         t = threading.Thread(target=fn)
24 |         threads.append(t)
25 |         t.start()
26 |     for t in threads:
27 |         t.join()
28 |     # without green threads, the time should be 12
29 |     print(time.time() - start)
30 |     # assert time.time() - start < 3
31 | 
32 | 
33 | if __name__ == "__main__":
34 |     test_gevent_parallel(delay_not_working)
35 |     test_gevent_parallel(delay)
36 | 
--------------------------------------------------------------------------------
/examples/custom_response_class.py:
--------------------------------------------------------------------------------
1 | from typing import cast
2 | 
3 | import curl_cffi
4 | from curl_cffi import Curl, CurlInfo
5 | 
6 | 
7 | class CustomResponse(curl_cffi.Response):
8 |     def __init__(
9 |         self, curl: Curl | None = None, request: curl_cffi.Request | None = None
10 |     ):
11 |         super().__init__(curl, request)
12 |         assert curl is not None  # the session always passes a Curl handle here
13 |         self.local_port = cast(int, curl.getinfo(CurlInfo.LOCAL_PORT))
14 |         self.connect_time = cast(float, curl.getinfo(CurlInfo.CONNECT_TIME))
15 | 
16 |     @property
17 |     def status(self):
18 |         return self.status_code
19 | 
20 |     def custom_method(self):
21 |         return "this is a custom method"
22 | 
23 | 
24 | session = curl_cffi.Session(response_class=CustomResponse)
25 | response: CustomResponse = session.get("http://example.com")
26 | print(f"{response.status=}")
27 | print(response.custom_method())
28 | print(f"{response.local_port=}")
29 | print(f"{response.connect_time=}")
30 | 
--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 | 
3 | pushd %~dp0
4 | 
5 | REM Command file for Sphinx documentation
6 | 
7 | if "%SPHINXBUILD%" == "" (
8 | 	set SPHINXBUILD=sphinx-build
9 | )
10 | set SOURCEDIR=.
11 | set BUILDDIR=build
12 | 
13 | %SPHINXBUILD% >NUL 2>NUL
14 | if errorlevel 9009 (
15 | 	echo.
16 | 	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
17 | 	echo.installed, then set the SPHINXBUILD environment variable to point
18 | 	echo.to the full path of the 'sphinx-build' executable. Alternatively you
19 | 	echo.may add the Sphinx directory to PATH.
20 | 	echo.
21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /curl_cffi/cli.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import sys 3 | import curl_cffi 4 | 5 | 6 | def main(): 7 | parser = argparse.ArgumentParser( 8 | prog="curl-cffi", 9 | formatter_class=argparse.ArgumentDefaultsHelpFormatter, 10 | description="A curl-like tool using curl-cffi with browser impersonation", 11 | ) 12 | parser.add_argument( 13 | "-i", 14 | "--impersonate", 15 | default="chrome", 16 | help="Browser to impersonate", 17 | ) 18 | parser.add_argument("urls", nargs="+", help="URLs to fetch") 19 | 20 | args = parser.parse_args() 21 | 22 | for url in args.urls: 23 | try: 24 | response = curl_cffi.requests.get(url, impersonate=args.impersonate) 25 | print(response.text) 26 | except Exception as e: 27 | print(f"Error fetching {url}: {e}", file=sys.stderr) 28 | sys.exit(1) 29 | 30 | 31 | if __name__ == "__main__": 32 | main() 33 | -------------------------------------------------------------------------------- /ffi/shim.c: -------------------------------------------------------------------------------- 1 | 2 | #include "shim.h" 3 | 4 | int _curl_easy_setopt(void* curl, int option, void* parameter) { 5 | // printf("****** hijack test begins: \n"); 6 | // int val = curl_easy_setopt(instance->curl, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0); 7 | // printf("****** hijack test ends. opt: %d, val: %d, result is: %d\n", CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0, val); 8 | // CURLoption opt_value = (CURLoption) option; 9 | // printf("option: %d, setopt parameter: %d\n", option, *(int*)parameter); 10 | // for integer options, we need to convert param from pointers to integers 11 | if (option < CURLOPTTYPE_OBJECTPOINT) { 12 | return (int)curl_easy_setopt(curl, (CURLoption)option, *(long*)parameter); 13 | } 14 | if (CURLOPTTYPE_OFF_T <= option && option < CURLOPTTYPE_BLOB) { 15 | return (int)curl_easy_setopt(curl, (CURLoption)option, *(curl_off_t*)parameter); 16 | } 17 | return (int)curl_easy_setopt(curl, (CURLoption)option, parameter); 18 | } 19 | -------------------------------------------------------------------------------- /examples/websockets/long_running.py: -------------------------------------------------------------------------------- 1 | from curl_cffi import WebSocket 2 | 3 | msg_count = 0 4 | 5 | 6 | def on_message(ws: WebSocket, message: str | bytes): 7 | global msg_count 8 | 9 | print("------------------------------------------------------") 10 | print(message) 11 | print("======================================================") 12 | 13 | msg_count += 1 14 | if msg_count >= 100: 15 | ws.close() 16 | 17 | 18 | def on_error(ws: WebSocket, error: Exception): 19 | print(error) 20 | 21 | 22 | def on_open(ws: WebSocket): 23 | print( 24 | "For websockets, you need to set $wss_proxy environment variable!\n" 25 | "$https_proxy will not work!" 26 | ) 27 | print(">>> Websocket open!") 28 | 29 | 30 | def on_close(ws: WebSocket, code: int, reason: str): 31 | print( 32 | f"<<< Websocket closed! 
code: {code}, reason: {reason}, clean: " 33 | f"{code in (1000, 1001)}" 34 | ) 35 | 36 | 37 | ws = WebSocket( 38 | on_open=on_open, 39 | on_close=on_close, 40 | on_message=on_message, 41 | on_error=on_error, 42 | ) 43 | ws.run_forever("wss://api.gemini.com/v1/marketdata/BTCUSD") 44 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | 4 | Copyright (c) 2018 multippt 5 | Copyright (c) 2022 curl_cffi developers 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy 8 | of this software and associated documentation files (the "Software"), to deal 9 | in the Software without restriction, including without limitation the rights 10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the Software is 12 | furnished to do so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in all 15 | copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | SOFTWARE. 24 | -------------------------------------------------------------------------------- /tests/integration/test_fingerprints.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from curl_cffi import requests 4 | 5 | JA3_URL = "https://tls.browserleaks.com/json" 6 | # Copied from my browser on macOS 7 | CHROME_JA3_HASH = "53ff64ddf993ca882b70e1c82af5da49" 8 | # Edge 101 is the same as Chrome 101 9 | EDGE_JA3_HASH = "53ff64ddf993ca882b70e1c82af5da49" 10 | # Same as safari 16.x 11 | SAFARI_JA3_HASH = "8468a1ef6cb71b13e1eef8eadf786f7d" 12 | 13 | 14 | def test_not_impersonate(): 15 | r = requests.get(JA3_URL) 16 | assert r.json()["ja3_hash"] != CHROME_JA3_HASH 17 | 18 | 19 | def test_impersonate(): 20 | r = requests.get(JA3_URL, impersonate="chrome101") 21 | assert r.json()["ja3_hash"] == CHROME_JA3_HASH 22 | 23 | 24 | def test_impersonate_edge(): 25 | r = requests.get(JA3_URL, impersonate="edge101") 26 | assert r.json()["ja3_hash"] == EDGE_JA3_HASH 27 | 28 | 29 | def test_impersonate_safari(): 30 | r = requests.get(JA3_URL, impersonate="safari15_5") 31 | assert r.json()["ja3_hash"] == SAFARI_JA3_HASH 32 | 33 | 34 | def test_impersonate_unknown(): 35 | with pytest.raises(requests.RequestsError, match="not supported"): 36 | requests.get(JA3_URL, impersonate="unknown") 37 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 
2 | #
3 | # For the full list of built-in configuration values, see the documentation:
4 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
5 | 
6 | # -- Project information -----------------------------------------------------
7 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
8 | import os
9 | import sys
10 | 
11 | sys.path.insert(0, os.path.abspath("../"))
12 | 
13 | project = "curl_cffi"
14 | copyright = "2023-2025, lexiforest"
15 | author = "lexiforest"
16 | 
17 | # -- General configuration ---------------------------------------------------
18 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
19 | 
20 | extensions = [
21 |     "sphinx.ext.todo",
22 |     "sphinx.ext.viewcode",
23 |     "sphinx.ext.autodoc",
24 |     "sphinx.ext.napoleon",
25 | ]
26 | 
27 | templates_path = ["_templates"]
28 | exclude_patterns = []
29 | 
30 | root_doc = "index"
31 | 
32 | 
33 | # -- Options for HTML output -------------------------------------------------
34 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
35 | 
36 | html_theme = "bizstyle"
37 | html_static_path = ["_static"]
38 | 
--------------------------------------------------------------------------------
/docs/dev.rst:
--------------------------------------------------------------------------------
1 | Development and contributing
2 | ============================
3 | 
4 | This page documents how to compile curl-impersonate and curl-cffi from source. If a binary
5 | package is not available for your platform, you may refer to this page for some inspiration.
6 | 
7 | First, you need to check if there are libcurl-impersonate binaries for your platform. If
8 | so, you can simply download and install them.
9 | 
10 | For now, a pre-compiled ``libcurl-impersonate`` is downloaded from GitHub and built
11 | into a bdist wheel, which is a binary package format used by PyPI. However, the
12 | right way is to download the curl and curl-impersonate sources on our side and compile
13 | them all together.
14 | 
15 | macOS
16 | -----
17 | 
18 | To install the local editable build:
19 | 
20 | .. code-block:: shell
21 | 
22 |     # This is for using the libcurl-impersonate built by GitHub actions
23 | 
24 |     sudo mkdir /Users/runner
25 |     sudo chmod 777 /Users/runner
26 | 
27 |     # Dependencies
28 | 
29 |     brew install libidn2 zstd
30 | 
31 |     # Then install
32 | 
33 |     pip install -e .[test]
34 |     pip install -e .[dev]
35 | 
36 | Contributing
37 | ------------
38 | 
39 | When opening a PR, please do not use the ``main`` branch in your fork, otherwise I cannot
40 | add my modifications, such as unit tests.
41 | 
--------------------------------------------------------------------------------
/docs/vs-requests.rst:
--------------------------------------------------------------------------------
1 | Compatibility with requests
2 | ***************************
3 | 
4 | Although we try our best to mimic the requests API, some functionality is not easy to implement and was left out.
5 | Here is a list of known incompatibilities:
6 | 
7 | - The files API is slightly different, but more error-proof.
8 | - Retries are not supported yet, tracked in `#24 <https://github.com/lexiforest/curl_cffi/issues/24>`_.
9 | - Redirect history is not supported, tracked in `#82 <https://github.com/lexiforest/curl_cffi/issues/82>`_.
10 | - Cookies with empty domains may be lost during redirects, tracked in `#55 <https://github.com/lexiforest/curl_cffi/issues/55>`_.
11 | - The response object can not be pickled.
12 | - The ``requests`` proxies dict is supported, but we prefer ``proxy=...``, unless you really use different proxies for http and https.
13 | - You can not mount transports/adapters; instead, you can use ``curl_cffi`` as an adapter for ``requests``.
14 | 
15 | 
16 | Transports and Adapters
17 | =======================
18 | 
19 | ``curl_cffi`` is deeply coupled with ``libcurl-impersonate``. Unlike ``requests`` or ``httpx``,
20 | there is no way to use a different networking library or mount different adapters.
21 | 
22 | Alternatively, you can use ``curl-cffi`` as a requests adapter via `curl-adapter `_.
23 | In this way, you get the full functionality of requests.
24 | 
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug report
3 | about: Report a bug in curl_cffi
4 | title: ""
5 | labels: bug
6 | 
7 | ---
8 | 
9 | 
14 | 
15 | Please check the following items and answer all the questions when reporting a bug,
16 | otherwise it will be closed immediately.
17 | 
18 | - [ ] **This is NOT a site-related "bug"**, e.g. some site blocks me when using ``curl_cffi``,
19 |       UNLESS you have verified that the reason is imperfect impersonation.
20 | - [ ] A code snippet that can reproduce this bug will be provided, even if it's a one-liner.
21 | - [ ] Version and environment information will be pasted as below.
22 | 
23 | **Describe the bug**
24 | A clear and concise description of what the bug is.
25 | 
26 | **To Reproduce**
27 | ```py
28 | # Minimal reproducible code, like target websites, and request parameters, etc.
29 | ```
30 | 
31 | **Expected behavior**
32 | A clear and concise description of what you expected to happen.
33 | 
34 | **Versions**
35 | - OS: [e.g. linux x64, Windows 7, macOS Sequoia]
36 | - curl_cffi version [e.g. 0.5.7, 0.7.3]
37 | - `pip freeze` dump
38 | 
39 | **Additional context**
40 | - Which session are you using? async or sync?
41 | - If using an async session, which loop implementation are you using?
42 | - If you have tried, does this work with other http clients, e.g. `requests`, `httpx`, or a real browser?
43 | 
--------------------------------------------------------------------------------
/examples/upload.py:
--------------------------------------------------------------------------------
1 | """
2 | We do not support requests.post(url, files=...), for two reasons:
3 | 
4 | - Curl's mime struct needs to be freed manually after each request.
5 | - requests' files parameter is quite a mess; it's just not worth it.
6 | 
7 | Use multipart instead; it's simple and straightforward.
8 | """
9 | 
10 | import curl_cffi
11 | 
12 | mp = curl_cffi.CurlMime()
13 | mp.addpart(
14 |     name="image",  # form field name
15 |     content_type="image/png",  # mime type
16 |     filename="image.png",  # filename seen by remote server
17 |     local_path="./image.png",  # local file to upload
18 | )
19 | 
20 | with open("./image.jpg", "rb") as file:
21 |     data = file.read()
22 | 
23 | # you can add multiple files under the same field name
24 | mp.addpart(
25 |     name="image",
26 |     content_type="image/jpg",
27 |     filename="image.jpg",
28 |     data=data,  # note the difference vs above
29 | )
30 | 
31 | # from a list
32 | mp = curl_cffi.CurlMime.from_list(
33 |     [
34 |         {
35 |             "name": "text",
36 |             "content_type": "text/plain",
37 |             "filename": "test.txt",
38 |             "local_path": "./test.txt",
39 |         },
40 |         {
41 |             "name": "foo",
42 |             "content_type": "text/plain",
43 |             "filename": "another.txt",
44 |             "data": "bar",
45 |         },
46 |     ]
47 | )
48 | 
49 | r = curl_cffi.post("https://httpbin.org/post", data={"foo": "bar"}, multipart=mp)
50 | print(r.json())
51 | 
52 | # close the form object, otherwise you have to wait for GC to recycle it. If your files
53 | # are too large, you may run out of memory quickly.
54 | mp.close()
55 | 
--------------------------------------------------------------------------------
/docs/community.rst:
--------------------------------------------------------------------------------
1 | Community
2 | =========
3 | 
4 | Scrapy integrations
5 | -------------------
6 | 
7 | If you are using scrapy, check out these middlewares:
8 | 
9 | - `divtiply/scrapy-curl-cffi <https://github.com/divtiply/scrapy-curl-cffi>`_
10 | - `tieyongjie/scrapy-fingerprint <https://github.com/tieyongjie/scrapy-fingerprint>`_
11 | - `jxlil/scrapy-impersonate <https://github.com/jxlil/scrapy-impersonate>`_
12 | 
13 | 
14 | Using with eventlet/gevent
15 | --------------------------
16 | 
17 | Just set ``thread`` to eventlet or gevent.
18 | 
19 | .. code-block:: python
20 | 
21 |     from curl_cffi import requests
22 | 
23 |     s = requests.Session(thread="eventlet")
24 |     s.get(url)
25 | 
26 | 
27 | As a urllib3/requests adapter
28 | -----------------------------
29 | 
30 | You can also use curl-cffi as a requests adapter via `curl-adapter `_.
31 | In this way, you get the full functionality of requests.
32 | 
33 | .. code-block:: python
34 | 
35 |     import requests
36 |     from curl_adapter import CurlCffiAdapter
37 | 
38 |     session = requests.Session()
39 |     session.mount("http://", CurlCffiAdapter())
40 |     session.mount("https://", CurlCffiAdapter())
41 | 
42 |     # just use the requests session like you normally would
43 |     session.get("https://example.com")
44 | 
45 | 
46 | As an httpx transport
47 | ---------------------
48 | 
49 | You can also use curl-cffi as an httpx transport via `httpx-curl-cffi `_.
50 | With this, you get the full functionality of httpx.
51 | 
52 | .. code-block:: python
53 | 
54 |     from httpx import Client, AsyncClient
55 |     from httpx_curl_cffi import CurlTransport, AsyncCurlTransport, CurlOpt
56 | 
57 |     client = Client(transport=CurlTransport(impersonate="chrome", default_headers=True))
58 |     client.get("https://tools.scrapfly.io/api/fp/ja3")
59 | 
60 |     async_client = AsyncClient(transport=AsyncCurlTransport(
61 |         impersonate="chrome",
62 |         default_headers=True,
63 |         # required for parallel requests, see curl_cffi issues below
64 |         curl_options={CurlOpt.FRESH_CONNECT: True}
65 |     ))
66 | 
67 | 
--------------------------------------------------------------------------------
/docs/impersonate/fingerprint.rst:
--------------------------------------------------------------------------------
1 | What is TLS and http/2, http/3 fingerprinting?
2 | ----------------------------------------------
3 | 
4 | TLS and http/2
5 | ~~~~~~~~~~~~~~
6 | 
7 | TLS is the ``s`` in ``https``. ``https`` has been uniformly deployed across the world.
8 | There are many extensions and cipher suites an implementation can choose to use. According to
9 | the RFC, there are many valid combinations. But in reality, browser vendors tend to use
10 | fixed combinations, and these combinations can be used to identify whether a request came from a
11 | certain browser or an automated script. The digest of this combination is called a TLS
12 | fingerprint. The most common digesting method is called ``JA3``.
13 | 
14 | Similar to TLS, there are a few settings in an http/2 connection that can be used to identify the
15 | source of a request. The most commonly used digesting method was proposed by Akamai, and is called
16 | the Akamai http2 fingerprint.
17 | 
18 | To learn the details of TLS and http2 fingerprinting, you can read these great articles from lwthiker:
19 | 
20 | 1. https://lwthiker.com/networks/2022/06/17/tls-fingerprinting.html
21 | 2. https://lwthiker.com/networks/2022/06/17/http2-fingerprinting.html
22 | 
23 | The format of the JA3 and Akamai digests is briefly discussed below.
24 | 
25 | http/3
26 | ~~~~~~
27 | 
28 | Http/3 is the newest version of http. Basically, it is http/2 reimplemented over QUIC,
29 | so it can be fingerprinted in a similar way to http/2.
30 | 
31 | Http/3 fingerprints have not yet been publicly exploited and reported. But given the rapidly
32 | increasing market share of http/3 (35% of internet traffic), it is expected that some strict
33 | WAF vendors have begun to utilize http/3 fingerprinting.
34 | 
35 | Many users have also noticed that, for a lot of sites, there is less or even no
36 | detection when using http/3.
37 | 
38 | ``curl_cffi`` provides TLS and http/2 impersonation in the open source version.
39 | 
40 | For http/3 impersonation and http/3 proxy support, please head over to `impersonate.pro <https://impersonate.pro>`_
41 | for the commercial version of ``curl_cffi``.
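As a concrete illustration of the JA3 format mentioned above: a JA3 string is five
comma-separated fields (TLS version, cipher suites, extensions, elliptic curves, and
EC point formats). The sketch below decomposes the okhttp fingerprint that ships in
``examples/impersonate.py`` elsewhere in this repo:

.. code-block:: python

    # Split a JA3 string into its five comma-separated fields.
    ja3 = (
        "771,"
        "4865-4866-4867-49195-49196-52393-49199-49200-52392-49171-49172-156-157-47-53,"
        "0-23-65281-10-11-35-16-5-13-51-45-43-21,"
        "29-23-24,"
        "0"
    )
    version, ciphers, extensions, curves, point_formats = ja3.split(",")
    print(version)                   # 771 == TLS 1.2 (0x0303)
    print(ciphers.split("-"))        # cipher suite IDs, in order
    print(extensions.split("-"))     # extension IDs, in order
    print(curves.split("-"))         # supported groups
    print(point_formats.split("-"))  # EC point formats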
42 | 43 | -------------------------------------------------------------------------------- /curl_cffi/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = [ 2 | "Curl", 3 | "AsyncCurl", 4 | "CurlMime", 5 | "CurlError", 6 | "CurlInfo", 7 | "CurlOpt", 8 | "CurlMOpt", 9 | "CurlECode", 10 | "CurlHttpVersion", 11 | "CurlSslVersion", 12 | "CurlWsFlag", 13 | "config_warnings", 14 | "ffi", 15 | "is_pro", 16 | "lib", 17 | "Session", 18 | "AsyncSession", 19 | "BrowserType", 20 | "BrowserTypeLiteral", 21 | "request", 22 | "head", 23 | "get", 24 | "post", 25 | "put", 26 | "patch", 27 | "delete", 28 | "options", 29 | "Cookies", 30 | "Headers", 31 | "Request", 32 | "Response", 33 | "AsyncWebSocket", 34 | "WebSocket", 35 | "WebSocketError", 36 | "WebSocketClosed", 37 | "WebSocketTimeout", 38 | "WsCloseCode", 39 | "ExtraFingerprints", 40 | "CookieTypes", 41 | "HeaderTypes", 42 | "ProxySpec", 43 | "exceptions", 44 | ] 45 | 46 | import _cffi_backend # noqa: F401 # required by _wrapper 47 | 48 | from .__version__ import __curl_version__, __description__, __title__, __version__ # noqa: F401 49 | 50 | # This line includes _wrapper.so into the wheel 51 | from ._wrapper import ffi, lib 52 | from .aio import AsyncCurl 53 | from .const import ( 54 | CurlECode, 55 | CurlHttpVersion, 56 | CurlInfo, 57 | CurlMOpt, 58 | CurlOpt, 59 | CurlSslVersion, 60 | CurlWsFlag, 61 | ) 62 | from .curl import Curl, CurlError, CurlMime 63 | 64 | from .requests import ( 65 | AsyncSession, 66 | AsyncWebSocket, 67 | BrowserType, 68 | BrowserTypeLiteral, 69 | Cookies, 70 | CookieTypes, 71 | ExtraFingerprints, 72 | Headers, 73 | HeaderTypes, 74 | ProxySpec, 75 | Request, 76 | Response, 77 | Session, 78 | WebSocket, 79 | WebSocketClosed, 80 | WebSocketError, 81 | WebSocketTimeout, 82 | WsCloseCode, 83 | delete, 84 | exceptions, 85 | get, 86 | head, 87 | options, 88 | patch, 89 | post, 90 | put, 91 | request, 92 | ) 93 | 94 | from .utils import config_warnings, is_pro 95 | 96 | config_warnings(on=False) 97 | -------------------------------------------------------------------------------- /examples/impersonate.py: -------------------------------------------------------------------------------- 1 | import curl_cffi 2 | 3 | # OKHTTP impersonatation examples 4 | # credits: https://github.com/bogdanfinn/tls-client/blob/master/profiles/contributed_custom_profiles.go 5 | 6 | url = "https://tls.browserleaks.com/json" 7 | 8 | okhttp4_android10_ja3 = ",".join( 9 | [ 10 | "771", 11 | "4865-4866-4867-49195-49196-52393-49199-49200-52392-49171-49172-156-157-47-53", 12 | "0-23-65281-10-11-35-16-5-13-51-45-43-21", 13 | "29-23-24", 14 | "0", 15 | ] 16 | ) 17 | 18 | okhttp4_android10_akamai = "4:16777216|16711681|0|m,p,a,s" 19 | 20 | extra_fp = { 21 | "tls_signature_algorithms": [ 22 | "ecdsa_secp256r1_sha256", 23 | "rsa_pss_rsae_sha256", 24 | "rsa_pkcs1_sha256", 25 | "ecdsa_secp384r1_sha384", 26 | "rsa_pss_rsae_sha384", 27 | "rsa_pkcs1_sha384", 28 | "rsa_pss_rsae_sha512", 29 | "rsa_pkcs1_sha512", 30 | "rsa_pkcs1_sha1", 31 | ] 32 | # other options: 33 | # tls_min_version: int = CurlSslVersion.TLSv1_2 34 | # tls_grease: bool = False 35 | # tls_permute_extensions: bool = False 36 | # tls_cert_compression: Literal["zlib", "brotli"] = "brotli" 37 | # tls_signature_algorithms: Optional[List[str]] = None 38 | # http2_stream_weight: int = 256 39 | # http2_stream_exclusive: int = 1 40 | # See requests/impersonate.py and tests/unittest/test_impersonate.py for more 41 | # examples 42 | } 43 | 44 | 45 | r = 
curl_cffi.get( 46 | url, ja3=okhttp4_android10_ja3, akamai=okhttp4_android10_akamai, extra_fp=extra_fp 47 | ) 48 | 49 | print(r.json()) 50 | 51 | 52 | # Special firefox extension 53 | 54 | 55 | # ruff: noqa: E501 56 | extra_fp = { 57 | "tls_delegated_credential": "ecdsa_secp256r1_sha256:ecdsa_secp384r1_sha384:ecdsa_secp521r1_sha512:ecdsa_sha1", 58 | "tls_record_size_limit": 4001, 59 | } 60 | 61 | # Note that the ja3 string also includes extensiion: 28 and 34 62 | # ruff: noqa: E501 63 | ja3 = "771,4865-4867-4866-49195-49199-52393-52392-49196-49200-49162-49161-49171-49172-156-157-47-53,0-23-65281-10-11-35-16-5-34-18-51-43-13-45-28-27-65037,4588-29-23-24-25-256-257,0" 64 | 65 | r = curl_cffi.get(url, ja3=ja3, extra_fp=extra_fp) 66 | print(r.json()) 67 | -------------------------------------------------------------------------------- /tests/unittest/test_headers.py: -------------------------------------------------------------------------------- 1 | from curl_cffi.requests import Headers 2 | from curl_cffi.requests.utils import update_header_line 3 | 4 | 5 | def test_headers(): 6 | headers = Headers() 7 | headers["foo"] = "bar" 8 | headers["foo"] = "baz" 9 | assert headers["foo"] == "baz" 10 | assert headers.get("foo") == "baz" 11 | assert headers.get("bar") is None 12 | assert headers 13 | 14 | 15 | def test_headers_none_value(): 16 | headers = Headers({"foo": None, "bar": ""}) 17 | assert headers.get("foo") is None 18 | assert headers["bar"] == "" 19 | 20 | 21 | def test_header_output(): 22 | headers = Headers({"X-Foo": "bar"}) 23 | header_list = headers.multi_items() 24 | assert header_list[0][0] == "X-Foo" 25 | 26 | 27 | def test_replace_header(): 28 | header_lines = [] 29 | update_header_line(header_lines, "content-type", "image/png") 30 | assert header_lines == ["content-type: image/png"] 31 | update_header_line(header_lines, "Content-Type", "application/json") 32 | assert header_lines == ["content-type: image/png"] 33 | update_header_line(header_lines, "Content-Type", "application/json", replace=True) 34 | assert header_lines == ["Content-Type: application/json"] 35 | update_header_line(header_lines, "Host", "example.com", replace=True) 36 | assert header_lines == ["Content-Type: application/json", "Host: example.com"] 37 | 38 | 39 | def test_none_headers(): 40 | """Allow using None to explictly remove headers""" 41 | headers = Headers({"Content-Type": None}) 42 | assert headers["content-type"] is None 43 | 44 | 45 | def test_wrapped_headers_preserve_encoding(): 46 | headers = Headers({"foo": "bar"}, encoding="utf-8") 47 | wrapped_headers = Headers(headers) 48 | assert wrapped_headers.encoding == "utf-8" 49 | 50 | 51 | def test_wrapped_empty_headers_preserve_encoding(): 52 | headers = Headers({}, encoding="utf-8") 53 | wrapped_headers = Headers(headers) 54 | assert wrapped_headers.encoding == "utf-8" 55 | 56 | 57 | def test_wrapped_headers_change_encoding(): 58 | headers = Headers({"foo": "bar"}, encoding="utf-8") 59 | wrapped_headers = Headers(headers, encoding="ascii") 60 | assert wrapped_headers.encoding == "ascii" 61 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .ONESHELL: 2 | SHELL := bash 3 | 4 | # this is the upstream libcurl-impersonate version 5 | VERSION := 1.2.5 6 | CURL_VERSION := curl-8_15_0 7 | 8 | $(CURL_VERSION): 9 | curl -L https://github.com/curl/curl/archive/$(CURL_VERSION).zip -o curl.zip 10 | unzip -q -o curl.zip 11 | mv 
curl-$(CURL_VERSION) $(CURL_VERSION) 12 | 13 | curl-impersonate-$(VERSION)/patches: $(CURL_VERSION) 14 | curl -L "https://github.com/lexiforest/curl-impersonate/archive/refs/tags/v$(VERSION).tar.gz" \ 15 | -o "curl-impersonate-$(VERSION).tar.gz" 16 | tar -xf curl-impersonate-$(VERSION).tar.gz 17 | 18 | .preprocessed: curl-impersonate-$(VERSION)/patches 19 | cd $(CURL_VERSION) 20 | # for p in $`_. 5 | 6 | Browser versions will be added **only** when their fingerprints change. If you see a version, e.g. 7 | ``chrome122``, was skipped, you can simply impersonate it with your own headers and the previous version. 8 | 9 | If you are too busy to look up those details, you can try our commercial version at `impersonate.pro `_, 10 | which has a weekly updated list of browser profiles and even more browser types. 11 | 12 | If you are trying to impersonate a target other than a browser, use ``ja3=...``, ``akamai=...`` and ``extra_fp=...`` 13 | to specify your own customized fingerprints. See below for details. 14 | 15 | - chrome99 16 | - chrome100 17 | - chrome101 18 | - chrome104 19 | - chrome107 20 | - chrome110 21 | - chrome116 :sup:`1` 22 | - chrome119 :sup:`1` 23 | - chrome120 :sup:`1` 24 | - chrome123 :sup:`3` 25 | - chrome124 :sup:`3` 26 | - chrome131 :sup:`4` 27 | - chrome133a :sup:`5` :sup:`6` 28 | - chrome136 :sup:`7` 29 | - chrome99_android 30 | - chrome131_android :sup:`4` 31 | - edge99 32 | - edge101 33 | - safari153 :sup:`2` 34 | - safari155 :sup:`2` 35 | - safari170 :sup:`1` 36 | - safari172_ios :sup:`1` 37 | - safari180 :sup:`4` 38 | - safari180_ios :sup:`4` 39 | - safari184 :sup:`7` 40 | - safari184_ios :sup:`7` 41 | - safari260 :sup:`8` 42 | - safari260_ios :sup:`8` 43 | - firefox133 :sup:`5` 44 | - tor145 :sup:`7` 45 | 46 | Notes: 47 | 48 | 1. Added in version ``0.6.0``. 49 | 2. Fixed in version ``0.6.0``, previous http2 fingerprints were `not correct `_. 50 | 3. Added in version ``0.7.0``. 51 | 4. Added in version ``0.8.0``. 52 | 5. Added in version ``0.9.0``. 53 | 6. The version postfix ``-a``(e.g. ``chrome133a``) means that this is an alternative version, i.e. the fingerprint has not been officially updated by browser, but has been observed because of A/B testing. 54 | 7. Added in version ``0.11.0`` 55 | 8. Added in version ``0.12.0`` 56 | 57 | Which target version to use? 58 | ---------------------------- 59 | 60 | Generally speaking, you should use the latest Chrome or Safari versions. As of v0.11, they're 61 | ``chrome136``, ``safari184`` and ``safari184_ios``. To always impersonate the latest available 62 | browser versions, you can simply use ``chrome``, ``firefox``, ``safari`` and ``chrome_android``, ``safari_ios``. 63 | 64 | .. code-block:: python 65 | 66 | import curl_cffi 67 | 68 | curl_cffi.get(url, impersonate="chrome") 69 | 70 | 71 | Tips: 72 | 73 | iOS has restrictions on WebView and TLS libs, so ``safari_*_ios`` should work for a lot of apps. 74 | If you encountered an android app with custom fingerprints, you can try the ``safari_ios`` 75 | fingerprints, given that this app should have an iOS version. 
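Building on the target list above, here is a short sketch contrasting the two approaches:
floating on a latest alias vs. pinning an exact target for reproducibility. The endpoint
and the ``ja3_hash`` key follow the browserleaks usage elsewhere in this repo:

.. code-block:: python

    import curl_cffi

    # Floating alias: resolves to the newest fingerprint this build supports.
    r = curl_cffi.get("https://tls.browserleaks.com/json", impersonate="chrome")
    print(r.json()["ja3_hash"])

    # Pinned target: stays stable across curl_cffi upgrades.
    s = curl_cffi.Session(impersonate="chrome131")
    print(s.get("https://tls.browserleaks.com/json").json()["ja3_hash"])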
76 | -------------------------------------------------------------------------------- /benchmark/ws_bench_1_server.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Websocket server example - TLS (WSS) 4 | """ 5 | 6 | from asyncio import ( 7 | FIRST_COMPLETED, 8 | AbstractEventLoop, 9 | CancelledError, 10 | Task, 11 | get_running_loop, 12 | wait, 13 | ) 14 | 15 | from aiohttp import web 16 | from ws_bench_utils import binary_data_generator, config, get_loop, logger 17 | 18 | 19 | async def recv(ws: web.WebSocketResponse) -> None: 20 | """Just receive the data in a tight loop. Do nothing else. 21 | 22 | Args: 23 | ws (web.WebSocketResponse): The WebSocket server object. 24 | """ 25 | async for msg in ws: 26 | if msg.type == web.WSMsgType.BINARY: 27 | continue 28 | 29 | if msg.type == web.WSMsgType.ERROR: 30 | break 31 | 32 | 33 | async def send(ws: web.WebSocketResponse) -> None: 34 | """Send the generated chunks until total size is hit. 35 | 36 | Args: 37 | ws (web.WebSocketResponse): The WebSocket server object. 38 | """ 39 | try: 40 | async for binary_data in binary_data_generator( 41 | config.total_gb, config.chunk_size 42 | ): 43 | await ws.send_bytes(binary_data) 44 | except ConnectionError as exc: 45 | logger.warning(exc) 46 | 47 | 48 | async def ws_handler(request: web.Request) -> web.WebSocketResponse: 49 | """Handle the connection and run the relevant server action, send/recv/both. 50 | 51 | Args: 52 | request (web.Request): Web request object. 53 | 54 | Returns: 55 | web.WebSocketResponse: Response object 56 | """ 57 | loop: AbstractEventLoop = get_running_loop() 58 | waiters: set[Task[None]] = set() 59 | ws: web.WebSocketResponse = web.WebSocketResponse() 60 | _ = await ws.prepare(request) 61 | logger.info("Secure client connected.") 62 | 63 | try: 64 | # NOTE: Uncomment for send/recv or both concurrently 65 | # waiters.add(loop.create_task(recv(ws))) 66 | waiters.add(loop.create_task(send(ws))) 67 | 68 | _, _ = await wait(waiters, return_when=FIRST_COMPLETED) 69 | 70 | except Exception: 71 | logger.exception("Connection closed with exception") 72 | 73 | finally: 74 | for wait_task in waiters: 75 | try: 76 | if not wait_task.done(): 77 | _ = wait_task.cancel() 78 | await wait_task 79 | except CancelledError: 80 | ... 
81 | 82 | logger.info("Client disconnected.") 83 | return ws 84 | 85 | 86 | def main() -> None: 87 | """Entrypoint""" 88 | 89 | # Create and start the aiohttp server 90 | app: web.Application = web.Application() 91 | _ = app.add_routes(routes=[web.get("/ws", ws_handler)]) 92 | logger.info("Starting server on %s", config.srv_path) 93 | web.run_app( 94 | app, 95 | host=config.srv_host.exploded, 96 | port=config.srv_port, 97 | loop=get_loop(), 98 | ssl_context=config.ssl_ctx, 99 | access_log=logger, 100 | print=logger.debug, 101 | ) 102 | 103 | 104 | if __name__ == "__main__": 105 | main() 106 | -------------------------------------------------------------------------------- /ffi/cdef.c: -------------------------------------------------------------------------------- 1 | // easy interfaces 2 | void *curl_easy_init(); 3 | int _curl_easy_setopt(void *curl, int option, void *param); 4 | int curl_easy_getinfo(void *curl, int option, void *ret); 5 | int curl_easy_perform(void *curl); 6 | void curl_easy_cleanup(void *curl); 7 | void curl_easy_reset(void *curl); 8 | int curl_easy_impersonate(void *curl, char *target, int default_headers); 9 | void *curl_easy_duphandle(void *curl); 10 | int curl_easy_upkeep(void *curl); 11 | 12 | char *curl_version(); 13 | 14 | // slist interfaces 15 | struct curl_slist { 16 | char *data; 17 | struct curl_slist *next; 18 | }; 19 | struct curl_slist *curl_slist_append(struct curl_slist *list, char *string); 20 | void curl_slist_free_all(struct curl_slist *list); 21 | 22 | // callbacks 23 | extern "Python" size_t buffer_callback(void *ptr, size_t size, size_t nmemb, void *userdata); 24 | extern "Python" size_t write_callback(void *ptr, size_t size, size_t nmemb, void *userdata); 25 | extern "Python" int debug_function(void *curl, int type, char *data, size_t size, void *clientp); 26 | 27 | // multi interfaces 28 | struct CURLMsg { 29 | int msg; /* what this message means */ 30 | void *easy_handle; /* the handle it concerns */ 31 | union { 32 | void *whatever; /* message-specific data */ 33 | int result; /* return code for transfer */ 34 | } data; 35 | }; 36 | void *curl_multi_init(); 37 | int curl_multi_cleanup(void *curlm); 38 | int curl_multi_add_handle(void *curlm, void *curl); 39 | int curl_multi_remove_handle(void *curlm, void *curl); 40 | int curl_multi_socket_action(void *curlm, int sockfd, int ev_bitmask, int *running_handle); 41 | int curl_multi_setopt(void *curlm, int option, void* param); 42 | int curl_multi_assign(void *curlm, int sockfd, void *sockptr); 43 | int curl_multi_perform(void *curlm, int *running_handle); 44 | int curl_multi_timeout(void *curlm, long *timeout_ms); 45 | int curl_multi_wait(void *curlm, void *extra_fds, unsigned int extra_nfds, int timeout_ms, int *numfds); 46 | int curl_multi_poll(void *curlm, void *extra_fds, unsigned int extra_nfds, int timeout_ms, int *numfds); 47 | int curl_multi_wakeup(void *curlm); 48 | const char *curl_multi_strerror(int code); 49 | struct CURLMsg *curl_multi_info_read(void* curlm, int *msg_in_queue); 50 | 51 | // multi callbacks 52 | extern "Python" int socket_function(void *curl, int sockfd, int what, void *clientp, void *socketp); 53 | extern "Python" int timer_function(void *curlm, int timeout_ms, void *clientp); 54 | 55 | // websocket 56 | struct curl_ws_frame { 57 | int age; /* zero */ 58 | int flags; /* See the CURLWS_* defines */ 59 | uint64_t offset; /* the offset of this data into the frame */ 60 | uint64_t bytesleft; /* number of pending bytes left of the payload */ 61 | size_t len; 62 | ...; 63 
| }; 64 | 65 | int curl_ws_recv(void *curl, void *buffer, size_t buflen, size_t *recv, const struct curl_ws_frame **meta); 66 | int curl_ws_send(void *curl, const void *buffer, size_t buflen, size_t *sent, int fragsize, unsigned int sendflags); 67 | 68 | // mime 69 | void *curl_mime_init(void* curl); // -> form 70 | void *curl_mime_addpart(void *form); // -> part/field 71 | int curl_mime_name(void *field, char *name); 72 | int curl_mime_data(void *field, char *name, int datasize); 73 | int curl_mime_type(void *field, char *type); 74 | int curl_mime_filename(void *field, char *filename); 75 | int curl_mime_filedata(void *field, char *filename); 76 | void curl_mime_free(void *form); 77 | -------------------------------------------------------------------------------- /tests/unittest/test_websockets.py: -------------------------------------------------------------------------------- 1 | from curl_cffi.requests import AsyncSession, WebSocket, Session 2 | from curl_cffi.requests.websockets import CurlWsFlag 3 | 4 | 5 | def test_websocket(ws_server): 6 | ws = WebSocket() 7 | ws.connect(ws_server.url) 8 | 9 | # deprecated 10 | with Session() as s: 11 | s.ws_connect(ws_server.url) 12 | 13 | 14 | def test_hello(ws_server): 15 | ws = WebSocket() 16 | ws.connect(ws_server.url) 17 | ws.send(b"Foo me once") 18 | content, _ = ws.recv() 19 | assert content == b"Foo me once" 20 | 21 | # deprecated 22 | with Session() as s: 23 | ws = s.ws_connect(ws_server.url) 24 | ws.send(b"Foo me once") 25 | content, _ = ws.recv() 26 | assert content == b"Foo me once" 27 | 28 | 29 | def test_hello_twice(ws_server): 30 | ws = WebSocket() 31 | ws.connect(ws_server.url) 32 | 33 | ws.send(b"Bar") 34 | reply, _ = ws.recv() 35 | 36 | for _ in range(10): 37 | ws.send_str("Bar") 38 | reply = ws.recv_str() 39 | assert reply == "Bar" 40 | 41 | with Session() as s: 42 | ws = s.ws_connect(ws_server.url) 43 | ws.send(b"Foo me once") 44 | content, _ = ws.recv() 45 | assert content == b"Foo me once" 46 | 47 | 48 | def test_receive_large_messages(ws_server): 49 | ws = WebSocket() 50 | ws.connect(ws_server.url) 51 | for _ in range(10): 52 | ws.send("*" * 10000) 53 | for _ in range(10): 54 | buffer, _ = ws.recv() 55 | assert len(buffer) == 10000 56 | ws.close() 57 | 58 | 59 | def test_receive_large_messages_run_forever(ws_server): 60 | def on_open(ws: WebSocket): 61 | ws.send("*" * 10000) 62 | 63 | chunk_counter = 0 64 | 65 | def on_data(ws: WebSocket, data, frame): 66 | nonlocal chunk_counter 67 | if frame.flags & CurlWsFlag.CLOSE: 68 | return 69 | chunk_counter += 1 70 | 71 | message = "" 72 | 73 | def on_message(ws: WebSocket, msg): 74 | nonlocal message 75 | message = msg 76 | # Gracefully close the connection to exit the run_forever loop 77 | ws.send("", CurlWsFlag.CLOSE) 78 | 79 | ws = WebSocket( 80 | on_open=on_open, 81 | on_data=on_data, 82 | on_message=on_message, 83 | ) 84 | ws.run_forever(ws_server.url) 85 | 86 | assert chunk_counter >= 1 87 | assert len(message) == 10000 88 | 89 | 90 | def test_on_data_callback(ws_server): 91 | on_data_called = False 92 | 93 | def on_data(ws: WebSocket, data, frame): 94 | nonlocal on_data_called 95 | on_data_called = True 96 | 97 | ws = WebSocket(on_data=on_data) 98 | ws.connect(ws_server.url) 99 | 100 | ws.send("Hello") 101 | ws.recv() 102 | assert on_data_called is False 103 | ws.close() 104 | 105 | 106 | async def test_hello_twice_async(ws_server): 107 | ws = None 108 | async with AsyncSession() as s: 109 | try: 110 | ws = await s.ws_connect(ws_server.url) 111 | await ws.send(b"Bar") 112 | 
reply, _ = await ws.recv() 113 | 114 | for _ in range(10): 115 | await ws.send_str("Bar") 116 | reply = await ws.recv_str() 117 | assert reply == "Bar" 118 | finally: 119 | if ws: 120 | await ws.close() 121 | -------------------------------------------------------------------------------- /tests/unittest/test_upload.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | 4 | from curl_cffi import CurlMime, requests 5 | 6 | ASSET_FOLDER = Path(__file__).parent.parent.parent / "assets" 7 | 8 | 9 | def test_upload_single_file(file_server): 10 | multipart = CurlMime.from_list( 11 | [ 12 | { 13 | "name": "image", 14 | "content_type": "image/jpg", 15 | "filename": "scrapfly.png", 16 | "local_path": str(ASSET_FOLDER / "scrapfly.png"), 17 | }, 18 | ] 19 | ) 20 | 21 | r = requests.post(file_server.url + "/file", multipart=multipart) 22 | data = r.json() 23 | assert data["filename"] == "scrapfly.png" 24 | assert data["content_type"] == "image/jpg" 25 | assert data["size"] == os.path.getsize(ASSET_FOLDER / "scrapfly.png") 26 | multipart.close() 27 | 28 | 29 | def test_upload_with_text_fields(file_server): 30 | multipart = CurlMime.from_list( 31 | [ 32 | { 33 | "name": "image", 34 | "content_type": "image/jpg", 35 | "filename": "scrapfly.png", 36 | "local_path": str(ASSET_FOLDER / "scrapfly.png"), 37 | }, 38 | {"name": "foo", "data": b"bar"}, 39 | ] 40 | ) 41 | 42 | r = requests.post( 43 | file_server.url + "/file", data={"foo": "bar"}, multipart=multipart 44 | ) 45 | data = r.json() 46 | assert data["filename"] == "scrapfly.png" 47 | assert data["content_type"] == "image/jpg" 48 | assert data["size"] == os.path.getsize(ASSET_FOLDER / "scrapfly.png") 49 | assert data["foo"] == "bar" 50 | multipart.close() 51 | 52 | 53 | def test_upload_multiple_files(file_server): 54 | multipart = CurlMime.from_list( 55 | [ 56 | { 57 | "name": "images", 58 | "content_type": "image/jpg", 59 | "filename": "scrapfly.png", 60 | "local_path": str(ASSET_FOLDER / "scrapfly.png"), 61 | }, 62 | { 63 | "name": "images", 64 | "content_type": "image/jpg", 65 | "filename": "scrapfly.png", 66 | "local_path": str(ASSET_FOLDER / "scrapfly.png"), 67 | }, 68 | ] 69 | ) 70 | 71 | r = requests.post(file_server.url + "/files", multipart=multipart) 72 | data = r.json() 73 | assert len(data["files"]) == 2 74 | assert data["files"][0]["filename"] == "scrapfly.png" 75 | assert data["files"][0]["content_type"] == "image/jpg" 76 | assert data["files"][0]["size"] == os.path.getsize(ASSET_FOLDER / "scrapfly.png") 77 | multipart.close() 78 | 79 | 80 | def test_upload_multiple_files_different_name(file_server): 81 | multipart = CurlMime.from_list( 82 | [ 83 | { 84 | "name": "image1", 85 | "content_type": "image/jpg", 86 | "filename": "scrapfly.png", 87 | "local_path": str(ASSET_FOLDER / "scrapfly.png"), 88 | }, 89 | { 90 | "name": "image2", 91 | "content_type": "image/jpg", 92 | "filename": "scrapfly.png", 93 | "local_path": str(ASSET_FOLDER / "yescaptcha.png"), 94 | }, 95 | ] 96 | ) 97 | 98 | r = requests.post(file_server.url + "/two-files", multipart=multipart) 99 | data = r.json() 100 | assert data["size1"] == os.path.getsize(ASSET_FOLDER / "scrapfly.png") 101 | assert data["size2"] == os.path.getsize(ASSET_FOLDER / "yescaptcha.png") 102 | multipart.close() 103 | -------------------------------------------------------------------------------- /docs/impersonate/faq.rst: -------------------------------------------------------------------------------- 1 | Impersonation FAQ 
2 | ================= 3 | 4 | 5 | How to check if my impersonation is working? 6 | -------------------------------------------- 7 | 8 | The most reliable way is to use WireShark, and compare the packets from ``curl_cffi`` and your 9 | targets. 10 | 11 | If it's challenging for you to use WireShark, you can use the following sites for JA3 and Akamai fingerprints: 12 | 13 | 1. https://tls.browserleaks.com/json 14 | 2. https://tls.peet.ws/api/all 15 | 3. https://scrapfly.io/web-scraping-tools/browser-fingerprint 16 | 17 | For http/3 fingerprints, use our service: 18 | 19 | 1. https://fp.impersonate.pro/api/http3 20 | 21 | 22 | I'm still being detected even if I impersonated correctly 23 | --------------------------------------------------------- 24 | 25 | First, JA3 and Akamai fingerprints are not comprehensive; there are other fields that can 26 | be detected. We have a few more options listed in ``extra_fp``, so be sure to check them as well. 27 | 28 | .. note:: 29 | 30 | Since ``curl-impersonate`` was posted on `Hacker News `_, 31 | some features and behaviors of ``curl_cffi`` are being detected by professional players. 32 | If we continued to fix these niche behaviors in public, they would soon be noticed by those providers. 33 | 34 | In short, if you are using curl_cffi in production and you are sure that you are being blocked by TLS or http 35 | detection, try the `curl_cffi pro version `_. 36 | 37 | 38 | Should I randomize my fingerprints for each request? 39 | ---------------------------------------------------- 40 | 41 | You can choose a random version from the list above, like: 42 | 43 | .. code-block:: python 44 | 45 | random.choice(["chrome119", "chrome120", ...]) 46 | 47 | However, be aware of the browser market share; very old versions are not good choices. 48 | 49 | Generally, you should not try to generate a customized random fingerprint. The reason 50 | is that, for a given browser version, the fingerprint is fixed. If you create a new 51 | random fingerprint, it is easy for the server to tell that you are not using a typical browser. 52 | 53 | If you were thinking about ``ja3``, and not ``ja3n``, then the fingerprint is already 54 | randomized, due to the ``extension permutation`` feature introduced in Chrome 110. 55 | 56 | As far as we know, most websites use an allowlist, not a blocklist, to filter out bot 57 | traffic. So do not expect random ja3 fingerprints to work in the wild. 58 | 59 | Moreover, do not generate random ja3 strings. There are certain requirements for a valid ja3 string, 60 | for example: 61 | 62 | * TLS 1.3 ciphers must be at the front. 63 | * The GREASE extension must come first. 64 | * etc. 65 | 66 | You should copy ja3 strings from sniffing tools, not generate them, unless you can make 67 | sure all the requirements are met. 68 | 69 | Can I change JavaScript fingerprints with this library? 70 | ------------------------------------------------------- 71 | 72 | No, you cannot. As the name suggests, JavaScript fingerprints are generated using JavaScript 73 | APIs provided by real browsers. ``curl_cffi`` is a Python binding to a C library, with no 74 | browser or JavaScript runtime under the hood. 75 | 76 | If you need to impersonate browsers from the JavaScript perspective, you can search for 77 | "Anti-detect Browser", "Playwright stealth" and similar keywords. Or simply use a 78 | commercial plan from our sponsors. 79 | 80 | 81 | Why are all the User-Agents macOS?
82 | ---------------------------------- 83 | 84 | Simple, because I primarily use macOS and I copied the headers from my own browser. Fingerprints 85 | are generally the same across desktop OSes. If you want to look like Windows, just update the 86 | User-Agent and other related headers to their Windows variants. 87 | 88 | -------------------------------------------------------------------------------- /docs/advanced.rst: -------------------------------------------------------------------------------- 1 | Advanced Topics 2 | *************** 3 | 4 | Proxies 5 | ======= 6 | 7 | You can use the ``proxy`` parameter: 8 | 9 | .. code-block:: python 10 | 11 | import curl_cffi 12 | 13 | curl_cffi.get(url, proxy="http://user:pass@example.com:3128") 14 | 15 | You can also use the ``http_proxy``, ``https_proxy``, ``ws_proxy`` and ``wss_proxy`` 16 | environment variables, respectively. 17 | 18 | .. warning:: 19 | 20 | For beginners, a very common mistake is to add an ``https://`` prefix to the ``https`` proxy. 21 | 22 | For an explanation of the differences between ``http_proxy`` and ``https_proxy``, please see 23 | `#6 `_. 24 | 25 | For compatibility with ``requests``, we also support using dicts. 26 | 27 | .. code-block:: python 28 | 29 | import curl_cffi 30 | 31 | proxies = { 32 | "http": "http://localhost:3128", 33 | "https": "http://localhost:3128" 34 | } 35 | curl_cffi.get(url, proxies=proxies) 36 | 37 | 38 | .. note:: 39 | 40 | Prefer the single ``proxy`` parameter, unless you do have different proxies for http and https. 41 | 42 | 43 | Low-level curl API 44 | ================== 45 | 46 | Although we provide an easy-to-use ``requests``-like API, sometimes you may prefer the ``curl``-like API. 47 | 48 | The curl API is very much like ``pycurl``, which you may have used before, with extra impersonation support. 49 | 50 | 51 | .. code-block:: python 52 | 53 | from curl_cffi import Curl, CurlOpt 54 | from io import BytesIO 55 | 56 | buffer = BytesIO() 57 | c = Curl() 58 | c.setopt(CurlOpt.URL, b'https://tls.browserleaks.com/json') 59 | c.setopt(CurlOpt.WRITEDATA, buffer) 60 | 61 | c.impersonate("chrome124") 62 | 63 | c.perform() 64 | c.close() 65 | body = buffer.getvalue() 66 | print(body.decode()) 67 | 68 | For a complete list of options, see :doc:`api`. 69 | 70 | 71 | Using ``CURLOPT_*`` in requests API 72 | =================================== 73 | 74 | Sometimes, you know an option from libcurl, but we haven't exposed it in the requests API. 75 | You can simply add the ``curl_options`` dict to apply the option, for example (a sketch; ``CurlOpt`` comes from ``curl_cffi``, and the option chosen below is only an illustration): 76 | 77 | .. code-block:: python 78 | # Hypothetical example: cap the download speed with a raw libcurl option. 79 | curl_cffi.get(url, curl_options={CurlOpt.MAX_RECV_SPEED_LARGE: 1024 * 1024}) 80 | .. note:: 81 | 82 | Using ``curl_options`` is preferred over using ``session.curl.setopt``; the latter may get 83 | overridden internally, while the former is applied after all other options have been set. 84 | 85 | 86 | Selecting http version 87 | ====================== 88 | 89 | The recommended and default http version is http/2, the most widely used http version 90 | as of 2025. 91 | 92 | According to `Wikipedia `_, the market share is: 93 | 94 | - HTTP/1.1, 33.8% 95 | - HTTP/2, 35.3% 96 | - HTTP/3, 30.9% 97 | 98 | To change http versions, use the ``http_version`` parameter. 99 | 100 | .. code-block:: python 101 | 102 | import curl_cffi 103 | curl_cffi.get("https://cloudflare-quic.com", http_version="v3") 104 | 105 | Common values are: ``v1``, ``v2``, ``v3`` and ``v3only``. 106 | 107 | To get the http version that was actually used, compare the response field with the constants from libcurl: 108 | 109 | .. 
code-block:: python 110 | 111 | >>> from curl_cffi import CurlHttpVersion 112 | >>> r = curl_cffi.get("https://example.com", http_version="v2") 113 | >>> r.http_version == CurlHttpVersion.V2_0 114 | True 115 | 116 | 117 | Keeping session alive in http/2 118 | =============================== 119 | 120 | With http/2, you can optionally send a ping frame to keep the connection alive when not actively using it. 121 | 122 | 123 | .. code-block:: python 124 | 125 | import curl_cffi 126 | 127 | s = curl_cffi.Session() 128 | s.get("https://example.com") 129 | s.upkeep() 130 | 131 | -------------------------------------------------------------------------------- /libs.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "system": "Windows", 4 | "machine": "AMD64", 5 | "pointer_size": 64, 6 | "libdir": "./lib64", 7 | "sysname": "win32", 8 | "so_name": "libcurl.dll", 9 | "so_arch": "x86_64" 10 | }, 11 | { 12 | "system": "Windows", 13 | "machine": "AMD64", 14 | "pointer_size": 32, 15 | "libdir": "./lib32", 16 | "sysname": "win32", 17 | "so_name": "libcurl.dll", 18 | "so_arch": "i686" 19 | }, 20 | { 21 | "system": "Windows", 22 | "machine": "ARM64", 23 | "pointer_size": 64, 24 | "libdir": "./libarm64", 25 | "sysname": "win32", 26 | "so_name": "libcurl.dll", 27 | "so_arch": "arm64" 28 | }, 29 | { 30 | "system": "Darwin", 31 | "machine": "x86_64", 32 | "pointer_size": 64, 33 | "libdir": "/Users/runner/work/_temp/install/lib", 34 | "sysname": "macos", 35 | "so_name": "libcurl-impersonate.4.dylib", 36 | "so_arch": "x86_64" 37 | }, 38 | { 39 | "system": "Darwin", 40 | "machine": "arm64", 41 | "pointer_size": 64, 42 | "libdir": "/Users/runner/work/_temp/install/lib", 43 | "sysname": "macos", 44 | "so_name": "libcurl-impersonate.4.dylib", 45 | "so_arch": "arm64" 46 | }, 47 | { 48 | "system": "Linux", 49 | "machine": "x86_64", 50 | "pointer_size": 64, 51 | "libdir": "", 52 | "sysname": "linux", 53 | "link_type": "static", 54 | "libc": "gnu", 55 | "so_name": "libcurl-impersonate.so", 56 | "so_arch": "x86_64" 57 | }, 58 | { 59 | "system": "Linux", 60 | "machine": "x86_64", 61 | "pointer_size": 64, 62 | "libdir": "", 63 | "sysname": "linux", 64 | "link_type": "static", 65 | "libc": "musl", 66 | "so_name": "libcurl-impersonate.so", 67 | "so_arch": "x86_64" 68 | }, 69 | { 70 | "system": "Linux", 71 | "machine": "i686", 72 | "pointer_size": 32, 73 | "libdir": "", 74 | "sysname": "linux", 75 | "link_type": "static", 76 | "libc": "gnu", 77 | "so_name": "libcurl-impersonate.so", 78 | "so_arch": "i386" 79 | }, 80 | { 81 | "system": "Linux", 82 | "machine": "aarch64", 83 | "pointer_size": 64, 84 | "libdir": "", 85 | "sysname": "linux", 86 | "link_type": "static", 87 | "libc": "gnu", 88 | "so_name": "libcurl-impersonate.so", 89 | "so_arch": "aarch64" 90 | }, 91 | { 92 | "system": "Linux", 93 | "machine": "riscv64", 94 | "pointer_size": 64, 95 | "libdir": "", 96 | "sysname": "linux", 97 | "link_type": "static", 98 | "libc": "gnu", 99 | "so_name": "libcurl-impersonate.so", 100 | "so_arch": "riscv64" 101 | }, 102 | { 103 | "system": "Linux", 104 | "machine": "aarch64", 105 | "pointer_size": 64, 106 | "libdir": "~/.local/lib", 107 | "sysname": "linux", 108 | "link_type": "dynamic", 109 | "libc": "musl", 110 | "so_name": "libcurl-impersonate.so", 111 | "so_arch": "aarch64" 112 | }, 113 | { 114 | "system": "Linux", 115 | "machine": "armv6l", 116 | "pointer_size": 32, 117 | "libdir": "", 118 | "sysname": "linux", 119 | "link_type": "static", 120 | "libc": "gnueabihf", 121 | "so_name": "libcurl-impersonate.so", 122 
| "so_arch": "arm" 123 | }, 124 | { 125 | "system": "Linux", 126 | "machine": "armv7l", 127 | "pointer_size": 32, 128 | "libdir": "", 129 | "sysname": "linux", 130 | "link_type": "static", 131 | "libc": "gnueabihf", 132 | "so_name": "libcurl-impersonate.so", 133 | "so_arch": "arm" 134 | } 135 | ] 136 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. curl_cffi documentation master file, created by 2 | sphinx-quickstart on Sat Feb 17 22:22:59 2024. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | curl_cffi's documentation 7 | ========================= 8 | 9 | .. toctree:: 10 | :maxdepth: 2 11 | :caption: Contents: 12 | :glob: 13 | 14 | quick_start 15 | impersonate/_index 16 | advanced 17 | vs-requests 18 | cookies 19 | community 20 | api 21 | faq 22 | changelog 23 | dev 24 | 25 | `Discuss on Telegram`_ 26 | 27 | .. _Discuss on Telegram: https://t.me/+lL9n33eZp480MGM1 28 | 29 | curl_cffi is a Python binding for `curl-impersonate fork`_ via `cffi`_. For commercial 30 | support, visit `impersonate.pro `_. 31 | 32 | .. _curl-impersonate fork: https://github.com/lexiforest/curl-impersonate 33 | .. _cffi: https://cffi.readthedocs.io/en/latest/ 34 | 35 | Unlike other pure Python http clients like ``httpx`` or ``requests``, ``curl_cffi`` can 36 | impersonate browsers' TLS signatures or JA3 fingerprints. If you are blocked by some 37 | website for no obvious reason, you can give this package a try. 38 | 39 | If you are looking for Python http3 clients, curl_cffi added http3 support since ``v0.11``. 40 | 41 | Sponsors 42 | -------- 43 | 44 | 45 | Bypass Cloudflare with API 46 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 47 | 48 | .. image:: https://raw.githubusercontent.com/lexiforest/curl_cffi/main/assets/yescaptcha.png 49 | :width: 149 50 | :alt: YesCaptcha 51 | :target: https://yescaptcha.com/i/stfnIO 52 | 53 | `Yescaptcha `_ is a proxy service that bypasses Cloudflare and uses the API interface to 54 | obtain verified cookies (e.g. ``cf_clearance``). Click `here `_ 55 | to register. 56 | 57 | 58 | You can also click `here `_ to buy me a coffee. 59 | 60 | 61 | Features 62 | -------- 63 | 64 | - Supports JA3/TLS and http2 fingerprints impersonation, including recent browsers and custom fingerprints. 65 | - Much faster than requests/httpx, on par with aiohttp/pycurl, see `benchmarks `_. 66 | - Mimics requests API, no need to learn another one. 67 | - Pre-compiled, so you don't have to compile on your machine. 68 | - Supports ``asyncio`` with proxy rotation on each request. 69 | - Supports http 2.0 & 3.0, which requests does not. 70 | - Supports websocket. 71 | 72 | .. list-table:: Feature matrix 73 | :widths: 20 16 16 16 16 16 74 | :header-rows: 1 75 | 76 | * - 77 | - requests 78 | - aiohttp 79 | - httpx 80 | - pycurl 81 | - curl_cffi 82 | * - http2 83 | - ❌ 84 | - ❌ 85 | - ✅ 86 | - ✅ 87 | - ✅ 88 | * - http3 89 | - ❌ 90 | - ❌ 91 | - ❌ 92 | - ✅ 93 | - ✅ 94 | * - sync 95 | - ✅ 96 | - ❌ 97 | - ✅ 98 | - ✅ 99 | - ✅ 100 | * - async 101 | - ❌ 102 | - ✅ 103 | - ✅ 104 | - ❌ 105 | - ✅ 106 | * - websocket 107 | - ❌ 108 | - ✅ 109 | - ❌ 110 | - ❌ 111 | - ✅ 112 | * - fingerprints 113 | - ❌ 114 | - ❌ 115 | - ❌ 116 | - ❌ 117 | - ✅ 118 | * - speed 119 | - 🐇 120 | - 🐇🐇 121 | - 🐇 122 | - 🐇🐇 123 | - 🐇🐇 124 | 125 | Notes: 126 | 127 | 1. 
For pycurl, you need an http/3-enabled libcurl, while curl_cffi packages libcurl-impersonate inside Python wheels. 128 | 2. Full http/3 support was added in v0.12.0. 129 | 130 | Install 131 | ------- 132 | 133 | .. code-block:: sh 134 | 135 | pip install curl_cffi --upgrade 136 | 137 | For more details, see :doc:`quick_start`. 138 | 139 | Documentation 140 | ------------- 141 | 142 | You can first check out :doc:`quick_start`, then the :doc:`impersonate` guide. 143 | 144 | For advanced topics, check out :doc:`cookies`, :doc:`asyncio` and :doc:`websockets`. 145 | 146 | You can also find common use cases in the `examples `_ directory. 147 | 148 | Finally, if something is missing from the tutorial, you can always find it in the :doc:`api`. 149 | 150 | If you have any questions, be sure to check out the :doc:`faq` section before opening an issue. 151 | 152 | 153 | Indices and tables 154 | ================== 155 | 156 | * :ref:`genindex` 157 | * :ref:`modindex` 158 | * :ref:`search` 159 | -------------------------------------------------------------------------------- /docs/impersonate/psk.rst: -------------------------------------------------------------------------------- 1 | TLS PSK(41) Extension 2 | ===================== 3 | 4 | 5 | What is the TLS PSK(41) extension, and how should you deal with it? 6 | 7 | PSK is short for ``Pre-Shared Key``, as defined in `RFC 8446 `_: 8 | 9 | Once a handshake has completed, the server can send the client a PSK 10 | identity that corresponds to a unique key derived from the initial 11 | handshake (see Section 4.6.1). The client can then use that PSK 12 | identity in future handshakes to negotiate the use of the associated 13 | PSK. 14 | 15 | Usually, when you first visit a website, the PSK extension is not present in the extension 16 | list. But when you visit the same website a second time, within a relatively short period, 17 | the client may offer a PSK extension with the key received from the server. 18 | 19 | For example, you can visit ``https://tls.peet.ws/api/all``, and then refresh the page; 20 | the PSK extension will be there. 21 | 22 | To correctly implement the PSK extension, the client must have some kind of session 23 | cache held in memory or persisted on disk. All the major browsers have had this feature for 24 | a very long time. ``curl_cffi`` added this feature in version ``0.11.0``, with libcurl 25 | ``8.13.0``. 26 | 27 | The mechanism and behavior of a PSK look much like those of an http session cookie: the server sends 28 | a cryptographic value as a key to resume a previously disconnected session. When the server generates 29 | a PSK, it is possible that the server keeps a mapping between the incoming IP and the key. 30 | Thus, it can be problematic if you reuse a TLS session with rotating proxies. 31 | 32 | .. 
code-block:: 33 | 34 | ┌───────────┐ ┌───────────┐ 35 | │ │ │ │ 36 | │ │ IP: 10.0.0.1 │ │ 37 | │ ┼─────────────TLS─Hello──────────────► │ 38 | │ │ │ │ 39 | │ ◄─────────────PSK:─xxx───────────────┼ │ 40 | │ │ │ │ 41 | │ │ │ │ 42 | │ │ │ │ 43 | │ │ │ Server │ 44 | │ Client │ IP: 10.0.0.2 │ │ 45 | │ ┼─────────────TLS─with─PSK───────────► │ 46 | │ │ │ │ 47 | │ ◄─────────────Blocked────────────────┼ │ 48 | │ │ │ │ 49 | │ │ PSK: xxx was │ │ 50 | │ │ associated with │ │ 51 | │ │ 10.0.0.1, not │ │ 52 | └───────────┘ 10.0.0.2 └───────────┘ 53 | 54 | 55 | Luckily, since curl_cffi ``0.12.0``, we added a new option called ``proxy_credential_no_reuse``. 56 | When enabled, the TLS session cache is bound to the proxy username and IP, 57 | such that a session can only be reused when the proxy username and IP match. From the 58 | server's viewpoint, the ``Pre-Shared Key`` will be locked to the same source IP, not 59 | bouncing around among different exit nodes. 60 | 61 | A minimal sketch, assuming this option is accepted as a ``Session`` keyword argument: 62 | 63 | .. code-block:: python 64 | import curl_cffi 65 | s = curl_cffi.Session(proxy_credential_no_reuse=True)  # We might enable this by default when proxies are used. 66 | 67 | 68 | How do I enable the PSK extension anyway? 69 | ----------------------------------------- 70 | 71 | You don't. If you haven't, please read the explanation above first. Generally speaking, 72 | the client should manage this extension, and it should automatically offer this extension 73 | on the second request. 74 | 75 | From the server's perspective, if you forcefully add a PSK extension with a random value, 76 | it's an obvious sign that you are not a valid visitor, just like providing an invalid cookie 77 | value. 78 | 79 | However, it's reasonable that you don't want the PSK extension to be sent, i.e. to pretend 80 | to be a first-time visitor. We don't support this for now; your options are to use an older 81 | version of curl_cffi or to create a new session on each request. 82 | 83 | Note that some other impersonation-oriented http clients give you control over whether to add the 84 | PSK, but you should let the client decide, if you are trying to impersonate browsers.
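If you want to look like a first-time visitor on every request, a minimal sketch (based on the "create a new session on each request" option above) is:

.. code-block:: python

    import curl_cffi

    # Each new Session starts with an empty TLS session cache, so no PSK is
    # offered; the cost is losing connection reuse. ``urls`` is a placeholder.
    for url in urls:
        with curl_cffi.Session() as s:
            s.get(url, impersonate="chrome")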
85 | -------------------------------------------------------------------------------- /.github/workflows/build-and-test.yaml: -------------------------------------------------------------------------------- 1 | name: Build, test and release 2 | on: 3 | pull_request: 4 | branches: 5 | - main 6 | push: 7 | branches: 8 | - main 9 | - bugfix/* 10 | - feature/* 11 | - release/* 12 | - chore/* 13 | tags: 14 | - v* 15 | 16 | permissions: 17 | contents: write 18 | 19 | jobs: 20 | lint: 21 | name: Lint 22 | runs-on: ubuntu-latest 23 | steps: 24 | - uses: actions/checkout@v4 25 | - uses: actions/setup-python@v5 26 | with: 27 | python-version: '3.9' 28 | - name: Lint 29 | run: | 30 | pip install mypy ruff 31 | make lint 32 | 33 | sdist: 34 | name: Build sdist wheel 35 | runs-on: ubuntu-latest 36 | steps: 37 | - uses: actions/checkout@v4 38 | 39 | - uses: actions/setup-python@v5 40 | with: 41 | python-version: '3.10' 42 | 43 | - name: build sdist 44 | run: | 45 | make preprocess 46 | pip install build 47 | python -m build --sdist 48 | pip install ./dist/*.tar.gz 49 | 50 | - name: upload artifacts 51 | uses: actions/upload-artifact@v4 # https://github.com/actions/upload-artifact/issues/478 52 | with: 53 | name: curl_cffi-${{ github.sha }}-sdist.zip 54 | path: ./dist/*.tar.gz 55 | 56 | bdist: 57 | name: Build bdist wheels and test 58 | runs-on: ${{ matrix.os }} 59 | strategy: 60 | matrix: 61 | os: [ubuntu-24.04, macos-15-intel, macos-14, windows-2022, windows-11-arm] 62 | steps: 63 | - uses: actions/checkout@v4 64 | 65 | - uses: actions/setup-python@v5 66 | with: 67 | python-version: '3.11' 68 | 69 | - if: runner.os == 'Linux' 70 | uses: docker/setup-qemu-action@v3 71 | with: 72 | platforms: all 73 | 74 | # macOS make is too old 75 | - if: runner.os == 'macOS' 76 | run: | 77 | brew install make automake libtool 78 | 79 | # When we build all dependencies with the 11.0 target, we can downgrade this back. 80 | - if: ${{ matrix.os == 'macos-14' }} 81 | run: echo "MACOSX_DEPLOYMENT_TARGET=14.0" >> "$GITHUB_ENV" 82 | 83 | # TODO: fix the target 84 | - if: ${{ matrix.os == 'macos-15-intel' }} 85 | run: echo "MACOSX_DEPLOYMENT_TARGET=15.0" >> "$GITHUB_ENV" 86 | 87 | - name: Build and test wheels 88 | uses: pypa/cibuildwheel@v3.1.3 89 | 90 | # - name: Setup tmate session 91 | # uses: mxschmitt/action-tmate@v3 92 | 93 | - uses: actions/upload-artifact@v4 # https://github.com/actions/upload-artifact/issues/478 94 | with: 95 | name: curl_cffi-${{ github.sha }}-${{ matrix.os }}.zip 96 | path: ./wheelhouse/*.whl 97 | 98 | build_latest: 99 | name: Build bdist on latest OSes 100 | runs-on: ${{ matrix.os }} 101 | strategy: 102 | matrix: 103 | # For linux, it's built inside a container, no need to test for latest versions. 
104 | os: [macos-latest, windows-latest, windows-11-arm] 105 | steps: 106 | - uses: actions/checkout@v4 107 | 108 | - uses: actions/setup-python@v5 109 | with: 110 | python-version: '3.11' 111 | 112 | - if: runner.os == 'Linux' 113 | uses: docker/setup-qemu-action@v3 114 | with: 115 | platforms: all 116 | 117 | # macOS make is too old 118 | - if: runner.os == 'macOS' 119 | run: | 120 | brew install make automake libtool 121 | 122 | - if: ${{ matrix.os == 'macos-latest' }} 123 | run: echo "MACOSX_DEPLOYMENT_TARGET=15.0" >> "$GITHUB_ENV" 124 | 125 | - name: Build and test wheels 126 | uses: pypa/cibuildwheel@v3.1.3 127 | 128 | 129 | upload_all: 130 | needs: [bdist, sdist] 131 | runs-on: ubuntu-latest 132 | steps: 133 | - uses: actions/download-artifact@v4.1.7 # https://github.com/actions/upload-artifact/issues/478 134 | if: startsWith(github.ref, 'refs/tags/') 135 | with: 136 | pattern: curl_cffi-* 137 | merge-multiple: true 138 | path: dist 139 | 140 | - uses: pypa/gh-action-pypi-publish@v1.12.4 141 | if: startsWith(github.ref, 'refs/tags/') 142 | with: 143 | password: ${{ secrets.PYPI_TOKEN }} 144 | packages-dir: dist/ 145 | 146 | - name: Upload release files 147 | if: startsWith(github.ref, 'refs/tags/') 148 | uses: softprops/action-gh-release@v2 149 | with: 150 | files: | 151 | ./dist/*.whl 152 | ./dist/*.tar.gz 153 | -------------------------------------------------------------------------------- /scripts/generate_consts.py: -------------------------------------------------------------------------------- 1 | import platform 2 | import re 3 | import subprocess 4 | import sys 5 | 6 | CONST_FILE = "curl_cffi/const.py" 7 | CURL_VERSION = sys.argv[1] 8 | 9 | uname = platform.uname() 10 | 11 | 12 | print("extract consts from curl.h") 13 | with open(CONST_FILE, "w") as f: 14 | f.write("# This file is automatically generated, do not modify it directly.\n\n") 15 | f.write("from enum import IntEnum\n\n\n") 16 | f.write("class CurlOpt(IntEnum):\n") 17 | f.write(' """``CURLOPT_`` constants extracted from libcurl,\n') 18 | f.write(' see: https://curl.se/libcurl/c/curl_easy_setopt.html"""\n\n') 19 | cmd = rf""" 20 | echo '#include "{CURL_VERSION}/include/curl/curl.h"' | gcc -E - | grep -i "CURLOPT_.\+ =" | sed "s/ CURLOPT_/ /g" | sed "s/,//g" 21 | """ # noqa E501 22 | output = subprocess.check_output(cmd, shell=True) 23 | clean_output = re.sub( 24 | r"__attribute__\(.*\) ", "", output.decode(), flags=re.MULTILINE 25 | ) 26 | f.write(clean_output) 27 | f.write( 28 | """ 29 | if locals().get("WRITEDATA"): 30 | FILE = locals().get("WRITEDATA") 31 | if locals().get("READDATA"): 32 | INFILE = locals().get("READDATA") 33 | if locals().get("HEADERDATA"): 34 | WRITEHEADER = locals().get("HEADERDATA")\n\n 35 | """ 36 | ) 37 | 38 | f.write("class CurlInfo(IntEnum):\n") 39 | f.write(' """``CURLINFO_`` constants extracted from libcurl,\n') 40 | f.write(' see: https://curl.se/libcurl/c/curl_easy_getinfo.html"""\n\n') 41 | cmd = rf""" 42 | echo '#include "{CURL_VERSION}/include/curl/curl.h"' | gcc -E - | grep -i "CURLINFO_.\+ =" | sed "s/ CURLINFO_/ /g" | sed "s/,//g" 43 | """ # noqa E501 44 | output = subprocess.check_output(cmd, shell=True) 45 | f.write(output.decode()) 46 | f.write( 47 | """ 48 | if locals().get("RESPONSE_CODE"): 49 | HTTP_CODE = locals().get("RESPONSE_CODE")\n\n 50 | """ 51 | ) 52 | 53 | f.write("class CurlMOpt(IntEnum):\n") 54 | f.write(' """``CURLMOPT_`` constants extracted from libcurl,\n') 55 | f.write(' see: https://curl.se/libcurl/c/curl_multi_setopt.html"""\n\n') 56 | cmd = rf""" 57 | 
echo '#include "{CURL_VERSION}/include/curl/curl.h"' | gcc -E - | grep -i "CURLMOPT_.\+ =" | sed "s/ CURLMOPT_/ /g" | sed "s/,//g" 58 | """ # noqa E501 59 | output = subprocess.check_output(cmd, shell=True) 60 | f.write(output.decode()) 61 | f.write("\n\n") 62 | 63 | f.write("class CurlECode(IntEnum):\n") 64 | f.write(' """``CURLE_`` constants extracted from libcurl,\n') 65 | f.write(' see: https://curl.se/libcurl/c/libcurl-errors.html"""\n\n') 66 | cmd = rf""" 67 | echo '#include "{CURL_VERSION}/include/curl/curl.h"' | gcc -E - | grep -i CURLE_ | sed "s/[, ][=0]*//g" | sed "s/CURLE_/ /g" | awk '{{print $0 " = " NR-1}}' 68 | """ # noqa E501 69 | output = subprocess.check_output(cmd, shell=True) 70 | f.write(output.decode()) 71 | f.write("\n") 72 | 73 | # These lines are not easy to extract automatically 74 | f.write( 75 | ''' 76 | class CurlHttpVersion(IntEnum): 77 | """``CURL_HTTP_VERSION`` constants from libcurl, see comments for details.""" 78 | 79 | NONE = 0 80 | V1_0 = 1 # please use HTTP 1.0 in the request 81 | V1_1 = 2 # please use HTTP 1.1 in the request 82 | V2_0 = 3 # please use HTTP 2 in the request 83 | V2TLS = 4 # use version 2 for HTTPS, version 1.1 for HTTP 84 | V2_PRIOR_KNOWLEDGE = 5 # please use HTTP 2 without HTTP/1.1 Upgrade 85 | V3 = 30 # Makes use of explicit HTTP/3 with fallback. 86 | V3ONLY = 31 # No fallback 87 | 88 | 89 | class CurlWsFlag(IntEnum): 90 | """``CURL_WS_FLAG`` constants from libcurl, see comments for details.""" 91 | 92 | TEXT = 1 << 0 93 | BINARY = 1 << 1 94 | CONT = 1 << 2 95 | CLOSE = 1 << 3 96 | PING = 1 << 4 97 | OFFSET = 1 << 5 98 | 99 | 100 | class CurlSslVersion(IntEnum): 101 | """``CURL_SSLVERSION`` constants from libcurl, see comments for details.""" 102 | 103 | DEFAULT = 0 104 | TLSv1 = 1 105 | SSLv2 = 2 106 | SSLv3 = 3 107 | TLSv1_0 = 4 108 | TLSv1_1 = 5 109 | TLSv1_2 = 6 110 | TLSv1_3 = 7 111 | MAX_DEFAULT = 1 << 16 112 | 113 | 114 | class CurlIpResolve(IntEnum): 115 | """``CURL_IPRESOLVE`` constants from libcurl, see comments for details.""" 116 | 117 | WHATEVER = 0 # default, uses addresses to all IP versions that your system allows 118 | V4 = 1 # uses only IPv4 addresses/connections 119 | V6 = 2 # uses only IPv6 addresses/connections 120 | 121 | ''' 122 | ) 123 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "curl_cffi" 3 | version = "0.14.0" 4 | authors = [{ name = "lexiforest", email = "infinitesheldon@gmail.com" }] 5 | description = "libcurl ffi bindings for Python, with impersonation support." 
6 | license = { text = "MIT License" } 7 | dependencies = [ 8 | "cffi>=1.12.0", 9 | "certifi>=2024.2.2", 10 | ] 11 | readme = "README.md" 12 | requires-python = ">=3.10" 13 | urls = { "repository" = "https://github.com/lexiforest/curl_cffi" } 14 | classifiers = [ 15 | "Development Status :: 4 - Beta", 16 | "Intended Audience :: Developers", 17 | "Programming Language :: Python :: 3", 18 | "Programming Language :: Python :: 3.10", 19 | "Programming Language :: Python :: 3.11", 20 | "Programming Language :: Python :: 3.12", 21 | "Programming Language :: Python :: 3.13", 22 | "Programming Language :: Python :: 3.14", 23 | ] 24 | 25 | 26 | [project.optional-dependencies] 27 | extra = [ 28 | "readability-lxml>=0.8.1", 29 | "markdownify>=1.1.0", 30 | "lxml_html_clean", 31 | ] 32 | dev = [ 33 | "charset_normalizer>=3.3.2,<4.0", 34 | "coverage>=6.4.1,<7.0", 35 | "cryptography>=42.0.5,<43.0", 36 | "httpx==0.23.1", # don't change, tests will raise "httpx.InvalidURL: Invalid URL component 'path'" 37 | "mypy>=1.9.0,<2.0", 38 | "pytest>=8.1.1,<9.0", 39 | "pytest-asyncio>=0.23.6,<1.0", 40 | "pytest-trio>=0.8.0,<1.0", 41 | "ruff>=0.3.5,<1.0", 42 | "trio>=0.25.0,<1.0", 43 | "trustme>=1.1.0,<2.0", 44 | "uvicorn>=0.29.0,<1.0", 45 | "websockets>=14.0", 46 | "typing_extensions", 47 | ] 48 | build = [ 49 | "cibuildwheel", 50 | "wheel", 51 | ] 52 | test = [ 53 | "charset_normalizer>=3.3.2,<4.0", 54 | "cryptography>=42.0.5,<43.0", 55 | "fastapi>=0.110.0,<1.0", 56 | "httpx==0.23.1", # don't change, tests will raise "httpx.InvalidURL: Invalid URL component 'path'" 57 | "proxy.py>=2.4.3,<3.0", 58 | "pytest>=8.1.1,<9.0", 59 | "pytest-asyncio>=0.23.6,<1.0", 60 | "pytest-trio>=0.8.0,<1.0", 61 | "python-multipart>=0.0.9,<1.0", 62 | "trio>=0.25.0,<1.0", 63 | "trustme>=1.1.0,<2.0", 64 | "uvicorn>=0.29.0,<1.0", 65 | "websockets>=14.0", 66 | "typing_extensions", 67 | ] 68 | 69 | [project.scripts] 70 | curl-cffi = "curl_cffi.cli:main" 71 | 72 | [build-system] 73 | requires = ["wheel", "setuptools", "cffi>=1.12.0"] 74 | build-backend = "setuptools.build_meta" 75 | 76 | 77 | [tool.setuptools] 78 | packages = ["curl_cffi", "curl_cffi.requests"] 79 | package-data = { curl_cffi = ["libcurl.dll"] } 80 | 81 | 82 | [tool.cibuildwheel] 83 | # Building for these platforms is enough since we are using abi3 packages 84 | build = [ 85 | "cp310-macosx_x86_64", 86 | "cp310-macosx_arm64", 87 | "cp310-win_amd64", 88 | "cp310-win_arm64", 89 | # "cp310-win32", 90 | "cp310-manylinux_x86_64", 91 | "cp310-manylinux_aarch64", 92 | "cp310-manylinux_riscv64", 93 | "cp310-manylinux_i686", 94 | "cp310-manylinux_armv7l", 95 | "cp310-musllinux_x86_64", 96 | "cp310-musllinux_aarch64", 97 | ] 98 | before-all = "make preprocess" 99 | test-requires = "pytest" 100 | test-command = "python -bb -m pytest {project}/tests/unittest" 101 | test-extras = ["test"] 102 | # trustme not available for these images 103 | test-skip = [ 104 | "cp310-manylinux_i686", 105 | "cp310-win_arm64", 106 | "cp310-manylinux_armv7l", 107 | "cp310-manylinux_riscv64", 108 | ] 109 | build-verbosity = 1 110 | 111 | 112 | # configure cibuildwheel to build native archs ('auto'), and some emulated ones 113 | [tool.cibuildwheel.linux] 114 | archs = ["auto", "aarch64", "riscv64", "i686", "armv7l"] 115 | environment = { LD_LIBRARY_PATH="$HOME/.local/lib" } 116 | environment-pass = ["LD_LIBRARY_PATH"] 117 | 118 | 119 | [tool.cibuildwheel.macos] 120 | before-all = "gmake preprocess" 121 | 122 | [tool.cibuildwheel.windows] 123 | before-build = "pip install delvewheel" 124 | repair-wheel-command = 
"delvewheel repair --add-path ./lib64;./lib32 -w {dest_dir} {wheel}" 125 | 126 | 127 | [tool.pytest.ini_options] 128 | # pythonpath = [ "." ] 129 | asyncio_mode = "auto" 130 | 131 | 132 | [tool.ruff] 133 | line-length = 88 134 | 135 | [tool.ruff.lint] 136 | select = [ 137 | "E", # pycodestyle 138 | "F", # Pyflakes 139 | "UP", # pyupgrade 140 | "B", # flake8-bugbear 141 | "SIM", # flake8-simplify 142 | ] 143 | ignore = [ 144 | "UP007", 145 | "UP045", # X | None 146 | ] 147 | 148 | [tool.isort] 149 | profile = "black" 150 | line_length = 88 151 | 152 | [tool.mypy] 153 | python_version = "3.9" 154 | ignore_missing_imports = true 155 | #warn_unused_ignores = true 156 | #strict = true 157 | exclude = ["benchmark/", "docs/", "examples/", "scripts/", "tests/", "build/"] 158 | -------------------------------------------------------------------------------- /docs/changelog.rst: -------------------------------------------------------------------------------- 1 | Change Log 2 | ========== 3 | 4 | Please see the `GitHub Releases `_ page for details. 5 | 6 | - v0.13 7 | - Added support for Windows on Arm 8 | - Improved support for websockets 9 | 10 | - v0.12 11 | - Added support for safari 26 12 | - Improved support for websockets 13 | 14 | - v0.11 15 | - Added support for http3 16 | - Added tor145, new safari and chrome targets 17 | 18 | 19 | - v0.10.0 20 | - Added support for using curl_cffi directly 21 | 22 | 23 | - v0.9.0 24 | - Brought back Windows support 25 | - Added support for Firefox 26 | - Added support for Chrome 133a 27 | 28 | 29 | - v0.8.0 30 | - Added more recent impersonate versions, Safari 18.0 for iOS and macOS, Chrome 131. 31 | - Added ``quote`` parameter for setting which letter should be quoted in URL. 32 | - Added ``response_class`` parameter for using a customized ``Response`` class. 33 | 34 | 35 | - v0.7.3 36 | - Bugfixes. 37 | - v0.7.2 38 | - Added requests-like exception hierarchy. 39 | - v0.7.1 40 | - Added ``Cookies.get_dict()``, for compatibility with ``requests``. 41 | - Fixed type conversion in C shim, by @qishipai. 42 | - Fixed cookie ``subdomains`` attribute. 43 | - v0.7.0 44 | - Added more recent impersonate versions, up to Chrome 124. 45 | - Upgraded ``libcurl`` to 8.7.1. 46 | - Supported custom impersonation. 47 | - Added support for list of tuple in post fields. 48 | - Updated header strategy: always exclude empty headers, never send Expect header. 49 | - Changed default redirect limit to 30. 50 | - Prefer not sending CONNECT for plain http proxy. 51 | - Fix Windows build. 52 | - Fix Safari Stream priority. 53 | 54 | 55 | The minimum Python version is now 3.8. Windows fingerprints are wrong in 0.6.x. 56 | 57 | - v0.6.1 58 | - ``AsyncSession.close`` is now a coroutine. 59 | - This is a bugfix release. 
60 | - v0.6.0 61 | - Added more recent impersonate versions, up to Chrome 120 and Safari 17.0 62 | - Upgraded libcurl to 8.1.1 63 | - Added experimental websocket support 64 | - Supported proactive eventloop on Windows 65 | - Added win32 and macOS arm64 build targets 66 | - Added `allow_redirects` to Session parameters 67 | - Use certifi to replace packaged cacert.pem 68 | - Improved proxy support by accepting `proxy=...` 69 | - Bumped minimum python version to 3.8 70 | - Added files support 71 | - Added client certs support 72 | - Incorporated build time files for sdist 73 | - Bugfix: async curl timer leak 74 | 75 | 76 | - v0.5.10 77 | - Add stream support 78 | - Add support for secure cookies 79 | - Add curl_infos to extract extra info after performing 80 | - Bugfix: `timeout=None` not working 81 | - v0.5.9 82 | - Add interface support 83 | - Make POST work as in the real world 84 | - Add support for custom resolve 85 | - Switched to libcurl's COOKIELIST to sync cookies between python and curl 86 | - Add default_headers option for sessions like in curl-impersonate 87 | - Add curl_options for extra curl_options in Session 88 | - Add http_version option for limiting http version to 1.1 or whatever 89 | - Add debug option for extra curl debug info 90 | - Add CurlError.code 91 | - Bugfix: duplicated header lines for the same header 92 | - Bugfix: clearing headers when request fails 93 | - Bugfix: fix HEAD request 94 | - Bugfix: reset curl options when errors occur 95 | - v0.5.7 96 | - Refactor JSON serialization to mimic browser behavior (#66) 97 | - Add http options to Session classes (#72) 98 | - Add Windows eventloop warning 99 | - v0.5.6 100 | - Make Session.curl a thread-local variable (#50) 101 | - Add support for eventlet and gevent with threadpool 102 | - Bugfix: Only close future if it's not done or cancelled 103 | - 0.5.5 104 | - Bugfix: Fix high CPU usage (#46) 105 | - 0.5.4 106 | - Bugfix: Fix cert and error buffer when calling curl_easy_reset 107 | - 0.5.3 108 | - Bugfix: Reset curl after performing, fix #39 109 | - 0.5.2 110 | - Bugfix: Clear headers after async perform 111 | - 0.5.1 112 | - Bugfix: Clean up timer function when curl already closed 113 | - 0.5.0 114 | - Added asyncio support 115 | 116 | 117 | - 0.4.0 118 | - Removed c shim callback function, use cffi native callback function 119 | 120 | 121 | - 0.3.6 122 | - Updated to curl-impersonate v0.5.4, supported chrome107 and chrome110 123 | - 0.3.0, copied more code from `httpx` to support sessions 124 | - Add `requests.Session` 125 | - Breaking change: `Response.cookies` changed from `http.cookies.SimpleCookie` to `curl_cffi.requests.Cookies` 126 | - Using ABI3 wheels to reduce package size. 
127 | 128 | -------------------------------------------------------------------------------- /benchmark/benchmark.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import queue 3 | import threading 4 | import time 5 | from io import BytesIO 6 | 7 | import aiohttp 8 | import httpx 9 | import pandas as pd 10 | import pycurl 11 | import requests 12 | import tls_client 13 | 14 | import curl_cffi 15 | import curl_cffi.requests 16 | 17 | # import uvloop 18 | # uvloop.install() 19 | 20 | results = [] 21 | 22 | 23 | class FakePycurlSession: 24 | def __init__(self): 25 | self.c = pycurl.Curl() 26 | 27 | def get(self, url): 28 | buffer = BytesIO() 29 | self.c.setopt(pycurl.URL, url) 30 | self.c.setopt(pycurl.WRITEDATA, buffer) 31 | self.c.perform() 32 | 33 | def __del__(self): 34 | self.c.close() 35 | 36 | 37 | class FakeCurlCffiSession: 38 | def __init__(self): 39 | self.c = curl_cffi.Curl() 40 | 41 | def get(self, url): 42 | buffer = BytesIO() 43 | self.c.setopt(curl_cffi.CurlOpt.URL, url) 44 | self.c.setopt(curl_cffi.CurlOpt.WRITEDATA, buffer) 45 | self.c.perform() 46 | 47 | def __del__(self): 48 | self.c.close() 49 | 50 | 51 | for size in ["1k", "20k", "200k"]: 52 | stats = {} 53 | url = "http://localhost:8000/" + size 54 | 55 | for name, SessionClass in [ 56 | ("requests", requests.Session), 57 | ("httpx_sync", httpx.Client), 58 | ("tls_client", tls_client.Session), 59 | ("curl_cffi_sync", curl_cffi.requests.Session), 60 | ("curl_cffi_raw", FakeCurlCffiSession), 61 | ("pycurl", FakePycurlSession), 62 | ]: 63 | s = SessionClass() 64 | start = time.time() 65 | for _ in range(1000): 66 | s.get(url) 67 | dur = time.time() - start 68 | stats[name] = dur 69 | results.append({"name": name, "size": size, "duration": dur}) 70 | 71 | print(f"One worker, {size}: {stats}") 72 | 73 | df = pd.DataFrame(results) 74 | df.to_csv("single_worker.csv", index=False, float_format="%.4f") 75 | 76 | results = [] 77 | 78 | 79 | def worker(q, done, SessionClass): 80 | s = SessionClass() 81 | while not done.is_set(): 82 | try: 83 | url = q.get_nowait() 84 | except Exception: 85 | continue 86 | s.get(url) 87 | q.task_done() 88 | 89 | 90 | async def aiohttp_worker(q, done, s): 91 | while not done.is_set(): 92 | url = await q.get() 93 | async with s.get(url) as response: 94 | await response.read() 95 | q.task_done() 96 | 97 | 98 | async def httpx_worker(q, done, s): 99 | while not done.is_set(): 100 | url = await q.get() 101 | await s.get(url) 102 | q.task_done() 103 | 104 | 105 | for size in ["1k", "20k", "200k"]: 106 | url = "http://localhost:8000/" + size 107 | stats = {} 108 | for name, SessionClass in [ 109 | ("requests", requests.Session), 110 | ("httpx_sync", httpx.Client), 111 | ("tls_client", tls_client.Session), 112 | ("curl_cffi_sync", curl_cffi.requests.Session), 113 | ("curl_cffi_raw", FakeCurlCffiSession), 114 | ("pycurl", FakePycurlSession), 115 | ]: 116 | q = queue.Queue() 117 | for _ in range(1000): 118 | q.put(url) 119 | done = threading.Event() 120 | start = time.time() 121 | threads = [] 122 | for _ in range(10): 123 | t = threading.Thread(target=worker, args=(q, done, SessionClass)) 124 | threads.append(t) 125 | t.start() 126 | q.join() 127 | done.set() 128 | dur = time.time() - start 129 | stats[name] = dur 130 | results.append({"name": name, "size": size, "duration": dur}) 131 | for t in threads: 132 | t.join() 133 | # print(stats) 134 | 135 | async def test_asyncs_workers(url, size, stats): 136 | for name, worker, SessionClass in [ 137 | 
("aiohttp", aiohttp_worker, aiohttp.ClientSession), 138 | ("httpx_async", httpx_worker, httpx.AsyncClient), 139 | ("curl_cffi_async", httpx_worker, curl_cffi.requests.AsyncSession), 140 | ]: 141 | q = asyncio.Queue() 142 | for _ in range(1000): 143 | await q.put(url) 144 | done = asyncio.Event() 145 | start = time.time() 146 | workers = [] 147 | async with SessionClass() as s: 148 | for _ in range(10): 149 | w = asyncio.create_task(worker(q, done, s)) 150 | workers.append(w) 151 | await q.join() 152 | done.set() 153 | dur = time.time() - start 154 | stats[name] = dur 155 | results.append({"name": name, "size": size, "duration": dur}) 156 | for w in workers: 157 | w.cancel() 158 | 159 | asyncio.run(test_asyncs_workers(url, size, stats)) 160 | print(f"10 Workers, {size}: {stats}") 161 | 162 | df = pd.DataFrame(results) 163 | df.to_csv("multiple_workers.csv", index=False, float_format="%.4f") 164 | -------------------------------------------------------------------------------- /benchmark/ws_bench_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Cross-platform utility code for the WebSocket benchmarks. 4 | """ 5 | 6 | import asyncio 7 | import os 8 | from collections.abc import AsyncGenerator, Generator 9 | from dataclasses import dataclass 10 | from ipaddress import IPv4Address 11 | from logging import DEBUG, Formatter, Logger, StreamHandler, getLogger 12 | from pathlib import Path 13 | from ssl import PROTOCOL_TLS_SERVER, SSLContext 14 | from typing import TextIO 15 | 16 | 17 | def get_logger() -> Logger: 18 | """Setup the logger. 19 | 20 | Returns: 21 | Logger: Initialized logger object 22 | """ 23 | console: Logger = getLogger(name=__name__) 24 | console_handler: StreamHandler[TextIO] = StreamHandler() 25 | console.setLevel(level=DEBUG) 26 | console_handler.setLevel(level=DEBUG) 27 | formatter: Formatter = Formatter( 28 | fmt="[%(asctime)s] [%(levelname)s] %(message)s", datefmt="%d-%m-%Y %H:%M:%S" 29 | ) 30 | console_handler.setFormatter(fmt=formatter) 31 | console.addHandler(hdlr=console_handler) 32 | return console 33 | 34 | 35 | # Initialize logger 36 | logger: Logger = get_logger() 37 | 38 | 39 | def get_ssl_ctx(cert_file: Path, cert_key: Path) -> SSLContext | None: 40 | """Load in the SSL context if cert files present and host is non-Windows. 41 | 42 | Returns: 43 | `SSLContext | None`: The SSL context or `None`. 44 | """ 45 | 46 | if not (cert_file.is_file() and cert_key.is_file()): 47 | logger.warning( 48 | "Certificate file(s) %s or %s not found, disabling TLS", 49 | cert_file, 50 | cert_key, 51 | ) 52 | return None 53 | 54 | ssl_context: SSLContext = SSLContext(PROTOCOL_TLS_SERVER) 55 | ssl_context.load_cert_chain(cert_file, cert_key) 56 | return ssl_context 57 | 58 | 59 | @dataclass 60 | class TestConfig: 61 | """ 62 | Configuration values, should be changed as needed. 
63 | """ 64 | 65 | total_gb: int = 10 66 | chunk_size: int = 65536 67 | large_chunk_size: int = 4 * 1024**2 68 | total_bytes: int = total_gb * 1024**3 69 | recv_queue: int = 512 70 | send_queue: int = 128 71 | cert_file: Path = Path("localhost.crt") 72 | cert_key: Path = Path("localhost.key") 73 | data_filename: Path = Path("testdata.bin") 74 | hash_filename: Path = data_filename.with_suffix(".hash") 75 | srv_host: IPv4Address = IPv4Address("127.0.0.1") 76 | srv_port: int = 4443 77 | ssl_ctx: SSLContext | None = get_ssl_ctx(cert_file, cert_key) 78 | proto: str = "wss://" if ssl_ctx else "ws://" 79 | srv_path: str = f"{proto}{srv_host}:{srv_port}/ws" 80 | 81 | 82 | # Initialize config object 83 | config: TestConfig = TestConfig() 84 | 85 | 86 | async def binary_data_generator( 87 | total_gb: float, chunk_size: int 88 | ) -> AsyncGenerator[bytes]: 89 | """An asynchronous generator that yields chunks of binary data efficiently 90 | for benchmarking. It generates one chunk of random data and reuses it to 91 | eliminate chunk generation overhead as a performance factor. 92 | 93 | Args: 94 | total_gb (`float`): The total amount of data to generate. 95 | chunk_size (`int`): Data should be yielded in chunks of this size. 96 | 97 | Yields: 98 | `Iterator[AsyncGenerator[bytes, None]]`: Chunks until total size is reached. 99 | """ 100 | bytes_to_send: int = int(total_gb * 1024**3) 101 | bytes_sent = 0 102 | 103 | # Create one reusable chunk of random data to avoid calling os.urandom() in a loop. 104 | reusable_chunk = os.urandom(chunk_size) 105 | while bytes_sent < bytes_to_send: 106 | # Calculate the size of the next chunk to send 107 | current_chunk_size = min(chunk_size, bytes_to_send - bytes_sent) 108 | 109 | # If it's a full-sized chunk, yield the reusable one. Otherwise, yield a slice. 110 | if current_chunk_size == chunk_size: 111 | yield reusable_chunk 112 | else: 113 | yield reusable_chunk[:current_chunk_size] 114 | bytes_sent += current_chunk_size 115 | 116 | 117 | def get_loop() -> asyncio.AbstractEventLoop: 118 | """Returns the correct event loop for the platform and what's installed. 119 | 120 | Returns: 121 | asyncio.AbstractEventLoop: The created and installed event loop. 122 | """ 123 | 124 | try: 125 | # pylint: disable-next=import-outside-toplevel 126 | import uvloop 127 | 128 | loop: asyncio.AbstractEventLoop = uvloop.new_event_loop() 129 | except ImportError: 130 | loop = asyncio.new_event_loop() 131 | 132 | asyncio.set_event_loop(loop) 133 | return loop 134 | 135 | 136 | def generate_random_chunks() -> Generator[bytes]: 137 | """Generate chunks of random data up to a total size. 138 | 139 | Returns: 140 | Generator[bytes]: Generator that yields random chunks. 
141 | """ 142 | num_chunks: int = config.total_bytes // config.large_chunk_size 143 | for _ in range(num_chunks): 144 | yield os.urandom(config.large_chunk_size) 145 | 146 | remaining_size = config.total_bytes % config.large_chunk_size 147 | if remaining_size > 0: 148 | yield os.urandom(remaining_size) 149 | -------------------------------------------------------------------------------- /benchmark/README.md: -------------------------------------------------------------------------------- 1 | Benchmark 2 | ====== 3 | 4 | benchmark between curl_cffi and other python http clients 5 | 6 | Sync clients 7 | ------ 8 | 9 | - curl_cffi 10 | - requests 11 | - pycurl 12 | - [python-tls-client](https://github.com/FlorianREGAZ/Python-Tls-Client.git) 13 | - httpx 14 | 15 | Async clients 16 | ------ 17 | 18 | - curl_cffi 19 | - httpx 20 | - aiohttp 21 | 22 | Target 23 | ------ 24 | 25 | All the clients run with session/client enabled. 26 | 27 | Async WebSocket 28 | ------ 29 | 30 | Two distinct benchmarks are provided to evaluate the performance of the `AsyncWebSocket` implementation under different conditions. 31 | 32 | 1. Simple Throughput Test ([`client`](ws_bench_1_client.py), [`server`](ws_bench_1_server.py)) 33 | 34 | This is a lightweight, in-memory benchmark designed to measure the raw throughput and overhead of the WebSocket client. The server sends a repeating chunk of random bytes from memory, and the client receives it. This test is useful for quick sanity checks and detecting performance regressions under ideal, CPU-cached conditions. 35 | 36 | 2. Verified Streaming Test ([`benchmark`](ws_bench_2.py)) 37 | 38 | This is a rigorous, end-to-end test. It first generates a multi-gigabyte file of random data and its SHA256 hash. The benchmark then streams this file from disk over the WebSocket connection. The receiving end calculates the hash of the incoming stream and verifies it against the original, ensuring complete data integrity. 39 | 40 | **Important**: This test requires enough RAM free on the system equal to the size of the random data. It measures the performance of the entire system pipeline, including Disk I/O speed, CPU hashing speed, and network transfer. On modern systems, it is likely to be bottlenecked by the CPU's hashing performance or the disk's read speed. 41 | 42 | ### Prerequisites 43 | 44 | - Python 3.10+ 45 | - Pip packages 46 | 47 | ```bash 48 | pip install aiohttp curl_cffi 49 | ``` 50 | 51 | > `uvloop` is highly recommended for performance on Linux and macOS. The benchmarks will automatically fall back to the standard asyncio event loop if it is not installed or on Windows. 52 | 53 | ### Setup 54 | 55 | 1. TLS certificate (optional) 56 | 57 | These benchmarks are configured to use WSS (secure WebSockets) by default on Linux and macOS. To generate a self-signed certificate: 58 | 59 | ```bash 60 | openssl req -x509 -newkey rsa:2048 -nodes -keyout localhost.key -out localhost.crt -days 365 -subj "/CN=localhost" 61 | ``` 62 | 63 | > **Note**: If you are on any platform and skip certificate generation, the benchmarks will use the insecure `ws://` instead. 64 | 65 | 2. Configuration 66 | 67 | The benchmark parameters (total data size, chunk size) can be modified by editing the `TestConfig` class within the [`ws_bench_utils.py`](ws_bench_utils.py) file. By default, both benchmarks are configured for `10 GiB` of data transfer. 68 | 69 | ### Running the Benchmarks 70 | 71 | It is recommended to run the server and client in separate terminal windows. 
72 | 
73 | #### Benchmark 1: Simple Throughput Test
74 | 
75 | 1. Start the Server:
76 | 
77 | ```bash
78 | python ws_bench_1_server.py
79 | ```
80 | 
81 | 2. Run the Client:
82 | 
83 | ```bash
84 | python ws_bench_1_client.py
85 | ```
86 | 
87 | #### Benchmark 2: Verified Streaming Test
88 | 
89 | 1. Generate Test File (Initial Setup):
90 | 
91 | This command will create a large (`10 GiB`) file named `testdata.bin` and its hash. Ensure you have sufficient disk space:
92 | 
93 | ```bash
94 | python ws_bench_2.py generate
95 | ```
96 | 
97 | 2. Start the Server:
98 | 
99 | ```bash
100 | python ws_bench_2.py server
101 | ```
102 | 
103 | 3. Run the Client (Choose one):
104 | 
105 | - To test download speed (server sends, client receives):
106 | 
107 | ```bash
108 | python ws_bench_2.py client --test download
109 | ```
110 | 
111 | - To test upload speed (client sends, server receives):
112 | 
113 | ```bash
114 | python ws_bench_2.py client --test upload
115 | ```
116 | 
117 | ### Performance Considerations
118 | 
119 | Benchmark results can vary significantly based on system-level factors. Keep the following in mind:
120 | 
121 | - **Loopback Interface**: These tests run on the local loopback interface (`127.0.0.1`), which does not represent real-world internet conditions (latency, packet loss, etc.).
122 | 
123 | - **CPU Affinity**: For maximum consistency, especially on multi-core or multi-CPU (NUMA) systems, you can pin the server and client processes to specific CPU cores. This avoids performance penalties from processes migrating between cores or crossing CPU socket boundaries.
124 | 
125 | **On Linux:**
126 | Use `taskset` to specify a CPU core (e.g., core 0 for the server, core 1 for the client).
127 | 
128 | ```bash
129 | # Terminal 1
130 | taskset -c 0 python ws_bench_1_server.py
131 | 
132 | # Terminal 2
133 | taskset -c 1 python ws_bench_1_client.py
134 | ```
135 | 
136 | **On Windows:**
137 | Use the `start /affinity` command. The affinity mask is a hexadecimal number (`1` for CPU 0, `2` for CPU 1, `4` for CPU 2, etc.).
138 | 
139 | ```powershell
140 | # PowerShell/CMD 1
141 | start /affinity 1 python ws_bench_1_server.py
142 | 
143 | # PowerShell/CMD 2
144 | start /affinity 2 python ws_bench_1_client.py
145 | ```
146 | 
147 | - **Concurrent Tests**: In the first benchmark client (`ws_bench_1_client.py`), uncomment the sender task (`ws_sender`) to run the upload and download tests concurrently. Note that a concurrent test will terminate as soon as the faster of the two directions (typically download) completes.
148 | 
149 | - **Queue Sizes**: Adjust the `send_queue` and `recv_queue` sizes within the [`TestConfig`](ws_bench_utils.py) class to observe the impact on performance and backpressure; see the sketch below.
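For example, doubling the queue depths is a one-line-per-field edit. A sketch of such a tweak (the shipped defaults are shown in the comments; field names as in [`ws_bench_utils.py`](ws_bench_utils.py), and larger queues trade memory for less backpressure):

```python
# Sketch: fields of TestConfig (ws_bench_utils.py) commonly worth adjusting.
class TestConfig:
    total_gb: int = 10         # total transfer size in GiB
    chunk_size: int = 65536    # per-message payload in bytes
    recv_queue: int = 1024     # shipped default: 512
    send_queue: int = 256      # shipped default: 128
```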
150 | -------------------------------------------------------------------------------- /scripts/build.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import platform 4 | import shutil 5 | import struct 6 | import tempfile 7 | from glob import glob 8 | from pathlib import Path 9 | from urllib.request import urlretrieve 10 | 11 | from cffi import FFI 12 | 13 | # this is the upstream libcurl-impersonate version 14 | __version__ = "1.2.5" 15 | 16 | 17 | def detect_arch(): 18 | with open(Path(__file__).parent.parent / "libs.json") as f: 19 | archs = json.loads(f.read()) 20 | 21 | uname = platform.uname() 22 | glibc_flavor = "gnueabihf" if uname.machine in ["armv7l", "armv6l"] else "gnu" 23 | 24 | libc, _ = platform.libc_ver() 25 | # https://github.com/python/cpython/issues/87414 26 | libc = glibc_flavor if libc == "glibc" else "musl" 27 | pointer_size = struct.calcsize("P") * 8 28 | 29 | for arch in archs: 30 | if ( 31 | arch["system"] == uname.system 32 | and arch["machine"] == uname.machine 33 | and arch["pointer_size"] == pointer_size 34 | and ("libc" not in arch or arch.get("libc") == libc) 35 | ): 36 | if arch["libdir"]: 37 | arch["libdir"] = os.path.expanduser(arch["libdir"]) 38 | else: 39 | global tmpdir 40 | if "CI" in os.environ: 41 | tmpdir = "./tmplibdir" 42 | os.makedirs(tmpdir, exist_ok=True) 43 | arch["libdir"] = tmpdir 44 | else: 45 | tmpdir = tempfile.TemporaryDirectory() 46 | arch["libdir"] = tmpdir.name 47 | return arch 48 | raise Exception(f"Unsupported arch: {uname}") 49 | 50 | 51 | arch = detect_arch() 52 | print(f"Using {arch['libdir']} to store libcurl-impersonate") 53 | 54 | 55 | def download_libcurl(): 56 | if (Path(arch["libdir"]) / arch["so_name"]).exists(): 57 | print(".so files already downloaded.") 58 | return 59 | 60 | file = "libcurl-impersonate.tar.gz" 61 | sysname = "linux-" + arch["libc"] if arch["system"] == "Linux" else arch["sysname"] 62 | 63 | url = ( 64 | f"https://github.com/lexiforest/curl-impersonate/releases/download/" 65 | f"v{__version__}/libcurl-impersonate-v{__version__}" 66 | f".{arch['so_arch']}-{sysname}.tar.gz" 67 | ) 68 | 69 | print(f"Downloading libcurl-impersonate from {url}...") 70 | urlretrieve(url, file) 71 | 72 | print("Unpacking downloaded files...") 73 | os.makedirs(arch["libdir"], exist_ok=True) 74 | shutil.unpack_archive(file, arch["libdir"]) 75 | 76 | if arch["system"] == "Windows": 77 | for file in glob(os.path.join(arch["libdir"], "lib/*.lib")): 78 | shutil.move(file, arch["libdir"]) 79 | for file in glob(os.path.join(arch["libdir"], "bin/*.dll")): 80 | shutil.move(file, arch["libdir"]) 81 | 82 | print("Files after unpacking") 83 | print(os.listdir(arch["libdir"])) 84 | 85 | 86 | def get_curl_archives(): 87 | print("Files for linking") 88 | print(os.listdir(arch["libdir"])) 89 | if arch["system"] == "Linux" and arch.get("link_type") == "static": 90 | # note that the order of libraries matters 91 | # https://stackoverflow.com/a/36581865 92 | return [ 93 | f"{arch['libdir']}/libcurl-impersonate.a", 94 | f"{arch['libdir']}/libssl.a", 95 | f"{arch['libdir']}/libcrypto.a", 96 | f"{arch['libdir']}/libz.a", 97 | f"{arch['libdir']}/libzstd.a", 98 | f"{arch['libdir']}/libnghttp2.a", 99 | f"{arch['libdir']}/libngtcp2.a", 100 | f"{arch['libdir']}/libngtcp2_crypto_boringssl.a", 101 | f"{arch['libdir']}/libnghttp3.a", 102 | f"{arch['libdir']}/libbrotlidec.a", 103 | f"{arch['libdir']}/libbrotlienc.a", 104 | f"{arch['libdir']}/libbrotlicommon.a", 105 | 
f"{arch['libdir']}/libcares.a", 106 | ] 107 | else: 108 | return [] 109 | 110 | 111 | def get_curl_libraries(): 112 | if arch["system"] == "Windows": 113 | return [ 114 | "Crypt32", 115 | "Secur32", 116 | "wldap32", 117 | "Normaliz", 118 | "libcurl", 119 | "zstd", 120 | "zlib", 121 | "ssl", 122 | "nghttp2", 123 | "nghttp3", 124 | "ngtcp2", 125 | "ngtcp2_crypto_boringssl", 126 | "crypto", 127 | "brotlienc", 128 | "brotlidec", 129 | "brotlicommon", 130 | "iphlpapi", 131 | "cares", 132 | ] 133 | elif arch["system"] == "Darwin" or ( 134 | arch["system"] == "Linux" and arch.get("link_type") == "dynamic" 135 | ): 136 | return ["curl-impersonate"] 137 | else: 138 | return [] 139 | 140 | 141 | ffibuilder = FFI() 142 | system = platform.system() 143 | root_dir = Path(__file__).parent.parent 144 | download_libcurl() 145 | 146 | 147 | ffibuilder.set_source( 148 | "curl_cffi._wrapper", 149 | """ 150 | #include "shim.h" 151 | """, 152 | # FIXME from `curl-impersonate` 153 | libraries=get_curl_libraries(), 154 | extra_objects=get_curl_archives(), 155 | library_dirs=[arch["libdir"]], 156 | source_extension=".c", 157 | include_dirs=[ 158 | str(root_dir / "include"), 159 | str(root_dir / "ffi"), 160 | str(Path(arch["libdir"]) / "include"), 161 | ], 162 | sources=[ 163 | str(root_dir / "ffi/shim.c"), 164 | ], 165 | extra_compile_args=( 166 | ["-Wno-implicit-function-declaration"] if system == "Darwin" else [] 167 | ), 168 | extra_link_args=(["-lstdc++"] if system != "Windows" else []), 169 | ) 170 | 171 | with open(root_dir / "ffi/cdef.c") as f: 172 | cdef_content = f.read() 173 | ffibuilder.cdef(cdef_content) 174 | 175 | 176 | if __name__ == "__main__": 177 | ffibuilder.compile(verbose=False) 178 | -------------------------------------------------------------------------------- /benchmark/ws_bench_1_client.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Websocket client simple benchmark - TLS (WSS) 4 | """ 5 | 6 | import time 7 | from asyncio import ( 8 | FIRST_COMPLETED, 9 | AbstractEventLoop, 10 | CancelledError, 11 | Task, 12 | sleep, 13 | wait, 14 | ) 15 | 16 | from typing_extensions import Never 17 | from ws_bench_utils import binary_data_generator, config, get_loop, logger 18 | 19 | from curl_cffi import AsyncSession, AsyncWebSocket, WebSocketClosed 20 | 21 | 22 | def calculate_stats(start_time: float, total_len: int) -> tuple[float, float]: 23 | """Calculate the amount of time it took and the throughput average. 24 | 25 | Args: 26 | start_time (`float`): The start time from the performance counter 27 | 28 | Returns: 29 | `tuple[float, float]`: The duration and rate in Gbps 30 | """ 31 | end_time: float = time.perf_counter() 32 | duration: float = end_time - start_time 33 | rate_gbps: float = (total_len * 8) / duration / (1024**3) 34 | return duration, rate_gbps 35 | 36 | 37 | async def health_check() -> Never: 38 | """A simple coroutine that continuously prints a dot to prove that the event loop 39 | is alive and not starved from being able to run this task. 40 | 41 | Returns: 42 | Never: Keeps printing dots until the task is cancelled. 43 | """ 44 | counter = 0 45 | logger.info("Starting sanity check. 
You should see a continuous stream of dots '.'") 46 | logger.info("If the dots stop for a long time, the event loop is blocked.") 47 | try: 48 | while True: 49 | await sleep(0.05) 50 | print(".", end="", flush=True) 51 | counter += 1 52 | if counter % 100 == 0: 53 | print("") 54 | finally: 55 | print("\r\x1b[K", end="") 56 | logger.info("Sanity check complete.") 57 | 58 | 59 | async def ws_counter(ws: AsyncWebSocket) -> None: 60 | """Simple coroutine which counts how many bytes were received. 61 | 62 | Args: 63 | ws (`AsyncWebSocket`): Instantiated Curl CFFI AsyncWebSocket object. 64 | """ 65 | recvd_len: int = 0 66 | start_time: float = time.perf_counter() 67 | logger.info("Receiving data from server") 68 | try: 69 | async for msg in ws: 70 | recvd_len += len(msg) 71 | 72 | except WebSocketClosed as exc: 73 | logger.debug(exc) 74 | 75 | finally: 76 | duration, avg_rate = calculate_stats(start_time, recvd_len) 77 | print("\r\x1b[K", end="") 78 | logger.info( 79 | "Received: %.2f GB in %.2f seconds", recvd_len / (1024**3), duration 80 | ) 81 | logger.info("Average throughput (recv): %.2f Gbps", avg_rate) 82 | 83 | 84 | async def ws_sender(ws: AsyncWebSocket) -> None: 85 | """Simple coroutine which just sends the same chunk of bytes until exhausted. 86 | 87 | Args: 88 | ws (`AsyncWebSocket`): Instantiated Curl CFFI AsyncWebSocket object. 89 | """ 90 | sent_len: int = 0 91 | start_time: float = time.perf_counter() 92 | logger.info("Sending data to server") 93 | try: 94 | async for data_chunk in binary_data_generator( 95 | total_gb=config.total_gb, chunk_size=min(65535, config.chunk_size) 96 | ): 97 | _ = await ws.send(payload=data_chunk) 98 | sent_len += len(data_chunk) 99 | 100 | except WebSocketClosed as exc: 101 | logger.debug(exc) 102 | 103 | finally: 104 | duration, avg_rate = calculate_stats(start_time, sent_len) 105 | print("\r\x1b[K", end="") 106 | logger.info("Sent: %.2f GB in %.2f seconds", sent_len / (1024**3), duration) 107 | logger.info("Average throughput (send): %.2f Gbps", avg_rate) 108 | 109 | 110 | async def run_benchmark(loop: AbstractEventLoop) -> None: 111 | """ 112 | Simple client benchmark which sends/receives binary messages using curl-cffi. 113 | """ 114 | logger.info("Starting curl-cffi benchmark") 115 | ws: AsyncWebSocket | None = None 116 | waiters: set[Task[None]] = set() 117 | try: 118 | async with AsyncSession(impersonate="chrome", verify=False) as session: 119 | ws = await session.ws_connect( 120 | config.srv_path, 121 | recv_queue_size=config.recv_queue, 122 | send_queue_size=config.send_queue, 123 | ) 124 | logger.info("Connection established to %s", config.srv_path) 125 | 126 | # NOTE: Uncomment for send/recv benchmark or both 127 | waiters.add(loop.create_task(ws_counter(ws))) 128 | # waiters.add(loop.create_task(ws_sender(ws))) 129 | 130 | _, _ = await wait(waiters, return_when=FIRST_COMPLETED) 131 | 132 | except Exception: 133 | logger.exception("curl-cffi benchmark failed") 134 | raise 135 | 136 | finally: 137 | for wait_task in waiters: 138 | try: 139 | if not wait_task.done(): 140 | _ = wait_task.cancel() 141 | await wait_task 142 | 143 | except CancelledError: 144 | ... 
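        # Waiter tasks are now finished or cancelled; close the WebSocket if it was opened.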
145 | if ws: 146 | await ws.close(timeout=2) 147 | 148 | 149 | async def main(loop: AbstractEventLoop) -> None: 150 | """Entrypoint""" 151 | waiters: set[Task[None]] = set() 152 | 153 | try: 154 | # Create the health check and benchmark tasks 155 | waiters.update( 156 | {loop.create_task(health_check()), loop.create_task(run_benchmark(loop))} 157 | ) 158 | _, _ = await wait(waiters, return_when=FIRST_COMPLETED) 159 | 160 | except (KeyboardInterrupt, CancelledError): 161 | logger.debug("Cancelling benchmark") 162 | 163 | finally: 164 | for wait_task in waiters: 165 | try: 166 | if not wait_task.done(): 167 | _ = wait_task.cancel() 168 | await wait_task 169 | except CancelledError: 170 | ... 171 | 172 | 173 | if __name__ == "__main__": 174 | evt_loop: AbstractEventLoop = get_loop() 175 | try: 176 | evt_loop.run_until_complete(main(evt_loop)) 177 | finally: 178 | evt_loop.close() 179 | -------------------------------------------------------------------------------- /examples/stream.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from contextlib import closing 3 | 4 | import curl_cffi 5 | 6 | try: 7 | # Python 3.10+ 8 | from contextlib import aclosing # pyright: ignore 9 | except ImportError: 10 | from contextlib import asynccontextmanager 11 | 12 | @asynccontextmanager 13 | async def aclosing(thing): 14 | try: 15 | yield thing 16 | finally: 17 | await thing.aclose() 18 | 19 | 20 | URL = "https://httpbin.org/stream/20" 21 | 22 | with curl_cffi.Session() as s: 23 | print("\n======================================================================") 24 | print("Iterating over chunks") 25 | print("=====================================================================\n") 26 | r = s.get(URL, stream=True) 27 | for chunk in r.iter_content(): 28 | print("Status: ", r.status_code) 29 | assert r.status_code == 200 30 | print("CHUNK", chunk) 31 | r.close() 32 | 33 | print("\n====================================================================") 34 | print("Empty body is fine.") 35 | print("====================================================================\n") 36 | response = s.get("https://httpbin.org/status/202", stream=True) 37 | print(response.status_code) 38 | response.close() 39 | 40 | print("\n====================================================================") 41 | print("Using with stream") 42 | print("====================================================================\n") 43 | with s.stream("GET", URL) as r: 44 | print("Status: ", r.status_code) 45 | for chunk in r.iter_content(): 46 | assert r.status_code == 200 47 | print("CHUNK", chunk) 48 | 49 | print("\n=====================================================================") 50 | print("Iterating on a line basis") 51 | print("=====================================================================\n") 52 | r = s.get(URL, stream=True) 53 | print("Status: ", r.status_code) 54 | for line in r.iter_lines(): 55 | assert r.status_code == 200 56 | print("LINE", line.decode()) 57 | r.close() 58 | 59 | print("\n======================================================================") 60 | print("Break when reading") 61 | print("=====================================================================\n") 62 | r = s.get("https://httpbin.org/drip", stream=True) 63 | for idx, chunk in enumerate(r.iter_content()): 64 | print(f"{idx}={chunk.decode()}", end="#", flush=True) 65 | if idx == 3: 66 | break 67 | r.close() 68 | 69 | 
print("\n=====================================================================") 70 | print("Better, using closing to ensure the response is closed") 71 | print("=====================================================================\n") 72 | with closing(s.get(URL, stream=True)) as r: 73 | for chunk in r.iter_content(): 74 | print("Status: ", r.status_code) 75 | assert r.status_code == 200 76 | print("CHUNK", chunk) 77 | 78 | 79 | async def async_examples(): 80 | async with curl_cffi.AsyncSession() as s: 81 | print("\n====================================================================") 82 | print("Using asyncio") 83 | print("====================================================================\n") 84 | r = await s.get(URL, stream=True) 85 | async for chunk in r.aiter_content(): 86 | print("Status: ", r.status_code) 87 | assert r.status_code == 200 88 | print("CHUNK", chunk) 89 | await r.aclose() 90 | 91 | print("\n====================================================================") 92 | print("Empty body is fine.") 93 | print("====================================================================\n") 94 | response = await s.get("https://httpbin.org/status/202", stream=True) 95 | print(response.status_code) 96 | await response.aclose() 97 | 98 | print("\n====================================================================") 99 | print("Using asyncio async with stream") 100 | print("====================================================================\n") 101 | async with s.stream("GET", URL) as r: 102 | async for chunk in r.aiter_content(): 103 | print("Status: ", r.status_code) 104 | assert r.status_code == 200 105 | print("CHUNK", chunk) 106 | 107 | print( 108 | "\n======================================================================" 109 | ) 110 | print("Break when reading") 111 | print("=====================================================================\n") 112 | async with s.stream("GET", "https://httpbin.org/drip") as r: 113 | idx = 0 114 | async for chunk in r.aiter_content(): 115 | idx += 1 116 | print(f"{idx}={chunk.decode()}", end="#", flush=True) 117 | if idx == 3: 118 | break 119 | 120 | print("\n====================================================================") 121 | print("Stream, but not stream, await atext") 122 | print("====================================================================\n") 123 | async with s.stream("GET", URL) as r: 124 | print(await r.atext()) 125 | 126 | print("\n====================================================================") 127 | print("Using asyncio async with stream") 128 | print("====================================================================\n") 129 | async with s.stream("GET", URL) as r: 130 | async for chunk in r.aiter_content(): 131 | print("Status: ", r.status_code) 132 | assert r.status_code == 200 133 | print("CHUNK", chunk) 134 | 135 | print("\n====================================================================") 136 | print("Better, using aclosing to ensure the response is closed") 137 | print("====================================================================\n") 138 | async with aclosing(await s.get(URL.replace("20", "100"), stream=True)) as r: 139 | async for chunk in r.aiter_content(): 140 | print("Status: ", r.status_code) 141 | assert r.status_code == 200 142 | print("CHUNK", chunk) 143 | 144 | 145 | asyncio.run(async_examples()) 146 | -------------------------------------------------------------------------------- /curl_cffi/requests/__init__.py: 
--------------------------------------------------------------------------------
1 | __all__ = [
2 |     "Session",
3 |     "AsyncSession",
4 |     "BrowserType",
5 |     "BrowserTypeLiteral",
6 |     "CurlWsFlag",
7 |     "request",
8 |     "head",
9 |     "get",
10 |     "post",
11 |     "put",
12 |     "patch",
13 |     "delete",
14 |     "options",
15 |     "RequestsError",
16 |     "Cookies",
17 |     "Headers",
18 |     "Request",
19 |     "Response",
20 |     "AsyncWebSocket",
21 |     "WebSocket",
22 |     "WebSocketError",
23 |     "WebSocketClosed",
24 |     "WebSocketTimeout",
25 |     "WsCloseCode",
26 |     "ExtraFingerprints",
27 |     "CookieTypes",
28 |     "HeaderTypes",
29 |     "ProxySpec",
30 | ]
31 | 
32 | from typing import Optional, TYPE_CHECKING, TypedDict
33 | 
34 | from ..const import CurlWsFlag
35 | from .cookies import Cookies, CookieTypes
36 | from .errors import RequestsError
37 | from .headers import Headers, HeaderTypes
38 | from .impersonate import BrowserType, BrowserTypeLiteral, ExtraFingerprints
39 | from .models import Request, Response
40 | from .session import (
41 |     AsyncSession,
42 |     HttpMethod,
43 |     ProxySpec,
44 |     Session,
45 |     ThreadType,
46 |     RequestParams,
47 |     Unpack,
48 | )
49 | from .websockets import (
50 |     AsyncWebSocket,
51 |     WebSocket,
52 |     WebSocketClosed,
53 |     WebSocketError,
54 |     WebSocketTimeout,
55 |     WsCloseCode,
56 | )
57 | 
58 | if TYPE_CHECKING:
59 | 
60 |     class SessionRequestParams(RequestParams, total=False):
61 |         thread: Optional[ThreadType]
62 |         curl_options: Optional[dict]
63 |         debug: Optional[bool]
64 | else:
65 |     SessionRequestParams = TypedDict
66 | 
67 | 
68 | def request(
69 |     method: HttpMethod,
70 |     url: str,
71 |     thread: Optional[ThreadType] = None,
72 |     curl_options: Optional[dict] = None,
73 |     debug: Optional[bool] = None,
74 |     **kwargs: Unpack[RequestParams],
75 | ) -> Response:
76 |     """Send an HTTP request.
77 | 
78 |     Parameters:
79 |         method: http method for the request: GET/POST/PUT/DELETE etc.
80 |         url: url for the request.
81 |         params: query string for the request.
82 |         data: form values (dict/list/tuple) or binary data to use in body,
83 |             ``Content-Type: application/x-www-form-urlencoded`` will be added if a dict
84 |             is given.
85 |         json: json values to use in body, `Content-Type: application/json` will be added
86 |             automatically.
87 |         headers: headers to send.
88 |         cookies: cookies to use.
89 |         files: not supported, use ``multipart`` instead.
90 |         auth: HTTP basic auth, a tuple of (username, password), only basic auth is
91 |             supported.
92 |         timeout: how many seconds to wait before giving up.
93 |         allow_redirects: whether to allow redirection.
94 |         max_redirects: max redirect counts, default 30, use -1 for unlimited.
95 |         proxies: dict of proxies to use; prefer the ``proxy`` parameter if they are all the same.
96 |             format: ``{"http": proxy_url, "https": proxy_url}``.
97 |         proxy: proxy to use, format: "http://user:pass@proxy_url".
98 |             Can't be used with `proxies` parameter.
99 |         proxy_auth: HTTP basic auth for proxy, a tuple of (username, password).
100 |         verify: whether to verify https certs.
101 |         referer: shortcut for setting referer header.
102 |         accept_encoding: shortcut for setting accept-encoding header.
103 |         content_callback: a callback function to receive response body.
104 |             ``def callback(chunk: bytes) -> None:``
105 |         impersonate: which browser version to impersonate.
106 |         ja3: ja3 string to impersonate.
107 |         akamai: akamai string to impersonate.
108 |         extra_fp: extra fingerprints options, in complement to ja3 and akamai strings.
109 |         thread: thread engine to use for working with other thread implementations.
110 |             choices: eventlet, gevent.
111 |         default_headers: whether to set default browser headers when impersonating.
112 |         default_encoding: encoding for decoding response content if charset is not found
113 |             in headers. Defaults to "utf-8". Can be set to a callable for automatic
114 |             detection.
115 |         quote: Set characters to be quoted, i.e. percent-encoded. Default safe string
116 |             is ``!#$%&'()*+,/:;=?@[]~``. If set to a string, those characters will be
117 |             removed from the safe string, and thus quoted. If set to False, the url will be
118 |             kept as is, without any automatic percent-encoding; you must encode the URL
119 |             yourself.
120 |         curl_options: extra curl options to use.
121 |         http_version: limiting http version, defaults to http2.
122 |         debug: print extra curl debug info.
123 |         interface: which interface to use.
124 |         cert: a tuple of (cert, key) filenames for client cert.
125 |         stream: streaming the response, default False.
126 |         max_recv_speed: maximum receive speed, bytes per second.
127 |         multipart: upload files using the multipart format, see examples for details.
128 |         discard_cookies: discard cookies from server. Defaults to False.
129 | 
130 |     Returns:
131 |         A ``Response`` object.
132 |     """
133 |     debug = False if debug is None else debug
134 |     with Session(thread=thread, curl_options=curl_options, debug=debug) as s:
135 |         return s.request(method=method, url=url, **kwargs)
136 | 
137 | 
138 | def head(url: str, **kwargs: Unpack[SessionRequestParams]):
139 |     return request(method="HEAD", url=url, **kwargs)
140 | 
141 | 
142 | def get(url: str, **kwargs: Unpack[SessionRequestParams]):
143 |     return request(method="GET", url=url, **kwargs)
144 | 
145 | 
146 | def post(url: str, **kwargs: Unpack[SessionRequestParams]):
147 |     return request(method="POST", url=url, **kwargs)
148 | 
149 | 
150 | def put(url: str, **kwargs: Unpack[SessionRequestParams]):
151 |     return request(method="PUT", url=url, **kwargs)
152 | 
153 | 
154 | def patch(url: str, **kwargs: Unpack[SessionRequestParams]):
155 |     return request(method="PATCH", url=url, **kwargs)
156 | 
157 | 
158 | def delete(url: str, **kwargs: Unpack[SessionRequestParams]):
159 |     return request(method="DELETE", url=url, **kwargs)
160 | 
161 | 
162 | def options(url: str, **kwargs: Unpack[SessionRequestParams]):
163 |     return request(method="OPTIONS", url=url, **kwargs)
164 | 
165 | 
166 | def trace(url: str, **kwargs: Unpack[SessionRequestParams]):
167 |     return request(method="TRACE", url=url, **kwargs)
168 | 
169 | 
170 | def query(url: str, **kwargs: Unpack[SessionRequestParams]):
171 |     return request(method="QUERY", url=url, **kwargs)
172 | 
--------------------------------------------------------------------------------
/docs/api.rst:
--------------------------------------------------------------------------------
1 | API References
2 | ==============
3 | 
4 | curl low level APIs
5 | -------------------
6 | 
7 | Curl
8 | ~~~~~~
9 | 
10 | .. autoclass:: curl_cffi.Curl
11 | 
12 |     .. automethod:: __init__
13 |     .. automethod:: debug
14 |     .. automethod:: setopt
15 |     .. automethod:: getinfo
16 |     .. automethod:: version
17 |     .. automethod:: impersonate
18 |     .. automethod:: perform
19 |     .. automethod:: duphandle
20 |     .. automethod:: reset
21 |     .. automethod:: parse_cookie_headers
22 |     .. automethod:: get_reason_phrase
23 |     .. automethod:: parse_status_line
24 |     .. automethod:: close
25 |     .. automethod:: ws_recv
26 |     .. automethod:: ws_send
27 |     .. automethod:: ws_close
28 | 
29 | AsyncCurl
30 | ~~~~~~~~~
31 | 
32 | .. autoclass:: curl_cffi.AsyncCurl
33 | 
34 |     .. 
automethod:: __init__
35 |     .. automethod:: add_handle
36 |     .. automethod:: remove_handle
37 |     .. automethod:: set_result
38 |     .. automethod:: set_exception
39 |     .. automethod:: setopt
40 |     .. automethod:: socket_action
41 |     .. automethod:: process_data
42 |     .. automethod:: close
43 | 
44 | CurlMime
45 | ~~~~~~~~
46 | 
47 | .. autoclass:: curl_cffi.CurlMime
48 | 
49 |     .. automethod:: __init__
50 |     .. automethod:: addpart
51 |     .. automethod:: from_list
52 |     .. automethod:: attach
53 |     .. automethod:: close
54 | 
55 | Constants
56 | ~~~~~~~~~
57 | 
58 | Enum values used by ``setopt`` and ``getinfo`` can be accessed from ``CurlOpt`` and
59 | ``CurlInfo``.
60 | 
61 | .. autoclass:: curl_cffi.CurlOpt
62 | .. autoclass:: curl_cffi.CurlInfo
63 | .. autoclass:: curl_cffi.CurlMOpt
64 | .. autoclass:: curl_cffi.CurlECode
65 | .. autoclass:: curl_cffi.CurlHttpVersion
66 | .. autoclass:: curl_cffi.CurlWsFlag
67 | .. autoclass:: curl_cffi.CurlSslVersion
68 | 
69 | requests-like API
70 | -----------------
71 | 
72 | request method
73 | ~~~~~~~~~~~~~~
74 | 
75 | ``requests.get``, ``requests.post``, etc. are just aliases of ``.request(METHOD, ...)``
76 | 
77 | .. autofunction:: curl_cffi.requests.request
78 | 
79 | 
80 | Sessions
81 | ~~~~~~~~
82 | 
83 | .. autoclass:: curl_cffi.requests.Session
84 | 
85 |     .. automethod:: __init__
86 |     .. automethod:: request
87 |     .. automethod:: stream
88 |     .. automethod:: ws_connect
89 | 
90 | 
91 | .. autoclass:: curl_cffi.requests.AsyncSession
92 | 
93 |     .. automethod:: __init__
94 |     .. automethod:: request
95 |     .. automethod:: stream
96 |     .. automethod:: close
97 |     .. automethod:: ws_connect
98 | 
99 | Headers
100 | ~~~~~~~
101 | 
102 | .. autoclass:: curl_cffi.requests.Headers
103 | 
104 |     .. autoproperty:: encoding
105 |     .. automethod:: raw
106 |     .. automethod:: multi_items
107 |     .. automethod:: get
108 |     .. automethod:: get_list
109 |     .. automethod:: update
110 |     .. automethod:: __getitem__
111 |     .. automethod:: __setitem__
112 |     .. automethod:: __delitem__
113 | 
114 | Cookies
115 | ~~~~~~~
116 | 
117 | .. autoclass:: curl_cffi.requests.Cookies
118 | 
119 |     .. automethod:: set
120 |     .. automethod:: get
121 |     .. automethod:: delete
122 |     .. automethod:: clear
123 |     .. automethod:: update
124 |     .. automethod:: __getitem__
125 |     .. automethod:: __setitem__
126 |     .. automethod:: __delitem__
127 | 
128 | Request, Response
129 | ~~~~~~~~~~~~~~~~~
130 | 
131 | .. autoclass:: curl_cffi.requests.Request
132 | 
133 | .. autoclass:: curl_cffi.requests.Response
134 | 
135 |     .. automethod:: raise_for_status
136 |     .. automethod:: iter_lines
137 |     .. automethod:: iter_content
138 |     .. automethod:: json
139 |     .. automethod:: close
140 |     .. automethod:: aiter_lines
141 |     .. automethod:: aiter_content
142 |     .. automethod:: atext
143 |     .. automethod:: acontent
144 |     .. automethod:: aclose
145 | 
146 | Asyncio
147 | -------
148 | 
149 | WebSocket
150 | ---------
151 | 
152 | .. autoclass:: curl_cffi.requests.WebSocket
153 | 
154 |     .. automethod:: __init__
155 |     .. automethod:: connect
156 |     .. automethod:: recv_fragment
157 |     .. automethod:: recv
158 |     .. automethod:: recv_str
159 |     .. automethod:: recv_json
160 |     .. automethod:: send
161 |     .. automethod:: send_binary
162 |     .. automethod:: send_bytes
163 |     .. automethod:: send_str
164 |     .. automethod:: send_json
165 |     .. automethod:: ping
166 |     .. automethod:: run_forever
167 |     .. automethod:: close
168 | 
169 | .. autoclass:: curl_cffi.requests.AsyncWebSocket
170 | 
171 |     .. automethod:: __init__
172 |     .. automethod:: recv_fragment
173 |     .. automethod:: recv
174 |     .. 
automethod:: recv_str
175 |     .. automethod:: recv_json
176 |     .. automethod:: send
177 |     .. automethod:: send_binary
178 |     .. automethod:: send_bytes
179 |     .. automethod:: send_str
180 |     .. automethod:: send_json
181 |     .. automethod:: ping
182 |     .. automethod:: close
183 | 
184 | Exceptions and Warnings
185 | -----------------------
186 | 
187 | Exceptions
188 | ~~~~~~~~~~~~~~
189 | 
190 | We try to follow the `requests` exception hierarchy; however, some are missing, while
191 | some are added.
192 | 
193 | If an exception is marked as "not used", please catch the base exception.
194 | 
195 | 
196 | .. autoclass:: curl_cffi.requests.exceptions.RequestException
197 | .. autoclass:: curl_cffi.requests.exceptions.CookieConflict
198 | .. autoclass:: curl_cffi.requests.exceptions.SessionClosed
199 | .. autoclass:: curl_cffi.requests.exceptions.ImpersonateError
200 | .. autoclass:: curl_cffi.requests.exceptions.InvalidJSONError
201 | .. autoclass:: curl_cffi.requests.exceptions.HTTPError
202 | .. autoclass:: curl_cffi.requests.exceptions.IncompleteRead
203 | .. autoclass:: curl_cffi.requests.exceptions.ConnectionError
204 | .. autoclass:: curl_cffi.requests.exceptions.DNSError
205 | .. autoclass:: curl_cffi.requests.exceptions.ProxyError
206 | .. autoclass:: curl_cffi.requests.exceptions.SSLError
207 | .. autoclass:: curl_cffi.requests.exceptions.CertificateVerifyError
208 | .. autoclass:: curl_cffi.requests.exceptions.Timeout
209 | .. autoclass:: curl_cffi.requests.exceptions.ConnectTimeout
210 | .. autoclass:: curl_cffi.requests.exceptions.ReadTimeout
211 | .. autoclass:: curl_cffi.requests.exceptions.URLRequired
212 | .. autoclass:: curl_cffi.requests.exceptions.TooManyRedirects
213 | .. autoclass:: curl_cffi.requests.exceptions.MissingSchema
214 | .. autoclass:: curl_cffi.requests.exceptions.InvalidSchema
215 | .. autoclass:: curl_cffi.requests.exceptions.InvalidURL
216 | .. autoclass:: curl_cffi.requests.exceptions.InvalidHeader
217 | .. autoclass:: curl_cffi.requests.exceptions.InvalidProxyURL
218 | .. autoclass:: curl_cffi.requests.exceptions.ChunkedEncodingError
219 | .. autoclass:: curl_cffi.requests.exceptions.ContentDecodingError
220 | .. autoclass:: curl_cffi.requests.exceptions.StreamConsumedError
221 | .. autoclass:: curl_cffi.requests.exceptions.RetryError
222 | .. autoclass:: curl_cffi.requests.exceptions.UnrewindableBodyError
223 | .. autoclass:: curl_cffi.requests.exceptions.InterfaceError
224 | 
225 | Warnings
226 | ~~~~~~~~~~~~~~
227 | 
228 | .. autoclass:: curl_cffi.requests.exceptions.RequestsWarning
229 | .. autoclass:: curl_cffi.requests.exceptions.FileModeWarning
230 | .. autoclass:: curl_cffi.requests.exceptions.RequestsDependencyWarning
231 | 
--------------------------------------------------------------------------------
/docs/faq.rst:
--------------------------------------------------------------------------------
1 | FAQ
2 | ==========================
3 | 
4 | What does the pro version offer? Is the open source project still maintained?
5 | ------------------------------------------------------------------------------
6 | 
7 | Yes, the open source project is maintained as before.
8 | 
9 | In the `pro version `_, we provide:
10 | 
11 | - weekly update of targets
12 | - profiles for mobile browsers and apps
13 | - some private detection fields
14 | - http/3 fingerprints and proxy support
15 | 
16 | And a better financial situation will help keep the open source version well maintained.
17 | 
18 | Why do the JA3 fingerprints change for Chrome 110+ impersonation?
19 | ------------------------------------------------------------------
20 | 
21 | This is intended.
22 | 
23 | Chrome introduces ``ClientHello`` permutation in version 110, which means the order of
24 | extensions will be random, thus JA3 fingerprints will be random. So, when comparing
25 | JA3 fingerprints of ``curl_cffi`` and a browser, they may differ. However, this does not
26 | mean that TLS fingerprints will not be a problem; ``ClientHello`` extension order is just
27 | one factor in how servers can tell automated requests from browsers.
28 | 
29 | Roughly, this can be mitigated by comparing JA3N instead:
30 | 
31 | .. code-block::
32 | 
33 |     ja3 = md5(list(extensions), ...other arguments)
34 |     ja3n = md5(set(extensions), ...other arguments)
35 | 
36 | See more from `this article `_
37 | and `curl-impersonate notes `_.
38 | 
39 | Can I bypass Cloudflare with this project? Or any other specific site?
40 | -----------------------------------------------------------------------
41 | 
42 | The short answer is: it depends.
43 | 
44 | TLS and http2 fingerprints are just one of the many factors Cloudflare considers. Other
45 | factors include but are not limited to: IP quality, request rate, JS fingerprints, etc.
46 | 
47 | There are different protection levels for website owners to choose from. For the most basic
48 | ones, TLS fingerprints alone may be enough, but for higher levels, you may need to find
49 | a better proxy IP provider and use browser automation tools like playwright.
50 | 
51 | If you are in a hurry or just want the professionals to take care of the hard parts,
52 | you can consider the commercial solutions from our sponsors:
53 | 
54 | - `Yescaptcha `_, captcha resolver and proxy service for bypassing Cloudflare.
55 | - `ScrapeNinja `_, managed web scraping API.
56 | 
57 | For details, see the `Sponsor` section on the front page.
58 | 
59 | 
60 | I'm getting cert errors
61 | -----------------------
62 | 
63 | The simplest way is to turn off cert verification with ``verify=False``:
64 | 
65 | .. code-block:: python
66 | 
67 |     r = curl_cffi.get("https://example.com", verify=False)
68 | 
69 | 
70 | ErrCode: 77, Reason: error setting certificate verify locations
71 | ----------------------------------------------------------------
72 | 
73 | On Windows, if your Python environment or CA bundle path contains non-ASCII characters
74 | (e.g. accents), libcurl may fail to open the CA file when passed as a narrow ``char*``.
75 | ``curl_cffi`` now encodes file-path options (e.g. ``CAINFO``, ``PROXY_CAINFO``,
76 | ``SSLCERT``) using the system's preferred ANSI code page on Windows to ensure correct
77 | file access. This fixes most occurrences of error 77.
78 | 
79 | How to use Fiddler/Charles to intercept content
80 | ------------------------------------------------
81 | 
82 | Fiddler and Charles use man-in-the-middle self-signed certs to intercept TLS traffic;
83 | to use them, simply set ``verify=False``.
84 | 
85 | 
86 | ErrCode: 92, Reason: 'HTTP/2 stream 0 was not closed cleanly: PROTOCOL_ERROR (err 1)'
87 | --------------------------------------------------------------------------------------
88 | 
89 | This error (http/2 stream 0) has been reported many times ever since `curl_cffi` was
90 | published, but I still can not find a reproducible way to trigger it. Given that the
91 | majority of users are behind proxies, the situation is even more difficult to deal with.
92 | 
93 | I'm not even sure whether it's a bug introduced in libcurl, curl-impersonate or curl_cffi,
94 | or whether it's just a server error. Depending on your context, here are some general
95 | suggestions for you:
96 | 
97 | - First, try removing the ``Content-Length`` header from your request.
98 | - Try to see if this error was caused by proxies; if so, use better proxies.
99 | - If it stops working after a while, maybe you're just being blocked by, for example, Akamai.
100 | - Force http/1.1 mode. Some websites' h2 implementation is simply broken.
101 | - See if the url works in your real browser.
102 | - Find a stable way to reproduce it, so we can finally fix, or at least bypass it.
103 | 
104 | To force curl to use http/1.1 only:
105 | 
106 | .. code-block:: python
107 | 
108 |     import curl_cffi
109 | 
110 |     r = curl_cffi.get("https://postman-echo.com", http_version=curl_cffi.CurlHttpVersion.V1_1)
111 | 
112 | Related issues:
113 | 
114 | - `#19 `_,
115 | - `#42 `_,
116 | - `#79 `_,
117 | - `#165 `_,
118 | 
119 | 
120 | Packaging with PyInstaller
121 | --------------------------
122 | 
123 | If you encounter any issues with PyInstaller, here is a list of options provided by the
124 | community:
125 | 
126 | Add the ``--hidden-import`` option.
127 | 
128 | .. code-block::
129 | 
130 |     pyinstaller -F .\example.py --hidden-import=_cffi_backend --collect-all curl_cffi
131 | 
132 | Add other paths:
133 | 
134 | .. code-block::
135 | 
136 |     pyinstaller --noconfirm --onefile --console \
137 |         --paths "C:/Users/Administrator/AppData/Local/Programs/Python/Python39" \
138 |         --add-data "C:/Users/Administrator/AppData/Local/Programs/Python/Python39/Lib/site-packages/curl_cffi.libs/libcurl-cbb416caa1dd01638554eab3f38d682d.dll;." \
139 |         --collect-data "curl_cffi" \
140 |         "C:/Users/Administrator/Desktop/test_script.py"
141 | 
142 | 
143 | See also:
144 | 
145 | - `#5 `_
146 | - `#48 `_
147 | 
148 | 
149 | How to change the order of headers?
150 | -----------------------------------
151 | 
152 | By default, setting the ``impersonate`` parameter will bring the corresponding headers. If
153 | you want to change the order or use your own headers, you need to turn that off and bring
154 | your own headers.
155 | 
156 | .. code-block::
157 | 
158 |     requests.get(url, impersonate="chrome", default_headers=False, headers=...)
159 | 
160 | 
161 | How to deal with encoding/decoding errors?
162 | ------------------------------------------
163 | 
164 | Use ``chardet`` or ``cchardet``:
165 | 
166 | .. code-block::
167 | 
168 |     >>> import curl_cffi
169 |     >>> r = curl_cffi.get("https://example.com/messy_codec.html")
170 |     >>> import chardet
171 |     >>> chardet.detect(r.content)
172 |     {'encoding': 'GB2312', 'confidence': 0.99, 'language': 'Chinese'}
173 | 
174 | Or use regex or lxml to parse the meta header:
175 | 
176 | .. code-block::
177 | 
178 |     >>> import re
179 |     >>> re.search(rb"charset=['\"]?([\w-]+)", r.content).group(1)
180 |     b'GB2312'
181 | 
--------------------------------------------------------------------------------
/curl_cffi/requests/exceptions.py:
--------------------------------------------------------------------------------
1 | # Apache 2.0 License
2 | # Vendored from https://github.com/psf/requests/blob/main/src/requests/exceptions.py
3 | # With our own additions
4 | 
5 | import json
6 | from typing import Literal, Union
7 | 
8 | from ..const import CurlECode
9 | from ..curl import CurlError
10 | 
11 | 
12 | # Note IOError is an alias of OSError in Python 3.x
13 | class RequestException(CurlError, OSError):
14 |     """Base exception for curl_cffi.requests package"""
15 | 
16 |     def __init__(
17 |         self,
18 |         msg,
19 |         code: Union[CurlECode, Literal[0]] = 0,
20 |         response=None,
21 |         *args,
22 |         **kwargs,
23 |     ):
24 |         super().__init__(msg, code, *args, **kwargs)
25 |         self.response = response
26 | 
27 | 
28 | class CookieConflict(RequestException):
29 |     """Same cookie exists for different domains."""
30 | 
31 | 
32 | class SessionClosed(RequestException):
33 |     """The session has already been closed."""
34 | 
35 | 
36 | class ImpersonateError(RequestException):
37 |     """The impersonate config was wrong or impersonate failed."""
38 | 
39 | 
40 | # not used
41 | class InvalidJSONError(RequestException):
42 |     """A JSON error occurred. not used"""
43 | 
44 | 
45 | # not used
46 | class JSONDecodeError(InvalidJSONError, json.JSONDecodeError):
47 |     """Couldn't decode the text into json. not used"""
48 | 
49 | 
50 | class HTTPError(RequestException):
51 |     """An HTTP error occurred."""
52 | 
53 | 
54 | class IncompleteRead(HTTPError):
55 |     """Incomplete read of content"""
56 | 
57 | 
58 | class ConnectionError(RequestException):
59 |     """A Connection error occurred."""
60 | 
61 | 
62 | class DNSError(ConnectionError):
63 |     """Could not resolve"""
64 | 
65 | 
66 | class ProxyError(RequestException):
67 |     """A proxy error occurred."""
68 | 
69 | 
70 | class SSLError(ConnectionError):
71 |     """An SSL error occurred."""
72 | 
73 | 
74 | class CertificateVerifyError(SSLError):
75 |     """Raised when certificate validation has failed"""
76 | 
77 | 
78 | class Timeout(RequestException):
79 |     """The request timed out."""
80 | 
81 | 
82 | # not used
83 | class ConnectTimeout(ConnectionError, Timeout):
84 |     """The request timed out while trying to connect to the remote server.
85 | 
86 |     Requests that produced this error are safe to retry.
87 | 
88 |     not used
89 |     """
90 | 
91 | 
92 | # not used
93 | class ReadTimeout(Timeout):
94 |     """The server did not send any data in the allotted amount of time. not used"""
95 | 
96 | 
97 | # not used
98 | class URLRequired(RequestException):
99 |     """A valid URL is required to make a request. not used"""
100 | 
101 | 
102 | class TooManyRedirects(RequestException):
103 |     """Too many redirects."""
104 | 
105 | 
106 | # not used
107 | class MissingSchema(RequestException, ValueError):
108 |     """The URL scheme (e.g. http or https) is missing. not used"""
109 | 
110 | 
111 | class InvalidSchema(RequestException, ValueError):
112 |     """The URL scheme provided is either invalid or unsupported. not used"""
113 | 
114 | 
115 | class InvalidURL(RequestException, ValueError):
116 |     """The URL provided was somehow invalid."""
117 | 
118 | 
119 | # not used
120 | class InvalidHeader(RequestException, ValueError):
121 |     """The header value provided was somehow invalid. not used"""
122 | 
123 | 
124 | # not used
125 | class InvalidProxyURL(InvalidURL):
126 |     """The proxy URL provided is invalid. 
not used""" 127 | 128 | 129 | # not used 130 | class ChunkedEncodingError(RequestException): 131 | """The server declared chunked encoding but sent an invalid chunk. not used""" 132 | 133 | 134 | # not used 135 | class ContentDecodingError(RequestException): 136 | """Failed to decode response content. not used""" 137 | 138 | 139 | # not used 140 | class StreamConsumedError(RequestException, TypeError): 141 | """The content for this response was already consumed. not used""" 142 | 143 | 144 | # does not support 145 | class RetryError(RequestException): 146 | """Custom retries logic failed. not used""" 147 | 148 | 149 | # not used 150 | class UnrewindableBodyError(RequestException): 151 | """Requests encountered an error when trying to rewind a body. not used""" 152 | 153 | 154 | class InterfaceError(RequestException): 155 | """A specified outgoing interface could not be used.""" 156 | 157 | 158 | # Warnings 159 | 160 | 161 | # TODO: use this warning as a base 162 | class RequestsWarning(Warning): 163 | """Base warning for Requests. not used""" 164 | 165 | 166 | # not used 167 | class FileModeWarning(RequestsWarning, DeprecationWarning): 168 | """A file was opened in text mode, but Requests determined its binary length. 169 | not used""" 170 | 171 | 172 | # not used 173 | class RequestsDependencyWarning(RequestsWarning): 174 | """An imported dependency doesn't match the expected version range.""" 175 | 176 | 177 | CODE2ERROR = { 178 | 0: RequestException, 179 | CurlECode.UNSUPPORTED_PROTOCOL: InvalidSchema, 180 | CurlECode.URL_MALFORMAT: InvalidURL, 181 | CurlECode.COULDNT_RESOLVE_PROXY: ProxyError, 182 | CurlECode.COULDNT_RESOLVE_HOST: DNSError, 183 | CurlECode.COULDNT_CONNECT: ConnectionError, 184 | CurlECode.WEIRD_SERVER_REPLY: ConnectionError, 185 | CurlECode.REMOTE_ACCESS_DENIED: ConnectionError, 186 | CurlECode.HTTP2: HTTPError, 187 | CurlECode.HTTP_RETURNED_ERROR: HTTPError, 188 | CurlECode.WRITE_ERROR: RequestException, 189 | CurlECode.READ_ERROR: RequestException, 190 | CurlECode.OUT_OF_MEMORY: RequestException, 191 | CurlECode.OPERATION_TIMEDOUT: Timeout, 192 | CurlECode.SSL_CONNECT_ERROR: SSLError, 193 | CurlECode.INTERFACE_FAILED: InterfaceError, 194 | CurlECode.TOO_MANY_REDIRECTS: TooManyRedirects, 195 | CurlECode.UNKNOWN_OPTION: RequestException, 196 | CurlECode.SETOPT_OPTION_SYNTAX: RequestException, 197 | CurlECode.GOT_NOTHING: ConnectionError, 198 | CurlECode.SSL_ENGINE_NOTFOUND: SSLError, 199 | CurlECode.SSL_ENGINE_SETFAILED: SSLError, 200 | CurlECode.SEND_ERROR: ConnectionError, 201 | CurlECode.RECV_ERROR: ConnectionError, 202 | CurlECode.SSL_CERTPROBLEM: SSLError, 203 | CurlECode.SSL_CIPHER: SSLError, 204 | CurlECode.PEER_FAILED_VERIFICATION: CertificateVerifyError, 205 | CurlECode.BAD_CONTENT_ENCODING: HTTPError, 206 | CurlECode.SSL_ENGINE_INITFAILED: SSLError, 207 | CurlECode.SSL_CACERT_BADFILE: SSLError, 208 | CurlECode.SSL_CRL_BADFILE: SSLError, 209 | CurlECode.SSL_ISSUER_ERROR: SSLError, 210 | CurlECode.SSL_PINNEDPUBKEYNOTMATCH: SSLError, 211 | CurlECode.SSL_INVALIDCERTSTATUS: SSLError, 212 | CurlECode.HTTP2_STREAM: HTTPError, 213 | CurlECode.HTTP3: HTTPError, 214 | CurlECode.QUIC_CONNECT_ERROR: ConnectionError, 215 | CurlECode.PROXY: ProxyError, 216 | CurlECode.SSL_CLIENTCERT: SSLError, 217 | CurlECode.ECH_REQUIRED: SSLError, 218 | CurlECode.PARTIAL_FILE: IncompleteRead, 219 | } 220 | 221 | 222 | # credits: https://github.com/yt-dlp/yt-dlp/blob/master/yt_dlp/networking/_curlcffi.py#L241 223 | # Unlicense 224 | def code2error(code: Union[CurlECode, Literal[0]], msg: 
str):
225 |     if code == CurlECode.RECV_ERROR and "CONNECT" in msg:
226 |         return ProxyError
227 |     return CODE2ERROR.get(code, RequestException)
228 | 
--------------------------------------------------------------------------------
/docs/impersonate/customize.rst:
--------------------------------------------------------------------------------
1 | How to use my own fingerprints? e.g. okhttp
2 | --------------------------------------------
3 | 
4 | Use ``ja3=...``, ``akamai=...`` and ``extra_fp=...``.
5 | 
6 | You can retrieve the JA3 and Akamai strings using tools like Wireshark or from TLS fingerprinting sites.
7 | 
8 | .. code-block:: python
9 | 
10 |     # OKHTTP impersonation examples
11 |     # credits: https://github.com/bogdanfinn/tls-client/blob/master/profiles/contributed_custom_profiles.go
12 |     import curl_cffi
13 | 
14 |     url = "https://tls.browserleaks.com/json"
15 | 
16 |     okhttp4_android10_ja3 = ",".join(
17 |         [
18 |             "771",
19 |             "4865-4866-4867-49195-49196-52393-49199-49200-52392-49171-49172-156-157-47-53",
20 |             "0-23-65281-10-11-35-16-5-13-51-45-43-21",
21 |             "29-23-24",
22 |             "0",
23 |         ]
24 |     )
25 | 
26 |     okhttp4_android10_akamai = "4:16777216|16711681|0|m,p,a,s"
27 | 
28 |     extra_fp = {
29 |         "tls_signature_algorithms": [
30 |             "ecdsa_secp256r1_sha256",
31 |             "rsa_pss_rsae_sha256",
32 |             "rsa_pkcs1_sha256",
33 |             "ecdsa_secp384r1_sha384",
34 |             "rsa_pss_rsae_sha384",
35 |             "rsa_pkcs1_sha384",
36 |             "rsa_pss_rsae_sha512",
37 |             "rsa_pkcs1_sha512",
38 |             "rsa_pkcs1_sha1",
39 |         ]
40 |         # other options:
41 |         # tls_min_version: int = CurlSslVersion.TLSv1_2
42 |         # tls_grease: bool = False
43 |         # tls_permute_extensions: bool = False
44 |         # tls_cert_compression: Literal["zlib", "brotli"] = "brotli"
45 |         # tls_signature_algorithms: Optional[List[str]] = None
46 |         # http2_stream_weight: int = 256
47 |         # http2_stream_exclusive: int = 1
48 | 
49 |         # See requests/impersonate.py and tests/unittest/test_impersonate.py for more examples
50 |     }
51 | 
52 | 
53 |     r = curl_cffi.get(
54 |         url, ja3=okhttp4_android10_ja3, akamai=okhttp4_android10_akamai, extra_fp=extra_fp
55 |     )
56 |     print(r.json())
57 | 
58 | 
59 | JA3 and Akamai String Format
60 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
61 | 
62 | A JA3 string is a simple, comma-separated representation of the key fields in a TLS ClientHello. It consists of five parts:
63 | 
64 | - SSL/TLS Version: the numeric version the client requests (e.g. 771 for TLS 1.2).
65 | - Cipher Suites: a hyphen-separated list of all cipher suite IDs offered by the client (e.g. 4865-4866-4867-49195-49196).
66 | - Extension IDs: a hyphen-separated list of all TLS extension numbers the client includes (e.g. 0-11-10-35-16-5).
67 | - Supported Groups (aka “Elliptic Curves”): a hyphen-separated list of curve IDs the client supports for ECDHE (e.g. 29-23-24).
68 | - EC Point Formats: a hyphen-separated list of the point‐format IDs, almost always just 0 for “uncompressed” (e.g. 0).
69 | 
70 | They’re concatenated in that exact order, with commas between fields. For example:
71 | 
72 | .. code-block::
73 | 
74 |     771,4865-4866-4867-49195-49196,0-11-10-35-16-5,29-23-24,0
75 | 
76 | Note that Chrome permutes the extension order on each request, so there is a new format called JA3N, which uses a sorted extension ID list.
77 | 
78 | The Akamai HTTP/2 fingerprint string encodes four client‐controlled protocol parameters, joined by the pipe character (|):
79 | 
80 | - SETTINGS: a semicolon‐separated list of ID:value pairs from the client’s initial SETTINGS frame. Each ID is a standard HTTP/2 setting identifier (e.g. 
1 for HEADER_TABLE_SIZE, 4 for INITIAL_WINDOW_SIZE), and value is the client’s chosen value for that setting
81 | - WINDOW_UPDATE: a single integer, the value the client sends in its first WINDOW_UPDATE frame (or 0 if none was sent)
82 | - PRIORITY: zero or more priority‐frame tuples, each formatted as ``StreamID:ExclusiveBit:DependentStreamID:Weight``. Multiple tuples are comma-separated. This captures any PRIORITY frames the client issues before sending headers
83 | - Pseudo-Header Order: the sequence in which the client sends HTTP/2 pseudo-headers in its request HEADERS frame, encoded as comma-separated single-letter codes:
84 | 
85 | 
86 | .. code-block::

    m = :method
87 |     s = :scheme
88 |     p = :path
89 |     a = :authority
90 | 
91 | Putting it all together, an example fingerprint might look like:
92 | 
93 | .. code-block::
94 | 
95 |     1:65536;4:131072;5:16384|12517377|3:0:0:201|m,p,a,s
96 | 
97 | where:
98 | 
99 |     SETTINGS = 1:65536;4:131072;5:16384
100 |     WINDOW_UPDATE = 12517377
101 |     PRIORITY = 3:0:0:201
102 |     Pseudo-Header Order = m,p,a,s
103 | 
104 | Although the JA3 and Akamai fingerprint strings already capture many aspects of a ClientHello packet, some fields are still not covered and can be used to detect you.
105 | This is where the ``extra_fp`` option comes in; each field of this dict is fairly self-explanatory. You should first set the ja3 and akamai strings, then check whether
106 | your fingerprint is identical to your target's. If not, use ``extra_fp`` to further refine your impersonation.
107 | 
108 | 
109 | Using CURLOPTs
110 | ~~~~~~~~~~~~~~
111 | 
112 | The other way is to use the ``CURLOPT`` options to specify exactly which values you want to change.
113 | 
114 | To modify them, use ``curl.setopt(CurlOpt, value)``, for example:
115 | 
116 | .. code-block:: python
117 | 
118 |     import curl_cffi
119 |     from curl_cffi import Curl, CurlOpt
120 | 
121 |     c = Curl()
122 |     c.setopt(CurlOpt.HTTP2_PSEUDO_HEADERS_ORDER, "masp")
123 | 
124 |     # or
125 |     curl_cffi.get(url, curl_options={CurlOpt.HTTP2_PSEUDO_HEADERS_ORDER: "masp"})
126 | 
127 | Here is a list of options:
128 | 
129 | For TLS/JA3 fingerprints:
130 | 
131 | * https://curl.se/libcurl/c/CURLOPT_SSL_CIPHER_LIST.html
132 | 
133 | and non-standard TLS options created for this project:
134 | 
135 | * ``CURLOPT_SSL_ENABLE_ALPS``
136 | * ``CURLOPT_SSL_SIG_HASH_ALGS``
137 | * ``CURLOPT_SSL_CERT_COMPRESSION``
138 | * ``CURLOPT_SSL_ENABLE_TICKET``
139 | * ``CURLOPT_SSL_PERMUTE_EXTENSIONS``
140 | 
141 | For Akamai http2 fingerprints, you can fully customize the 3 parts:
142 | 
143 | * ``CURLOPT_HTTP2_PSEUDO_HEADERS_ORDER`` sets the http2 pseudo header order, for example: ``masp`` (non-standard HTTP/2 options created for this project).
144 | * ``CURLOPT_HTTP2_SETTINGS`` sets the settings frame values, for example ``1:65536;3:1000;4:6291456;6:262144`` (non-standard HTTP/2 options created for this project).
145 | * ``CURLOPT_HTTP2_WINDOW_UPDATE`` sets initial window update value for http2, for example ``15663105`` (non-standard HTTP/2 options created for this project).
146 | 
147 | For a complete list of options and explanation, see the `curl-impersonate README`_.
148 | 
149 | .. _curl-impersonate README: https://github.com/lexiforest/curl-impersonate?tab=readme-ov-file#libcurl-impersonate
150 | 
151 | 
152 | How to toggle Firefox-specific extensions?
153 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
154 | 
155 | There are a few special extensions from Firefox for which you should add extra options via ``extra_fp``:
156 | 
157 | Extension 34: delegated credentials
158 | 
159 | .. code-block:: python
160 | 
161 |     extra_fp = {
162 |         "tls_delegated_credential": "ecdsa_secp256r1_sha256:ecdsa_secp384r1_sha384:ecdsa_secp521r1_sha512:ecdsa_sha1"
163 |     }
164 | 
165 |     # Note that the ja3 string also includes extension 34
166 |     ja3 = "771,4865-4867-4866-49195-49199-52393-52392-49196-49200-49162-49161-49171-49172-156-157-47-53,0-23-65281-10-11-35-16-5-34-18-51-43-13-45-28-27-65037,4588-29-23-24-25-256-257,0"
167 | 
168 |     r = curl_cffi.get(url, ja3=ja3, extra_fp=extra_fp)
169 | 
170 | Extension 28: record size limit
171 | 
172 | .. code-block:: python
173 | 
174 |     extra_fp = {
175 |         "tls_record_size_limit": 4001
176 |     }
177 | 
178 |     # Note that the ja3 string also includes extension 28
179 |     ja3 = "771,4865-4867-4866-49195-49199-52393-52392-49196-49200-49162-49161-49171-49172-156-157-47-53,0-23-65281-10-11-35-16-5-34-18-51-43-13-45-28-27-65037,4588-29-23-24-25-256-257,0"
180 | 
181 |     r = curl_cffi.get(url, ja3=ja3, extra_fp=extra_fp)
182 | 
183 | 
--------------------------------------------------------------------------------
/tests/unittest/test_impersonate.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | 
3 | from curl_cffi import requests
4 | from curl_cffi.const import CurlHttpVersion, CurlSslVersion
5 | 
6 | 
7 | def test_impersonate_with_version(server):
8 |     # the test server does not understand http/2
9 |     r = requests.get(
10 |         str(server.url), impersonate="chrome120", http_version=CurlHttpVersion.V1_1
11 |     )
12 |     assert r.status_code == 200
13 |     r = requests.get(
14 |         str(server.url), impersonate="safari17_0", http_version=CurlHttpVersion.V1_1
15 |     )
16 |     assert r.status_code == 200
17 | 
18 | 
19 | def test_impersonate_without_version(server):
20 |     r = requests.get(
21 |         str(server.url), impersonate="chrome", http_version=CurlHttpVersion.V1_1
22 |     )
23 |     assert r.status_code == 200
24 |     r = requests.get(
25 |         str(server.url), impersonate="safari_ios", http_version=CurlHttpVersion.V1_1
26 |     )
27 |     assert r.status_code == 200
28 | 
29 | 
30 | def test_impersonate_non_exist(server):
31 |     with pytest.raises(requests.RequestsError, match="Impersonating"):
32 |         requests.get(str(server.url), impersonate="edge2131")
33 |     with pytest.raises(requests.RequestsError, match="Impersonating"):
34 |         requests.get(str(server.url), impersonate="chrome2952")
35 | 
36 | 
37 | # TODO: implement local ja3/akamai verification server with th1. 
38 | 39 | 40 | @pytest.mark.skip(reason="warning is used") 41 | def test_costomized_no_impersonate_coexist(server): 42 | with pytest.raises(requests.RequestsError): 43 | requests.get(str(server.url), impersonate="chrome", ja3=",,,,") 44 | with pytest.raises(requests.RequestsError): 45 | requests.get(str(server.url), impersonate="chrome", akamai="|||") 46 | 47 | 48 | @pytest.mark.skip(reason="website is down") 49 | def test_customized_ja3_chrome126(): 50 | url = "https://tls.browserleaks.com/json" 51 | ja3 = ( 52 | "771,4865-4866-4867-49195-49199-49196-49200-52393-52392-49171-49172-156-157-47-53," 53 | "0-65037-27-51-13-43-5-18-17513-65281-23-10-45-35-11-16,25497-29-23-24,0" 54 | ) 55 | r = requests.get(url, ja3=ja3).json() 56 | assert r["ja3_text"] == ja3 57 | 58 | 59 | @pytest.mark.skip(reason="not working") 60 | def test_customized_ja3_tls_version(): 61 | url = "https://tls.browserleaks.com/json" 62 | ja3 = ( 63 | "770,4865-4866-4867-49195-49199-49196-49200-52393-52392-49171-49172-156-157-47-53," 64 | "0-65037-27-51-13-43-5-18-17513-65281-23-10-45-35-11-16,25497-29-23-24,0" 65 | ) 66 | r = requests.get(url, ja3=ja3).json() 67 | tls_version, _, _, _, _ = r["ja3_text"].split(",") 68 | assert tls_version == "770" 69 | 70 | 71 | @pytest.mark.skip(reason="website is down") 72 | def test_customized_ja3_ciphers(): 73 | url = "https://tls.browserleaks.com/json" 74 | ja3 = ( 75 | "771,4865-4866-4867-49195-49199-49196-49200-52393-52392-49171," 76 | "0-65037-27-51-13-43-5-18-17513-65281-23-10-45-35-11-16,25497-29-23-24,0" 77 | ) 78 | r = requests.get(url, ja3=ja3).json() 79 | _, ciphers, _, _, _ = r["ja3_text"].split(",") 80 | assert ciphers == "4865-4866-4867-49195-49199-49196-49200-52393-52392-49171" 81 | 82 | 83 | # TODO: change to parameterized test 84 | @pytest.mark.skip(reason="website is down") 85 | def test_customized_ja3_extensions(): 86 | url = "https://tls.browserleaks.com/json" 87 | ja3 = ( 88 | "771,4865-4866-4867-49195-49199-49196-49200-52393-52392-49171-49172-156-157-47-53," 89 | "65037-65281-0-11-23-5-18-27-16-17513-10-35-43-45-13-51,25497-29-23-24,0" 90 | ) 91 | r = requests.get(url, ja3=ja3).json() 92 | _, _, extensions, _, _ = r["ja3_text"].split(",") 93 | assert extensions == "65037-65281-0-11-23-5-18-27-16-17513-10-35-43-45-13-51" 94 | 95 | ja3 = ( 96 | "771,4865-4866-4867-49195-49199-49196-49200-52393-52392-49171-49172-156-157-47-53," 97 | "65281-0-11-23-5-18-27-16-17513-10-35-43-45-13-51,25497-29-23-24,0" 98 | ) 99 | r = requests.get(url, ja3=ja3).json() 100 | _, _, extensions, _, _ = r["ja3_text"].split(",") 101 | assert extensions == "65281-0-11-23-5-18-27-16-17513-10-35-43-45-13-51" 102 | 103 | ja3 = ( 104 | "771,4865-4866-4867-49195-49199-49196-49200-52393-52392-49171-49172-156-157-47-53," 105 | "65281-0-11-23-27-16-17513-10-35-43-45-13-51,25497-29-23-24,0" 106 | ) 107 | r = requests.get(url, ja3=ja3).json() 108 | _, _, extensions, _, _ = r["ja3_text"].split(",") 109 | assert extensions == "65281-0-11-23-27-16-17513-10-35-43-45-13-51" 110 | 111 | # removed enable session_ticket() 112 | ja3 = ( 113 | "771,4865-4866-4867-49195-49199-49196-49200-52393-52392-49171-49172-156-157-47-53," 114 | "65281-0-11-23-5-18-27-16-17513-10-43-45-13-51,25497-29-23-24,0" 115 | ) 116 | r = requests.get(url, ja3=ja3).json() 117 | _, _, extensions, _, _ = r["ja3_text"].split(",") 118 | assert extensions == "65281-0-11-23-5-18-27-16-17513-10-43-45-13-51" 119 | 120 | # new alps code point 121 | ja3 = ( 122 | "771,4865-4866-4867-49195-49199-49196-49200-52393-52392-49171-49172-156-157-47-53," 123 | 
"0-5-10-11-13-16-18-23-27-35-43-45-51-17613-65037-65281,4588-29-23-24,0" 124 | ) 125 | r = requests.get(url, ja3=ja3).json() 126 | _, _, extensions, _, _ = r["ja3_text"].split(",") 127 | assert extensions == "0-5-10-11-13-16-18-23-27-35-43-45-51-17613-65037-65281" 128 | 129 | 130 | @pytest.mark.skip(reason="website is down") 131 | def test_customized_ja3_curves(): 132 | url = "https://tls.browserleaks.com/json" 133 | ja3 = ( 134 | "771,4865-4866-4867-49195-49199-49196-49200-52393-52392-49171-49172-156-157-47-53," 135 | "0-65037-27-51-13-43-5-18-17513-65281-23-10-45-35-11-16,25497-24-23-29,0" 136 | ) 137 | r = requests.get(url, ja3=ja3).json() 138 | _, _, _, curves, _ = r["ja3_text"].split(",") 139 | assert curves == "25497-24-23-29" 140 | 141 | 142 | @pytest.mark.skip(reason="website is down") 143 | def test_customized_akamai_chrome126(): 144 | url = "https://tls.browserleaks.com/json" 145 | akamai = "1:65536;2:0;4:6291456;6:262144|15663105|0|m,a,s,p" 146 | r = requests.get(url, akamai=akamai).json() 147 | assert r["akamai_text"] == akamai 148 | 149 | 150 | @pytest.mark.skip(reason="website is down") 151 | def test_customized_akamai_safari(): 152 | url = "https://tls.browserleaks.com/json" 153 | akamai = "2:0;4:4194304;3:100|10485760|0|m,s,p,a" 154 | r = requests.get(url, akamai=akamai).json() 155 | assert r["akamai_text"] == akamai 156 | 157 | # test_tls_peet_ws_settings 158 | r = requests.get(url, akamai=akamai.replace(";", ",")).json() 159 | assert r["akamai_text"] == akamai 160 | 161 | 162 | @pytest.mark.skip(reason="Unstable API") 163 | def test_customized_extra_fp_sig_hash_algs(): 164 | url = "https://tls.peet.ws/api/all" 165 | safari_algs = [ 166 | "ecdsa_secp256r1_sha256", 167 | "rsa_pss_rsae_sha256", 168 | "rsa_pkcs1_sha256", 169 | "ecdsa_secp384r1_sha384", 170 | "ecdsa_sha1", 171 | "rsa_pss_rsae_sha384", 172 | "rsa_pss_rsae_sha384", 173 | "rsa_pkcs1_sha384", 174 | "rsa_pss_rsae_sha512", 175 | "rsa_pkcs1_sha512", 176 | "rsa_pkcs1_sha1", 177 | ] 178 | fp = requests.ExtraFingerprints(tls_signature_algorithms=safari_algs) 179 | r = requests.get(url, extra_fp=fp).json() 180 | result_algs = [] 181 | for ex in r["tls"]["extensions"]: 182 | if ex["name"] == "signature_algorithms (13)": 183 | result_algs = ex["signature_algorithms"] 184 | assert safari_algs == result_algs 185 | 186 | 187 | @pytest.mark.skip(reason="Unstable API") 188 | def test_customized_extra_fp_tls_min_version(): 189 | url = "https://tls.peet.ws/api/all" 190 | safari_min_version = CurlSslVersion.TLSv1_0 191 | fp = requests.ExtraFingerprints(tls_min_version=safari_min_version) 192 | r = requests.get(url, extra_fp=fp).json() 193 | for ex in r["tls"]["extensions"]: 194 | if ex["name"] == "supported_versions (43)": 195 | # TLS 1.0 1.1, 1.2, 1.3 196 | assert len(ex["versions"]) >= 4 197 | 198 | 199 | @pytest.mark.skip(reason="Unstable API") 200 | def test_customized_extra_fp_grease(): 201 | url = "https://tls.peet.ws/api/all" 202 | fp = requests.ExtraFingerprints(tls_grease=True) 203 | r = requests.get(url, extra_fp=fp).json() 204 | assert "TLS_GREASE" in r["tls"]["ciphers"][0] 205 | 206 | 207 | @pytest.mark.skip(reason="website is down") 208 | def test_customized_extra_fp_permute(): 209 | url = "https://tls.browserleaks.com/json" 210 | ja3 = ( 211 | "771,4865-4866-4867-49195-49199-49196-49200-52393-52392-49171-49172-156-157-47-53," 212 | "65037-65281-0-11-23-5-18-27-16-17513-10-35-43-45-13-51,25497-29-23-24,0" 213 | ) 214 | 215 | r = requests.get(url, ja3=ja3).json() 216 | _, _, extensions, _, _ = r["ja3_text"].split(",") 
217 | assert extensions == "65037-65281-0-11-23-5-18-27-16-17513-10-35-43-45-13-51" 218 | 219 | r = requests.get( 220 | url, ja3=ja3, extra_fp=requests.ExtraFingerprints(tls_permute_extensions=True) 221 | ).json() 222 | _, _, extensions, _, _ = r["ja3_text"].split(",") 223 | assert extensions != "65037-65281-0-11-23-5-18-27-16-17513-10-35-43-45-13-51" 224 | 225 | 226 | @pytest.mark.skip(reason="Unstable API") 227 | def test_customized_extra_fp_cert_compression(): 228 | url = "https://tls.peet.ws/api/all" 229 | fp = requests.ExtraFingerprints(tls_cert_compression="zlib") 230 | r = requests.get(url, extra_fp=fp).json() 231 | result_algs = [] 232 | for ex in r["tls"]["extensions"]: 233 | if ex["name"] == "compress_certificate (27)": 234 | result_algs = ex["algorithms"] 235 | assert result_algs[0] == "zlib (1)" 236 | 237 | 238 | @pytest.mark.skip(reason="Unstable API") 239 | def test_customized_extra_fp_stream_weight(): 240 | url = "https://tls.peet.ws/api/all" 241 | fp = requests.ExtraFingerprints(http2_stream_weight=64) 242 | r = requests.get(url, extra_fp=fp).json() 243 | assert r["http2"]["sent_frames"][2]["priority"]["weight"] == 64 244 | 245 | 246 | @pytest.mark.skip(reason="Unstable API") 247 | def test_customized_extra_fp_stream_exclusive(): 248 | url = "https://tls.peet.ws/api/all" 249 | fp = requests.ExtraFingerprints(http2_stream_exclusive=0) 250 | r = requests.get(url, extra_fp=fp).json() 251 | assert r["http2"]["sent_frames"][2]["priority"]["exclusive"] == 0 252 | -------------------------------------------------------------------------------- /tests/unittest/test_curl.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import json 3 | from io import BytesIO 4 | from typing import cast 5 | 6 | import pytest 7 | 8 | import curl_cffi 9 | from curl_cffi import Curl, CurlError, CurlInfo, CurlOpt 10 | 11 | ####################################################################################### 12 | # testing setopt 13 | ####################################################################################### 14 | 15 | 16 | def test_get(server): 17 | c = Curl() 18 | c.setopt(CurlOpt.URL, str(server.url).encode()) 19 | c.perform() 20 | 21 | 22 | def test_post(server): 23 | c = Curl() 24 | url = str(server.url.copy_with(path="/echo_body")) 25 | c.setopt(CurlOpt.URL, url.encode()) 26 | c.setopt(CurlOpt.POST, 1) 27 | c.setopt(CurlOpt.POSTFIELDS, b"foo=bar") 28 | buffer = BytesIO() 29 | c.setopt(CurlOpt.WRITEDATA, buffer) 30 | c.perform() 31 | assert buffer.getvalue() == b"foo=bar" 32 | 33 | 34 | def test_put(server): 35 | c = Curl() 36 | c.setopt(CurlOpt.URL, str(server.url).encode()) 37 | c.setopt(CurlOpt.CUSTOMREQUEST, b"PUT") 38 | c.perform() 39 | 40 | 41 | def test_delete(server): 42 | c = Curl() 43 | c.setopt(CurlOpt.URL, str(server.url).encode()) 44 | c.setopt(CurlOpt.CUSTOMREQUEST, b"DELETE") 45 | c.perform() 46 | 47 | 48 | def test_post_data_with_size(server): 49 | c = Curl() 50 | url = str(server.url.copy_with(path="/echo_body")) 51 | c.setopt(CurlOpt.URL, url.encode()) 52 | c.setopt(CurlOpt.CUSTOMREQUEST, b"POST") 53 | c.setopt(CurlOpt.POSTFIELDS, b"\0" * 7) 54 | c.setopt(CurlOpt.POSTFIELDSIZE, 7) 55 | buffer = BytesIO() 56 | c.setopt(CurlOpt.WRITEDATA, buffer) 57 | c.perform() 58 | assert buffer.getvalue() == b"\0" * 7 59 | 60 | 61 | def test_headers(server): 62 | c = Curl() 63 | url = str(server.url.copy_with(path="/echo_headers")) 64 | c.setopt(CurlOpt.URL, url.encode()) 65 | c.setopt(CurlOpt.HTTPHEADER, [b"Foo: bar"]) 66 | 
buffer = BytesIO()
67 | c.setopt(CurlOpt.WRITEDATA, buffer)
68 | c.perform()
69 | headers = json.loads(buffer.getvalue().decode())
70 | assert headers["Foo"][0] == "bar"
71 | 
72 | # https://github.com/lexiforest/curl_cffi/issues/16
73 | c.setopt(CurlOpt.HTTPHEADER, [b"Foo: baz"])
74 | buffer = BytesIO()
75 | c.setopt(CurlOpt.WRITEDATA, buffer)
76 | c.perform()
77 | headers = json.loads(buffer.getvalue().decode())
78 | assert headers["Foo"][0] == "baz"
79 | 
80 | 
81 | def test_proxy_headers(server):
82 | # XXX: this only checks that the proxy header is not sent to the target server;
83 | # a test verifying that proxy headers do reach the proxy server should be added.
84 | c = Curl()
85 | url = str(server.url.copy_with(path="/echo_headers"))
86 | c.setopt(CurlOpt.URL, url.encode())
87 | c.setopt(CurlOpt.PROXYHEADER, [b"Foo: bar"])
88 | buffer = BytesIO()
89 | c.setopt(CurlOpt.WRITEDATA, buffer)
90 | c.perform()
91 | headers = json.loads(buffer.getvalue().decode())
92 | assert "Foo" not in headers
93 | 
94 | # https://github.com/lexiforest/curl_cffi/issues/16
95 | c.setopt(CurlOpt.PROXYHEADER, [b"Foo: baz"])
96 | buffer = BytesIO()
97 | c.setopt(CurlOpt.WRITEDATA, buffer)
98 | c.perform()
99 | headers = json.loads(buffer.getvalue().decode())
100 | assert "Foo" not in headers
101 | 
102 | 
103 | def test_write_function_memory_leak(server):
104 | c = Curl()
105 | for _ in range(10):
106 | url = str(server.url.copy_with(path="/echo_headers"))
107 | c.setopt(CurlOpt.URL, url.encode())
108 | c.setopt(CurlOpt.HTTPHEADER, [b"Foo: bar"])
109 | buffer = BytesIO()
110 | c.setopt(CurlOpt.WRITEDATA, buffer)
111 | c.perform()
112 | assert c._write_handle is None
113 | 
114 | 
115 | def test_write_function(server):
116 | c = Curl()
117 | url = str(server.url.copy_with(path="/echo_body"))
118 | c.setopt(CurlOpt.URL, url.encode())
119 | c.setopt(CurlOpt.POST, 1)
120 | c.setopt(CurlOpt.POSTFIELDS, b"foo=bar")
121 | 
122 | buffer = BytesIO()
123 | 
124 | def write(data: bytes):
125 | buffer.write(data)
126 | return len(data)
127 | 
128 | c.setopt(CurlOpt.WRITEFUNCTION, write)
129 | c.perform()
130 | assert buffer.getvalue() == b"foo=bar"
131 | 
132 | 
133 | def test_cookies(server):
134 | c = Curl()
135 | url = str(server.url.copy_with(path="/echo_cookies"))
136 | c.setopt(CurlOpt.URL, url.encode())
137 | c.setopt(CurlOpt.COOKIE, b"foo=bar")
138 | buffer = BytesIO()
139 | c.setopt(CurlOpt.WRITEDATA, buffer)
140 | c.perform()
141 | cookies = json.loads(buffer.getvalue().decode())
142 | # print(cookies)
143 | assert cookies["foo"] == "bar"
144 | 
145 | 
146 | def test_auth(server):
147 | c = Curl()
148 | url = str(server.url.copy_with(path="/echo_headers"))
149 | c.setopt(CurlOpt.URL, url.encode())
150 | c.setopt(CurlOpt.USERNAME, b"foo")
151 | c.setopt(CurlOpt.PASSWORD, b"bar")
152 | buffer = BytesIO()
153 | c.setopt(CurlOpt.WRITEDATA, buffer)
154 | c.perform()
155 | headers = json.loads(buffer.getvalue().decode())
156 | assert (
157 | headers["Authorization"][0] == f"Basic {base64.b64encode(b'foo:bar').decode()}"
158 | )
159 | 
160 | 
161 | def test_timeout(server):
162 | c = Curl()
163 | url = str(server.url.copy_with(path="/slow_response"))
164 | c.setopt(CurlOpt.URL, url.encode())
165 | c.setopt(CurlOpt.TIMEOUT_MS, 100)
166 | with pytest.raises(CurlError, match=r"curl: \(28\)"):
167 | c.perform()
168 | 
169 | 
170 | def test_repeated_headers_after_error(server):
171 | c = Curl()
172 | url = str(server.url.copy_with(path="/slow_response"))
173 | c.setopt(CurlOpt.URL, url.encode())
174 | c.setopt(CurlOpt.TIMEOUT_MS, 100)
175 | 
c.setopt(CurlOpt.HTTPHEADER, [b"Foo: bar"]) 176 | with pytest.raises(CurlError, match=r"curl: \(28\)"): 177 | c.perform() 178 | 179 | # another request 180 | url = str(server.url.copy_with(path="/echo_headers")) 181 | c.setopt(CurlOpt.URL, url.encode()) 182 | c.setopt(CurlOpt.HTTPHEADER, [b"Foo: bar"]) 183 | buffer = BytesIO() 184 | c.setopt(CurlOpt.WRITEDATA, buffer) 185 | c.perform() 186 | headers = json.loads(buffer.getvalue().decode()) 187 | assert len(headers["Foo"]) == 1 188 | # print(headers) 189 | 190 | 191 | def test_follow_redirect(server): 192 | c = Curl() 193 | url = str(server.url.copy_with(path="/redirect_301")) 194 | c.setopt(CurlOpt.URL, url.encode()) 195 | c.setopt(CurlOpt.FOLLOWLOCATION, 1) 196 | c.perform() 197 | assert c.getinfo(CurlInfo.RESPONSE_CODE) == 200 198 | 199 | 200 | def test_not_follow_redirect(server): 201 | c = Curl() 202 | url = str(server.url.copy_with(path="/redirect_301")) 203 | c.setopt(CurlOpt.URL, url.encode()) 204 | c.perform() 205 | assert c.getinfo(CurlInfo.RESPONSE_CODE) == 301 206 | 207 | 208 | def test_http_proxy_changed_path(server): 209 | c = Curl() 210 | proxy_url = str(server.url).rstrip("/") 211 | print("proxy url", proxy_url) 212 | c.setopt(CurlOpt.URL, b"http://example.org") 213 | c.setopt(CurlOpt.PROXY, proxy_url.encode()) 214 | buffer = BytesIO() 215 | c.setopt(CurlOpt.WRITEDATA, buffer) 216 | c.perform() 217 | rsp = json.loads(buffer.getvalue().decode()) 218 | assert rsp["Hello"] == "http_proxy!" 219 | 220 | 221 | def test_https_proxy_using_connect(server): 222 | c = Curl() 223 | proxy_url = str(server.url) 224 | c.setopt(CurlOpt.URL, b"https://example.org") 225 | c.setopt(CurlOpt.PROXY, proxy_url.encode()) 226 | c.setopt(CurlOpt.HTTPPROXYTUNNEL, 1) 227 | buffer = BytesIO() 228 | c.setopt(CurlOpt.WRITEDATA, buffer) 229 | with pytest.raises(CurlError, match=r"curl: \(35\)"): 230 | c.perform() 231 | 232 | 233 | def test_verify(https_server): 234 | c = Curl() 235 | url = str(https_server.url) 236 | c.setopt(CurlOpt.URL, url.encode()) 237 | with pytest.raises(CurlError, match="SSL certificate problem"): 238 | c.perform() 239 | 240 | 241 | def test_verify_false(https_server): 242 | c = Curl() 243 | url = str(https_server.url) 244 | c.setopt(CurlOpt.URL, url.encode()) 245 | c.setopt(CurlOpt.SSL_VERIFYPEER, 0) 246 | c.setopt(CurlOpt.SSL_VERIFYHOST, 0) 247 | c.perform() 248 | 249 | 250 | def test_referer(server): 251 | c = Curl() 252 | url = str(server.url.copy_with(path="/echo_headers")) 253 | c.setopt(CurlOpt.URL, url.encode()) 254 | c.setopt(CurlOpt.REFERER, b"http://example.org") 255 | buffer = BytesIO() 256 | c.setopt(CurlOpt.WRITEDATA, buffer) 257 | c.perform() 258 | headers = json.loads(buffer.getvalue().decode()) 259 | assert headers["Referer"][0] == "http://example.org" 260 | 261 | 262 | ####################################################################################### 263 | # testing getinfo 264 | ####################################################################################### 265 | 266 | 267 | def test_effective_url(server): 268 | c = Curl() 269 | url = str(server.url.copy_with(path="/redirect_301")) 270 | c.setopt(CurlOpt.URL, url.encode()) 271 | c.setopt(CurlOpt.FOLLOWLOCATION, 1) 272 | c.perform() 273 | assert c.getinfo(CurlInfo.EFFECTIVE_URL) == str(server.url).encode() 274 | 275 | 276 | def test_status_code(server): 277 | c = Curl() 278 | url = str(server.url) 279 | c.setopt(CurlOpt.URL, url.encode()) 280 | c.perform() 281 | assert c.getinfo(CurlInfo.RESPONSE_CODE) == 200 282 | 283 | 284 | def 
test_response_headers(server): 285 | c = Curl() 286 | url = str(server.url.copy_with(path="/set_headers")) 287 | c.setopt(CurlOpt.URL, url.encode()) 288 | buffer = BytesIO() 289 | c.setopt(CurlOpt.HEADERDATA, buffer) 290 | c.perform() 291 | headers = buffer.getvalue().decode() 292 | for line in headers.splitlines(): 293 | if line.startswith("x-test"): 294 | assert line.startswith("x-test: test") 295 | 296 | 297 | def test_response_cookies(server): 298 | c = Curl() 299 | url = str(server.url.copy_with(path="/set_cookies")) 300 | c.setopt(CurlOpt.URL, url.encode()) 301 | buffer = BytesIO() 302 | c.setopt(CurlOpt.HEADERDATA, buffer) 303 | c.perform() 304 | headers = buffer.getvalue() 305 | cookie = c.parse_cookie_headers(headers.splitlines()) 306 | for name, morsel in cookie.items(): 307 | if name == "foo": 308 | assert morsel.value == "bar" 309 | 310 | 311 | def test_elapsed(server): 312 | c = Curl() 313 | url = str(server.url) 314 | c.setopt(CurlOpt.URL, url.encode()) 315 | c.perform() 316 | assert cast(int, c.getinfo(CurlInfo.TOTAL_TIME)) > 0 317 | 318 | 319 | def test_reason(server): 320 | c = Curl() 321 | url = str(server.url) 322 | c.setopt(CurlOpt.URL, url.encode()) 323 | buffer = BytesIO() 324 | c.setopt(CurlOpt.HEADERDATA, buffer) 325 | c.perform() 326 | headers = buffer.getvalue() 327 | headers = headers.splitlines() 328 | assert c.get_reason_phrase(headers[0]) == b"OK" 329 | 330 | 331 | def test_resolve(server): 332 | c = Curl() 333 | url = "http://example.com:8000" 334 | c.setopt(CurlOpt.RESOLVE, ["example.com:8000:127.0.0.1"]) 335 | c.setopt(CurlOpt.URL, url) 336 | c.perform() 337 | 338 | 339 | def test_duphandle(server): 340 | c = Curl() 341 | c.setopt(CurlOpt.URL, str(server.url.copy_with(path="/redirect_loop")).encode()) 342 | c.setopt(CurlOpt.FOLLOWLOCATION, 1) 343 | c.setopt(CurlOpt.MAXREDIRS, 2) 344 | c = c.duphandle() 345 | with pytest.raises(CurlError): 346 | c.perform() 347 | 348 | 349 | def test_is_pro(): 350 | assert curl_cffi.is_pro() is False 351 | -------------------------------------------------------------------------------- /curl_cffi/requests/models.py: -------------------------------------------------------------------------------- 1 | from contextlib import suppress 2 | import queue 3 | import re 4 | import warnings 5 | from concurrent.futures import Future 6 | from typing import Any, Optional, Union 7 | from collections.abc import Awaitable, Callable 8 | from datetime import timedelta 9 | 10 | from ..curl import Curl 11 | from ..utils import CurlCffiWarning 12 | from .cookies import Cookies 13 | from .exceptions import HTTPError, RequestException 14 | from .headers import Headers 15 | 16 | # Use orjson if present 17 | try: 18 | from orjson import loads 19 | except ImportError: 20 | from json import loads 21 | 22 | with suppress(ImportError): 23 | from markdownify import markdownify as md 24 | import readability as rd 25 | 26 | CHARSET_RE = re.compile(r"charset=([\w-]+)") 27 | STREAM_END = object() 28 | 29 | 30 | def clear_queue(q: queue.Queue): 31 | with q.mutex: 32 | q.queue.clear() 33 | q.all_tasks_done.notify_all() 34 | q.unfinished_tasks = 0 35 | 36 | 37 | class Request: 38 | """Representing a sent request.""" 39 | 40 | def __init__(self, url: str, headers: Headers, method: str): 41 | self.url = url 42 | self.headers = headers 43 | self.method = method 44 | 45 | 46 | class Response: 47 | """Contains information the server sends. 48 | 49 | Attributes: 50 | url: url used in the request. 51 | content: response body in bytes. 52 | text: response body in str. 
53 | status_code: http status code.
54 | reason: http response reason, such as OK, Not Found.
55 | ok: is status_code in [200, 400)?
56 | headers: response headers.
57 | cookies: response cookies.
58 | elapsed: timedelta of the request duration.
59 | encoding: http body encoding.
60 | charset: alias for encoding.
61 | primary_ip: primary ip of the server.
62 | primary_port: primary port of the server.
63 | local_ip: local ip used in this connection.
64 | local_port: local port used in this connection.
65 | charset_encoding: encoding specified by the Content-Type header.
66 | default_encoding: encoding for decoding response content if charset is not found
67 | in headers. Defaults to "utf-8". Can be set to a callable for automatic
68 | detection.
69 | redirect_count: how many redirects happened.
70 | redirect_url: the final redirected url.
71 | http_version: http version used.
72 | history: redirect history; only headers are available.
73 | download_size: total downloaded bytes (body).
74 | upload_size: total uploaded bytes (body).
75 | header_size: total header size.
76 | request_size: request size.
77 | response_size: download_size + header_size
78 | """
79 | 
80 | def __init__(self, curl: Optional[Curl] = None, request: Optional[Request] = None):
81 | self.curl = curl
82 | self.request = request
83 | self.url = ""
84 | self.content = b""
85 | self.status_code = 200
86 | self.reason = "OK"
87 | self.ok = True
88 | self.headers = Headers()
89 | self.cookies = Cookies()
90 | self.elapsed: timedelta = timedelta()
91 | self.default_encoding: Union[str, Callable[[bytes], str]] = "utf-8"
92 | self.redirect_count = 0
93 | self.redirect_url = ""
94 | self.http_version = 0
95 | self.primary_ip: str = ""
96 | self.primary_port: int = 0
97 | self.local_ip: str = ""
98 | self.local_port: int = 0
99 | self.history: list[dict[str, Any]] = []
100 | self.infos: dict[str, Any] = {}
101 | self.queue: Optional[queue.Queue] = None
102 | self.stream_task: Optional[Future] = None
103 | self.astream_task: Optional[Awaitable] = None
104 | self.quit_now = None
105 | self.download_size: int = 0
106 | self.upload_size: int = 0
107 | self.header_size: int = 0
108 | self.request_size: int = 0
109 | self.response_size: int = 0
110 | 
111 | @property
112 | def charset(self) -> str:
113 | """Alias for encoding."""
114 | return self.encoding
115 | 
116 | @property
117 | def encoding(self) -> str:
118 | """
119 | Determines the encoding to decode byte content into text.
120 | 
121 | The method follows a specific priority to decide the encoding:
122 | 1. If ``.encoding`` has been explicitly set, it is used.
123 | 2. The encoding specified by the ``charset`` parameter in the ``Content-Type``
124 | header.
125 | 3. The encoding specified by the ``default_encoding`` attribute. This can either
126 | be a string (e.g., "utf-8") or a callable for charset autodetection.
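
For example, autodetection can be plugged in with a callable (a sketch,
assuming the third-party ``chardet`` package is installed)::

    r.default_encoding = lambda content: chardet.detect(content)["encoding"] or "utf-8"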
127 | """ 128 | if not hasattr(self, "_encoding"): 129 | encoding = self.charset_encoding 130 | if encoding is None: 131 | if isinstance(self.default_encoding, str): 132 | encoding = self.default_encoding 133 | elif callable(self.default_encoding): 134 | encoding = self.default_encoding(self.content) 135 | self._encoding = encoding or "utf-8" 136 | return self._encoding 137 | 138 | @encoding.setter 139 | def encoding(self, value: str) -> None: 140 | if hasattr(self, "_text"): 141 | raise ValueError("Cannot set encoding after text has been accessed") 142 | self._encoding = value 143 | 144 | @property 145 | def charset_encoding(self) -> Optional[str]: 146 | """Return the encoding, as specified by the Content-Type header.""" 147 | content_type = self.headers.get("Content-Type") 148 | if content_type: 149 | charset_match = CHARSET_RE.search(content_type) 150 | return charset_match.group(1) if charset_match else None 151 | return None 152 | 153 | @property 154 | def text(self) -> str: 155 | if not hasattr(self, "_text"): 156 | if not self.content: 157 | self._text = "" 158 | else: 159 | self._text = self._decode(self.content) 160 | return self._text 161 | 162 | def markdown(self) -> str: 163 | doc = rd.Document(self.content) 164 | title = doc.title() 165 | summary = doc.summary(html_partial=True) 166 | body_as_md = md(f"
<h1>{title}</h1>{summary}
") 167 | return body_as_md 168 | 169 | def _decode(self, content: bytes) -> str: 170 | try: 171 | return content.decode(self.encoding, errors="replace") 172 | except (UnicodeDecodeError, LookupError): 173 | return content.decode("utf-8-sig") 174 | 175 | def raise_for_status(self): 176 | """Raise an error if status code is not in [200, 400)""" 177 | if not self.ok: 178 | raise HTTPError(f"HTTP Error {self.status_code}: {self.reason}", 0, self) 179 | 180 | def iter_lines(self, chunk_size=None, decode_unicode=False, delimiter=None): 181 | """ 182 | iterate streaming content line by line, separated by ``\\n``. 183 | 184 | Copied from: https://requests.readthedocs.io/en/latest/_modules/requests/models/ 185 | which is under the License: Apache 2.0 186 | """ 187 | pending = None 188 | 189 | for chunk in self.iter_content( 190 | chunk_size=chunk_size, decode_unicode=decode_unicode 191 | ): 192 | if pending is not None: 193 | chunk = pending + chunk 194 | lines = chunk.split(delimiter) if delimiter else chunk.splitlines() 195 | pending = ( 196 | lines.pop() 197 | if lines and lines[-1] and chunk and lines[-1][-1] == chunk[-1] 198 | else None 199 | ) 200 | 201 | yield from lines 202 | 203 | if pending is not None: 204 | yield pending 205 | 206 | def iter_content(self, chunk_size=None, decode_unicode=False): 207 | """ 208 | iterate streaming content chunk by chunk in bytes. 209 | """ 210 | if chunk_size: 211 | warnings.warn( 212 | "chunk_size is ignored, there is no way to tell curl that.", 213 | CurlCffiWarning, 214 | stacklevel=2, 215 | ) 216 | if decode_unicode: 217 | raise NotImplementedError() 218 | 219 | assert self.queue and self.curl, "stream mode is not enabled." 220 | 221 | while True: 222 | chunk = self.queue.get() 223 | 224 | # re-raise the exception if something wrong happened. 225 | if isinstance(chunk, RequestException): 226 | self.curl.reset() 227 | raise chunk 228 | 229 | # end of stream. 230 | if chunk is STREAM_END: 231 | break 232 | 233 | yield chunk 234 | 235 | def json(self, **kw): 236 | """return a parsed json object of the content.""" 237 | return loads(self.content, **kw) 238 | 239 | def close(self): 240 | """Close the streaming connection, only valid in stream mode.""" 241 | 242 | if self.quit_now: 243 | self.quit_now.set() 244 | if self.stream_task: 245 | self.stream_task.result() 246 | 247 | async def aiter_lines(self, chunk_size=None, decode_unicode=False, delimiter=None): 248 | """ 249 | iterate streaming content line by line, separated by ``\\n``. 250 | 251 | Copied from: https://requests.readthedocs.io/en/latest/_modules/requests/models/ 252 | which is under the License: Apache 2.0 253 | """ 254 | pending = None 255 | 256 | async for chunk in self.aiter_content( 257 | chunk_size=chunk_size, decode_unicode=decode_unicode 258 | ): 259 | if pending is not None: 260 | chunk = pending + chunk 261 | lines = chunk.split(delimiter) if delimiter else chunk.splitlines() 262 | pending = ( 263 | lines.pop() 264 | if lines and lines[-1] and chunk and lines[-1][-1] == chunk[-1] 265 | else None 266 | ) 267 | 268 | for line in lines: 269 | yield line 270 | 271 | if pending is not None: 272 | yield pending 273 | 274 | async def aiter_content(self, chunk_size=None, decode_unicode=False): 275 | """ 276 | iterate streaming content chunk by chunk in bytes. 
277 | """ 278 | if chunk_size: 279 | warnings.warn( 280 | "chunk_size is ignored, there is no way to tell curl that.", 281 | CurlCffiWarning, 282 | stacklevel=2, 283 | ) 284 | if decode_unicode: 285 | raise NotImplementedError() 286 | 287 | assert self.queue and self.curl, "stream mode is not enabled." 288 | 289 | while True: 290 | chunk = await self.queue.get() 291 | 292 | # re-raise the exception if something wrong happened. 293 | if isinstance(chunk, RequestException): 294 | await self.aclose() 295 | raise chunk 296 | 297 | # end of stream. 298 | if chunk is STREAM_END: 299 | await self.aclose() 300 | return 301 | 302 | yield chunk 303 | 304 | async def atext(self) -> str: 305 | """ 306 | Return a decoded string. 307 | """ 308 | return self._decode(await self.acontent()) 309 | 310 | async def acontent(self) -> bytes: 311 | """wait and read the streaming content in one bytes object.""" 312 | chunks = [] 313 | async for chunk in self.aiter_content(): 314 | chunks.append(chunk) 315 | return b"".join(chunks) 316 | 317 | async def aclose(self): 318 | """Close the streaming connection, only valid in stream mode.""" 319 | 320 | if self.astream_task: 321 | await self.astream_task 322 | 323 | # It prints the status code of the response instead of the object's memory location. 324 | def __repr__(self) -> str: 325 | return f"" 326 | --------------------------------------------------------------------------------