├── docs
    ├── requirements.txt
    ├── asyncio.rst
    ├── websockets.rst
    ├── impersonate
    │   ├── _index.rst
    │   ├── fingerprint.rst
    │   ├── targets.rst
    │   ├── faq.rst
    │   ├── psk.rst
    │   └── customize.rst
    ├── pro.rst
    ├── Makefile
    ├── make.bat
    ├── conf.py
    ├── dev.rst
    ├── vs-requests.rst
    ├── community.rst
    ├── cookies.rst
    ├── advanced.rst
    ├── index.rst
    ├── changelog.rst
    ├── api.rst
    └── faq.rst
├── examples
    ├── scrapy_integration.py
    ├── requests_like.py
    ├── async_session.py
    ├── websockets
    │   ├── short_running.py
    │   ├── long_running_async.py
    │   └── long_running.py
    ├── curl_like.py
    ├── custom_response_class.py
    ├── upload.py
    ├── impersonate.py
    └── stream.py
├── curl_cffi
    ├── py.typed
    ├── __version__.py
    ├── requests
    │   ├── errors.py
    │   ├── __init__.py
    │   ├── exceptions.py
    │   └── models.py
    ├── utils.py
    ├── cli.py
    └── __init__.py
├── FUNDING.yml
├── assets
    ├── scrapfly.png
    ├── thordata.png
    ├── yescaptcha.png
    └── hypersolutions.png
├── MANIFEST.in
├── benchmark
    ├── requirements.txt
    ├── hardware.txt
    ├── single_worker.csv
    ├── server.py
    ├── multiple_workers.csv
    ├── ws_bench_1_server.py
    ├── benchmark.py
    ├── ws_bench_utils.py
    ├── README.md
    └── ws_bench_1_client.py
├── ffi
    ├── shim.h
    ├── shim.c
    └── cdef.c
├── tests
    ├── integration
    │   ├── test_real_world.py
    │   ├── test_response_class.py
    │   ├── test_fingerprints.py
    │   └── test_httpbin.py
    ├── unittest
    │   ├── test_cli.py
    │   ├── test_smoke.py
    │   ├── test_async.py
    │   ├── test_headers.py
    │   ├── test_cookies.py
    │   ├── test_websockets.py
    │   ├── test_upload.py
    │   ├── test_impersonate.py
    │   └── test_curl.py
    └── threads
    │   ├── test_eventlet.py
    │   └── test_gevent.py
├── .github
    ├── ISSUE_TEMPLATE
    │   ├── fingerprint_report.md
    │   ├── question.md
    │   ├── feature_request.md
    │   └── bug_report.md
    └── workflows
    │   └── build-and-test.yaml
├── scripts
    ├── download_curl.sh
    ├── bump_version.sh
    ├── generate_consts.py
    └── build.py
├── .gitignore
├── .readthedocs.yaml
├── setup.py
├── LICENSE
├── Makefile
├── libs.json
└── pyproject.toml


/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | sphinx
2 | 


--------------------------------------------------------------------------------
/examples/scrapy_integration.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/curl_cffi/py.typed:
--------------------------------------------------------------------------------
1 | # Marker file for PEP 561.


--------------------------------------------------------------------------------
/docs/asyncio.rst:
--------------------------------------------------------------------------------
1 | Asyncio
2 | =======
3 | 
4 | TODO
5 | 


--------------------------------------------------------------------------------
/FUNDING.yml:
--------------------------------------------------------------------------------
1 | github: lexiforest
2 | buy_me_a_coffee: yifei
3 | 


--------------------------------------------------------------------------------
/docs/websockets.rst:
--------------------------------------------------------------------------------
1 | WebSockets
2 | ==========
3 | 
4 | TODO
5 | 
6 | 


--------------------------------------------------------------------------------
/assets/scrapfly.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lexiforest/curl_cffi/HEAD/assets/scrapfly.png


--------------------------------------------------------------------------------
/assets/thordata.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lexiforest/curl_cffi/HEAD/assets/thordata.png


--------------------------------------------------------------------------------
/assets/yescaptcha.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lexiforest/curl_cffi/HEAD/assets/yescaptcha.png


--------------------------------------------------------------------------------
/assets/hypersolutions.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lexiforest/curl_cffi/HEAD/assets/hypersolutions.png


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | recursive-include tests *
2 | 
3 | include ffi/*
4 | include include/curl/*
5 | include scripts/build.py
6 | include Makefile
7 | include libs.json
8 | 


--------------------------------------------------------------------------------
/benchmark/requirements.txt:
--------------------------------------------------------------------------------
 1 | pandas
 2 | starlette
 3 | uvicorn
 4 | requests
 5 | httpx
 6 | aiohttp
 7 | pycurl
 8 | tls-client
 9 | gunicorn
10 | uvloop
11 | 


--------------------------------------------------------------------------------
/ffi/shim.h:
--------------------------------------------------------------------------------
1 | #include <stdlib.h>
2 | #include <string.h>
3 | #include <stdio.h>
4 | #define CURL_STATICLIB
5 | #include "curl/curl.h"
6 | 
7 | int _curl_easy_setopt(void* curl, int option, void* param);
8 | 


--------------------------------------------------------------------------------
/tests/integration/test_real_world.py:
--------------------------------------------------------------------------------
1 | from curl_cffi import requests
2 | 
3 | 
4 | def test_post_with_no_body():
5 |     r = requests.post(
6 |         "https://shopee.co.id/api/v2/authentication/get_active_login_page"
7 |     )
8 |     assert r.status_code == 200
9 | 


--------------------------------------------------------------------------------
/curl_cffi/__version__.py:
--------------------------------------------------------------------------------
1 | from importlib import metadata
2 | 
3 | from .curl import Curl
4 | 
5 | __title__ = "curl_cffi"
6 | __description__ = metadata.metadata("curl_cffi")["Summary"]
7 | __version__ = metadata.version("curl_cffi")
8 | __curl_version__ = Curl().version().decode()
9 | 


--------------------------------------------------------------------------------
/curl_cffi/requests/errors.py:
--------------------------------------------------------------------------------
1 | # for compatibility with 0.5.x
2 | 
3 | __all__ = ["CurlError", "RequestsError", "CookieConflict", "SessionClosed"]
4 | 
5 | from ..curl import CurlError
6 | from .exceptions import CookieConflict, SessionClosed
7 | from .exceptions import RequestException as RequestsError
8 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/fingerprint_report.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Fingerprint report
 3 | about: Report a new fingerprint not supported by curl_cffi
 4 | title: "New Fingerprint: "
 5 | labels: ""
 6 | ---
 7 | 
 8 | Please report in the [curl-impersonate](https://github.com/lexiforest/curl-impersonate) repo.
 9 | 
10 | 


--------------------------------------------------------------------------------
/benchmark/hardware.txt:
--------------------------------------------------------------------------------
 1 | Benchmarks run on:
 2 | 
 3 | macOS
 4 | 
 5 | Processor Name: 6-Core Intel Core i5
 6 | Processor Speed: 3.3 GHz
 7 | Number of Processors: 1
 8 | Total Number of Cores: 6
 9 | L2 Cache (per Core): 256 KB
10 | L3 Cache: 12 MB
11 | Hyper-Threading Technology: Enabled
12 | Memory: 32 GB
13 | 


--------------------------------------------------------------------------------
/scripts/download_curl.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | CURL_VERSION=curl-8_15_0
 4 | 
 5 | curl -L https://github.com/curl/curl/archive/${CURL_VERSION}.zip -o curl.zip
 6 | unzip -q -o curl.zip
 7 | mv curl-${CURL_VERSION} ${CURL_VERSION}
 8 | 
 9 | cd ${CURL_VERSION}
10 | 
11 | patchfile=../../curl-impersonate/patches/curl.patch
12 | patch -p1 < $patchfile
13 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .idea
 2 | .pytest_cache
 3 | Release
 4 | *.pyc
 5 | *.pyd
 6 | *.so
 7 | *.o
 8 | **/__pycache__
 9 | .venv
10 | 
11 | build/
12 | dist/
13 | wheelhouse/
14 | curl.egg-info/
15 | curl_cffi.egg-info/
16 | # curl_cffi/const.py
17 | curl-*/
18 | *.tar.xz
19 | *.tar.gz
20 | .preprocessed
21 | include/
22 | .DS_Store
23 | 
24 | .mypy_cache/
25 | .ruff_cache/
26 | .aider*
27 | 


--------------------------------------------------------------------------------
/scripts/bump_version.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | VERSION=$1
 4 | UPSTREAM_VERSION=$2
 5 | 
 6 | # Makefile
 7 | gsed "s/^VERSION := .*/VERSION := ${UPSTREAM_VERSION}/g" -i Makefile
 8 | 
 9 | # pyproject.toml
10 | gsed "s/^version = .*/version = \"${VERSION}\"/g" -i pyproject.toml
11 | 
12 | # build.py
13 | gsed "s/^__version__ = .*/__version__ = \"${UPSTREAM_VERSION}\"/g" -i scripts/build.py
14 | 


--------------------------------------------------------------------------------
/curl_cffi/utils.py:
--------------------------------------------------------------------------------
 1 | import warnings
 2 | 
 3 | 
 4 | class CurlCffiWarning(UserWarning, RuntimeWarning):
 5 |     pass
 6 | 
 7 | 
 8 | def config_warnings(on: bool = False):
 9 |     if on:
10 |         warnings.simplefilter("default", category=CurlCffiWarning)
11 |     else:
12 |         warnings.simplefilter("ignore", category=CurlCffiWarning)
13 | 
14 | 
15 | def is_pro():
16 |     return False
17 | 


--------------------------------------------------------------------------------
/tests/unittest/test_cli.py:
--------------------------------------------------------------------------------
 1 | import subprocess
 2 | 
 3 | 
 4 | def test_cli(server):
 5 |     """Test that the curl-cffi CLI can perform basic GET requests."""
 6 |     result = subprocess.check_output(
 7 |         f"curl-cffi {server.url}",
 8 |         shell=True,
 9 |         text=True,
10 |         timeout=30,
11 |     )
12 |     # Should look like HTTP response:
13 |     assert "Hello, world!" in result
14 | 


--------------------------------------------------------------------------------
/docs/impersonate/_index.rst:
--------------------------------------------------------------------------------
 1 | Impersonate Guide
 2 | =================
 3 | 
 4 | 
 5 | You probably come across ``curl_cffi`` for it's ability to impersonate browsers. Here is
 6 | a tutorial on how to better impersonate using ``curl_cffi``.
 7 | 
 8 | 
 9 | .. toctree::
10 |    :maxdepth: 2
11 |    :caption: Contents:
12 |    :glob:
13 | 
14 |    fingerprint
15 |    targets
16 |    customize
17 |    psk
18 |    faq
19 | 
20 | 
21 | 


--------------------------------------------------------------------------------
/docs/pro.rst:
--------------------------------------------------------------------------------
 1 | curl_cffi pro version
 2 | *********************
 3 | 
 4 | We offer a pro version for professional users of curl_cffi.
 5 | 
 6 | Feature Matrix
 7 | ==============
 8 | 
 9 | ============  =====  ======= ========
10 | Feature       http2  http3    Fingerprint updates
11 | ============  =====  ======= ========
12 | Open Source    ✅     ✅      Major ones
13 | Pro version   False  False    Weekly
14 | =====  =====  =======
15 | 


--------------------------------------------------------------------------------
/examples/requests_like.py:
--------------------------------------------------------------------------------
 1 | import curl_cffi
 2 | 
 3 | r = curl_cffi.get("https://tls.browserleaks.com/json")
 4 | print("No impersonation", r.json())
 5 | 
 6 | 
 7 | r = curl_cffi.get("https://tls.browserleaks.com/json", impersonate="chrome101")
 8 | print("With impersonation", r.json())
 9 | 
10 | 
11 | s = curl_cffi.Session(impersonate="chrome110")
12 | r = s.get("https://tls.browserleaks.com/json")
13 | print("With impersonation", r.json())
14 | 


--------------------------------------------------------------------------------
/examples/async_session.py:
--------------------------------------------------------------------------------
 1 | import asyncio
 2 | 
 3 | import curl_cffi
 4 | 
 5 | 
 6 | async def main():
 7 |     async with curl_cffi.AsyncSession() as s:
 8 |         r = await s.get("https://httpbin.org/headers")
 9 |         print(r.text)
10 | 
11 |         r = await s.get("https://httpbin.org/stream/20", stream=True)
12 |         async for chunk in r.aiter_content():
13 |             print(chunk)
14 | 
15 | 
16 | if __name__ == "__main__":
17 |     asyncio.run(main())
18 | 


--------------------------------------------------------------------------------
/examples/websockets/short_running.py:
--------------------------------------------------------------------------------
 1 | import asyncio
 2 | 
 3 | import curl_cffi
 4 | 
 5 | URL = "ws://echo.websocket.events"
 6 | 
 7 | 
 8 | ws = curl_cffi.WebSocket().connect(URL)
 9 | ws.send(b"Foo")
10 | reply = ws.recv()
11 | print(reply)
12 | 
13 | 
14 | async def async_examples():
15 |     async with curl_cffi.AsyncSession() as s:
16 |         ws = await s.ws_connect(URL)
17 |         await ws.send(b"Bar")
18 |         reply = await ws.recv()
19 |         print(reply)
20 | 
21 | 
22 | asyncio.run(async_examples())
23 | 


--------------------------------------------------------------------------------
/benchmark/single_worker.csv:
--------------------------------------------------------------------------------
 1 | name,size,duration
 2 | requests,1k,1.6857
 3 | httpx_sync,1k,1.0149
 4 | tls_client,1k,0.7551
 5 | curl_cffi_sync,1k,0.6563
 6 | curl_cffi_raw,1k,0.4655
 7 | pycurl,1k,0.4502
 8 | requests,20k,1.6896
 9 | httpx_sync,20k,1.0015
10 | tls_client,20k,2.0465
11 | curl_cffi_sync,20k,0.6723
12 | curl_cffi_raw,20k,0.4764
13 | pycurl,20k,0.4569
14 | requests,200k,1.8377
15 | httpx_sync,200k,1.1493
16 | tls_client,200k,20.2690
17 | curl_cffi_sync,200k,1.5508
18 | curl_cffi_raw,200k,1.3124
19 | pycurl,200k,1.0680
20 | 


--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
 1 | # Required
 2 | version: 2
 3 | 
 4 | # Set the OS, Python version and other tools you might need
 5 | build:
 6 |   os: ubuntu-22.04
 7 |   tools:
 8 |     python: "3.11"
 9 |   apt_packages:
10 |     - build-essential
11 |     - libtool
12 |   jobs:
13 |     pre_install:
14 |       - make preprocess
15 |       - python -m pip install -e .
16 | 
17 | # Optionally declare the Python requirements required to build your docs
18 | python:
19 |    install:
20 |    - requirements: docs/requirements.txt
21 | 
22 | sphinx:
23 |   configuration: docs/conf.py
24 | 
25 | formats:
26 |   - pdf
27 |   - epub
28 | 


--------------------------------------------------------------------------------
/examples/websockets/long_running_async.py:
--------------------------------------------------------------------------------
 1 | import asyncio
 2 | 
 3 | import curl_cffi
 4 | 
 5 | 
 6 | async def main():
 7 |     async with curl_cffi.AsyncSession() as s:
 8 |         ws = await s.ws_connect("wss://api.gemini.com/v1/marketdata/BTCUSD")
 9 |         print(
10 |             "For websockets, you need to set $wss_proxy environment variable!\n"
11 |             "$https_proxy will not work!"
12 |         )
13 |         print(">>> Websocket open!")
14 | 
15 |         async for message in ws:
16 |             print(message)
17 | 
18 |         print("<<< Websocket closed!")
19 | 
20 | 
21 | asyncio.run(main())
22 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup
 2 | from wheel.bdist_wheel import bdist_wheel
 3 | 
 4 | 
 5 | class bdist_wheel_abi3(bdist_wheel):
 6 |     def get_tag(self):
 7 |         python, abi, plat = super().get_tag()
 8 | 
 9 |         if python.startswith("cp"):
10 |             # on CPython, our wheels are abi3 and compatible back to 3.9
11 |             return "cp39", "abi3", plat
12 | 
13 |         return python, abi, plat
14 | 
15 | 
16 | setup(
17 |     # this option is only valid in setup.py
18 |     cffi_modules=["scripts/build.py:ffibuilder"],
19 |     cmdclass={
20 |         "bdist_wheel": bdist_wheel_abi3,
21 |     },
22 | )
23 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/question.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Ask a question
 3 | about: Asking for how to use a feature or when you are not sure if it's a bug
 4 | title: ""
 5 | labels: question
 6 | 
 7 | ---
 8 | 
 9 | **The question**
10 | 
11 | - What feature do you find confusing?
12 | - Which site does not work? And you don't have a clue.
13 | 
14 | **Documentation suggesion**
15 | 
16 | If the documentation is missing or confusing, add your suggestion here.
17 | 
18 | **Versions**
19 | 
20 | If it's related to a specific environment, paste your env info here.
21 | 
22 |  - OS: [e.g. linux x64]
23 |  - curl_cffi version [e.g. 0.5.7]
24 |  - `pip freeze` dump
25 | 


--------------------------------------------------------------------------------
/benchmark/server.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | from starlette.applications import Starlette
 4 | from starlette.responses import PlainTextResponse
 5 | from starlette.routing import Route
 6 | 
 7 | random_1k = os.urandom(1 * 1024)
 8 | random_20k = os.urandom(20 * 1024)
 9 | random_200k = os.urandom(200 * 1024)
10 | 
11 | 
12 | app = Starlette(
13 |     routes=[
14 |         Route("/1k", lambda r: PlainTextResponse(random_1k)),
15 |         Route("/20k", lambda r: PlainTextResponse(random_20k)),
16 |         Route("/200k", lambda r: PlainTextResponse(random_200k)),
17 |     ],
18 | )
19 | 
20 | # Run:
21 | # gunicorn benchmark.server:app -b 127.0.0.1:8000 -n benchmark -w 8 -k \
22 | # uvicorn.workers.UvicornWorker
23 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Feature request
 3 | about: Suggest an idea or feature for this project
 4 | title: ""
 5 | labels: enhancement
 6 | 
 7 | ---
 8 | 
 9 | **Is your feature request related to a problem? Please describe.**
10 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
11 | 
12 | **Describe the solution you'd like**
13 | A clear and concise description of what you want to happen.
14 | 
15 | **Describe alternatives you've considered**
16 | A clear and concise description of any alternative solutions or features you've considered.
17 | 
18 | **Additional context**
19 | Add any other context or screenshots about the feature request here.
20 | 


--------------------------------------------------------------------------------
/tests/unittest/test_smoke.py:
--------------------------------------------------------------------------------
 1 | # Simple smoke test to real world websites
 2 | from curl_cffi import requests
 3 | 
 4 | URLS = [
 5 |     "https://www.google.com",
 6 |     "https://www.apple.com",
 7 | ]
 8 | 
 9 | 
10 | def test_without_impersonate():
11 |     for url in URLS:
12 |         r = requests.get(url)
13 |         assert r.status_code == 200
14 | 
15 | 
16 | def test_with_impersonate():
17 |     for url in URLS:
18 |         r = requests.get(url, impersonate="chrome")
19 |         assert r.status_code == 200
20 | 
21 | 
22 | async def test_async():
23 |     async with requests.AsyncSession() as s:
24 |         for url in URLS:
25 |             r = await s.get(url)
26 |             assert r.status_code == 200
27 | 


--------------------------------------------------------------------------------
/examples/curl_like.py:
--------------------------------------------------------------------------------
 1 | from io import BytesIO
 2 | 
 3 | from curl_cffi import Curl, CurlOpt
 4 | 
 5 | buffer = BytesIO()
 6 | c = Curl()
 7 | c.setopt(CurlOpt.CUSTOMREQUEST, b"GET")
 8 | c.setopt(CurlOpt.URL, b"https://tls.browserleaks.com/json")
 9 | c.setopt(CurlOpt.WRITEDATA, buffer)
10 | c.perform()
11 | body = buffer.getvalue()
12 | print("NO impersonate:")
13 | print(body.decode())
14 | print("")
15 | 
16 | 
17 | buffer = BytesIO()
18 | c.setopt(CurlOpt.WRITEDATA, buffer)
19 | c.setopt(CurlOpt.URL, b"https://tls.browserleaks.com/json")
20 | c.impersonate("chrome110")
21 | c.setopt(CurlOpt.HTTPHEADER, [b"User-Agent: Curl/impersonate"])
22 | c.perform()
23 | body = buffer.getvalue()
24 | print("with impersonate:")
25 | print(body.decode())
26 | c.close()
27 | 


--------------------------------------------------------------------------------
/benchmark/multiple_workers.csv:
--------------------------------------------------------------------------------
 1 | name,size,duration
 2 | requests,1k,1.0432
 3 | httpx_sync,1k,0.7141
 4 | tls_client,1k,0.2622
 5 | curl_cffi_sync,1k,0.3528
 6 | curl_cffi_raw,1k,0.1410
 7 | pycurl,1k,0.1293
 8 | aiohttp,1k,0.2924
 9 | httpx_async,1k,1.7600
10 | curl_cffi_async,1k,0.3095
11 | requests,20k,1.0526
12 | httpx_sync,20k,0.6814
13 | tls_client,20k,1.5532
14 | curl_cffi_sync,20k,0.3530
15 | curl_cffi_raw,20k,0.1350
16 | pycurl,20k,0.0941
17 | aiohttp,20k,0.2929
18 | httpx_async,20k,1.6954
19 | curl_cffi_async,20k,0.3355
20 | requests,200k,1.4353
21 | httpx_sync,200k,1.3627
22 | tls_client,200k,15.2174
23 | curl_cffi_sync,200k,1.3735
24 | curl_cffi_raw,200k,1.0463
25 | pycurl,200k,1.0445
26 | aiohttp,200k,0.4401
27 | httpx_async,200k,3.4437
28 | curl_cffi_async,200k,0.8381
29 | 


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
 1 | # Minimal makefile for Sphinx documentation
 2 | #
 3 | 
 4 | # You can set these variables from the command line, and also
 5 | # from the environment for the first two.
 6 | SPHINXOPTS    ?=
 7 | SPHINXBUILD   ?= sphinx-build
 8 | SOURCEDIR     = .
 9 | BUILDDIR      = build
10 | 
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 | 
15 | dev:
16 | 	python -m http.server --dir build/html/ --bind 0.0.0.0
17 | 
18 | .PHONY: dev help Makefile
19 | 
20 | # Catch-all target: route all unknown targets to Sphinx using the new
21 | # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
22 | %: Makefile
23 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
24 | 


--------------------------------------------------------------------------------
/tests/unittest/test_async.py:
--------------------------------------------------------------------------------
 1 | from curl_cffi import AsyncCurl, Curl, CurlOpt
 2 | 
 3 | 
 4 | async def test_init(server):
 5 |     ac = AsyncCurl()  # noqa F841
 6 | 
 7 | 
 8 | async def test_add_handle(server):
 9 |     ac = AsyncCurl()
10 |     c = Curl()
11 |     c.setopt(CurlOpt.URL, "http://example.com")
12 |     c.setopt(CurlOpt.WRITEFUNCTION, lambda x: len(x))
13 |     fut = ac.add_handle(c)
14 |     await fut
15 | 
16 | 
17 | async def test_socket_action(server):
18 |     ac = AsyncCurl()
19 |     running = ac.socket_action(-1, 0)
20 |     # assert running == 0
21 |     c = Curl()
22 |     c.setopt(CurlOpt.URL, "http://example.com")
23 |     c.setopt(CurlOpt.WRITEFUNCTION, lambda x: len(x))
24 |     fut = ac.add_handle(c)
25 |     await fut
26 |     running = ac.socket_action(-1, 0)  # noqa F841
27 |     # assert running == 1
28 | 
29 | 
30 | async def test_process_data(server): ...
31 | 


--------------------------------------------------------------------------------
/tests/integration/test_response_class.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | from curl_cffi import requests
 3 | 
 4 | 
 5 | def test_default_response():
 6 |     response = requests.get("http://example.com")
 7 |     assert type(response) is requests.Response
 8 |     print(response.status_code)
 9 | 
10 | 
11 | class CustomResponse(requests.Response):
12 |     @property
13 |     def status(self):
14 |         return self.status_code
15 | 
16 | 
17 | def test_custom_response():
18 |     session = requests.Session(response_class=CustomResponse)
19 |     response = session.get("http://example.com")
20 |     assert isinstance(response, CustomResponse)
21 |     assert hasattr(response, "status")
22 |     print(response.status)
23 | 
24 | 
25 | class WrongTypeResponse:
26 |     pass
27 | 
28 | 
29 | def test_wrong_type_custom_response():
30 |     with pytest.raises(TypeError):
31 |         requests.Session(response_class=WrongTypeResponse)
32 | 


--------------------------------------------------------------------------------
/tests/threads/test_eventlet.py:
--------------------------------------------------------------------------------
 1 | import eventlet
 2 | 
 3 | eventlet.monkey_patch()
 4 | 
 5 | import threading  #  noqa: E402
 6 | import time  #  noqa: E402
 7 | 
 8 | from curl_cffi import requests  #  noqa: E402
 9 | 
10 | 
11 | def delay():
12 |     requests.get("http://192.168.64.5:8080/delay/2", thread="eventlet")
13 | 
14 | 
15 | def delay_not_working():
16 |     requests.get("http://192.168.64.5:8080/delay/2")
17 | 
18 | 
19 | def test_gevent_parallel(fn):
20 |     start = time.time()
21 |     threads = []
22 |     for _ in range(6):
23 |         t = threading.Thread(target=fn)
24 |         threads.append(t)
25 |         t.start()
26 |     for t in threads:
27 |         t.join()
28 |     # if no thread, the time should be 12
29 |     print(time.time() - start)
30 |     # assert time.time() - start < 3
31 | 
32 | 
33 | if __name__ == "__main__":
34 |     test_gevent_parallel(delay_not_working)
35 |     test_gevent_parallel(delay)
36 | 


--------------------------------------------------------------------------------
/tests/threads/test_gevent.py:
--------------------------------------------------------------------------------
 1 | from gevent import monkey
 2 | 
 3 | monkey.patch_all()
 4 | 
 5 | import threading  #  noqa: E402
 6 | import time  #  noqa: E402
 7 | 
 8 | from curl_cffi import requests  #  noqa: E402
 9 | 
10 | 
11 | def delay():
12 |     requests.get("http://192.168.64.5:8080/delay/2", thread="gevent")
13 | 
14 | 
15 | def delay_not_working():
16 |     requests.get("http://192.168.64.5:8080/delay/2")
17 | 
18 | 
19 | def test_gevent_parallel(fn):
20 |     start = time.time()
21 |     threads = []
22 |     for _ in range(6):
23 |         t = threading.Thread(target=fn)
24 |         threads.append(t)
25 |         t.start()
26 |     for t in threads:
27 |         t.join()
28 |     # if no thread, the time should be 12
29 |     print(time.time() - start)
30 |     # assert time.time() - start < 3
31 | 
32 | 
33 | if __name__ == "__main__":
34 |     test_gevent_parallel(delay_not_working)
35 |     test_gevent_parallel(delay)
36 | 


--------------------------------------------------------------------------------
/examples/custom_response_class.py:
--------------------------------------------------------------------------------
 1 | import curl_cffi
 2 | from curl_cffi import Curl, CurlInfo
 3 | from typing import cast
 4 | 
 5 | 
 6 | class CustomResponse(curl_cffi.Response):
 7 |     def __init__(
 8 |         self, curl: Curl | None = None, request: curl_cffi.Request | None = None
 9 |     ):
10 |         super().__init__(curl, request)
11 |         self.local_port = cast(int, curl.getinfo(CurlInfo.LOCAL_PORT))
12 |         self.connect_time = cast(float, curl.getinfo(CurlInfo.CONNECT_TIME))
13 | 
14 |     @property
15 |     def status(self):
16 |         return self.status_code
17 | 
18 |     def custom_method(self):
19 |         return "this is a custom method"
20 | 
21 | 
22 | session = curl_cffi.Session(response_class=CustomResponse)
23 | response: CustomResponse = session.get("http://example.com")
24 | print(f"{response.status=}")
25 | print(response.custom_method())
26 | print(f"{response.local_port=}")
27 | print(f"{response.connect_time=}")
28 | 


--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
 1 | @ECHO OFF
 2 | 
 3 | pushd %~dp0
 4 | 
 5 | REM Command file for Sphinx documentation
 6 | 
 7 | if "%SPHINXBUILD%" == "" (
 8 | 	set SPHINXBUILD=sphinx-build
 9 | )
10 | set SOURCEDIR=source
11 | set BUILDDIR=build
12 | 
13 | %SPHINXBUILD% >NUL 2>NUL
14 | if errorlevel 9009 (
15 | 	echo.
16 | 	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
17 | 	echo.installed, then set the SPHINXBUILD environment variable to point
18 | 	echo.to the full path of the 'sphinx-build' executable. Alternatively you
19 | 	echo.may add the Sphinx directory to PATH.
20 | 	echo.
21 | 	echo.If you don't have Sphinx installed, grab it from
22 | 	echo.https://www.sphinx-doc.org/
23 | 	exit /b 1
24 | )
25 | 
26 | if "%1" == "" goto help
27 | 
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 | 
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 | 
34 | :end
35 | popd
36 | 


--------------------------------------------------------------------------------
/curl_cffi/cli.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import sys
 3 | import curl_cffi
 4 | 
 5 | 
 6 | def main():
 7 |     parser = argparse.ArgumentParser(
 8 |         prog="curl-cffi",
 9 |         formatter_class=argparse.ArgumentDefaultsHelpFormatter,
10 |         description="A curl-like tool using curl-cffi with browser impersonation",
11 |     )
12 |     parser.add_argument(
13 |         "-i",
14 |         "--impersonate",
15 |         default="chrome",
16 |         help="Browser to impersonate",
17 |     )
18 |     parser.add_argument("urls", nargs="+", help="URLs to fetch")
19 | 
20 |     args = parser.parse_args()
21 | 
22 |     for url in args.urls:
23 |         try:
24 |             response = curl_cffi.requests.get(url, impersonate=args.impersonate)
25 |             print(response.text)
26 |         except Exception as e:
27 |             print(f"Error fetching {url}: {e}", file=sys.stderr)
28 |             sys.exit(1)
29 | 
30 | 
31 | if __name__ == "__main__":
32 |     main()
33 | 


--------------------------------------------------------------------------------
/ffi/shim.c:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "shim.h"
 3 | 
 4 | int _curl_easy_setopt(void* curl, int option, void* parameter) {
 5 |     // printf("****** hijack test begins: \n");
 6 |     // int val = curl_easy_setopt(instance->curl, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);
 7 |     // printf("****** hijack test ends. opt: %d, val: %d, result is: %d\n", CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0, val);
 8 |     // CURLoption opt_value = (CURLoption) option;
 9 |     // printf("option: %d, setopt parameter: %d\n", option, *(int*)parameter);
10 |     // for integer options, we need to convert param from pointers to integers
11 |     if (option < CURLOPTTYPE_OBJECTPOINT) {
12 |         return (int)curl_easy_setopt(curl, (CURLoption)option, *(long*)parameter);
13 |     }
14 |     if (CURLOPTTYPE_OFF_T <= option && option < CURLOPTTYPE_BLOB) {
15 |         return (int)curl_easy_setopt(curl, (CURLoption)option, *(curl_off_t*)parameter);
16 |     }
17 |     return (int)curl_easy_setopt(curl, (CURLoption)option, parameter);
18 | }
19 | 


--------------------------------------------------------------------------------
/examples/websockets/long_running.py:
--------------------------------------------------------------------------------
 1 | from curl_cffi import WebSocket
 2 | 
 3 | msg_count = 0
 4 | 
 5 | 
 6 | def on_message(ws: WebSocket, message: str | bytes):
 7 |     global msg_count
 8 | 
 9 |     print("------------------------------------------------------")
10 |     print(message)
11 |     print("======================================================")
12 | 
13 |     msg_count += 1
14 |     if msg_count >= 100:
15 |         ws.close()
16 | 
17 | 
18 | def on_error(ws: WebSocket, error: Exception):
19 |     print(error)
20 | 
21 | 
22 | def on_open(ws: WebSocket):
23 |     print(
24 |         "For websockets, you need to set $wss_proxy environment variable!\n"
25 |         "$https_proxy will not work!"
26 |     )
27 |     print(">>> Websocket open!")
28 | 
29 | 
30 | def on_close(ws: WebSocket, code: int, reason: str):
31 |     print(
32 |         f"<<< Websocket closed! code: {code}, reason: {reason}, clean: "
33 |         f"{code in (1000, 1001)}"
34 |     )
35 | 
36 | 
37 | ws = WebSocket(
38 |     on_open=on_open,
39 |     on_close=on_close,
40 |     on_message=on_message,
41 |     on_error=on_error,
42 | )
43 | ws.run_forever("wss://api.gemini.com/v1/marketdata/BTCUSD")
44 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | 
 4 | Copyright (c) 2018 multippt
 5 | Copyright (c) 2022 curl_cffi developers
 6 | 
 7 | Permission is hereby granted, free of charge, to any person obtaining a copy
 8 | of this software and associated documentation files (the "Software"), to deal
 9 | in the Software without restriction, including without limitation the rights
10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | copies of the Software, and to permit persons to whom the Software is
12 | furnished to do so, subject to the following conditions:
13 | 
14 | The above copyright notice and this permission notice shall be included in all
15 | copies or substantial portions of the Software.
16 | 
17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | SOFTWARE.
24 | 


--------------------------------------------------------------------------------
/tests/integration/test_fingerprints.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from curl_cffi import requests
 4 | 
 5 | JA3_URL = "https://tls.browserleaks.com/json"
 6 | # Copied from my browser on macOS
 7 | CHROME_JA3_HASH = "53ff64ddf993ca882b70e1c82af5da49"
 8 | # Edge 101 is the same as Chrome 101
 9 | EDGE_JA3_HASH = "53ff64ddf993ca882b70e1c82af5da49"
10 | # Same as safari 16.x
11 | SAFARI_JA3_HASH = "8468a1ef6cb71b13e1eef8eadf786f7d"
12 | 
13 | 
14 | def test_not_impersonate():
15 |     r = requests.get(JA3_URL)
16 |     assert r.json()["ja3_hash"] != CHROME_JA3_HASH
17 | 
18 | 
19 | def test_impersonate():
20 |     r = requests.get(JA3_URL, impersonate="chrome101")
21 |     assert r.json()["ja3_hash"] == CHROME_JA3_HASH
22 | 
23 | 
24 | def test_impersonate_edge():
25 |     r = requests.get(JA3_URL, impersonate="edge101")
26 |     assert r.json()["ja3_hash"] == EDGE_JA3_HASH
27 | 
28 | 
29 | def test_impersonate_safari():
30 |     r = requests.get(JA3_URL, impersonate="safari15_5")
31 |     assert r.json()["ja3_hash"] == SAFARI_JA3_HASH
32 | 
33 | 
34 | def test_impersonate_unknown():
35 |     with pytest.raises(requests.RequestsError, match="not supported"):
36 |         requests.get(JA3_URL, impersonate="unknown")
37 | 


--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
 1 | # Configuration file for the Sphinx documentation builder.
 2 | #
 3 | # For the full list of built-in configuration values, see the documentation:
 4 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
 5 | 
 6 | # -- Project information -----------------------------------------------------
 7 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
 8 | import os
 9 | import sys
10 | 
11 | sys.path.insert(0, os.path.abspath("../"))
12 | 
13 | project = "curl_cffi"
14 | copyright = "2023-2025, lexiforest"
15 | author = "lexiforest"
16 | 
17 | # -- General configuration ---------------------------------------------------
18 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
19 | 
20 | extensions = [
21 |     "sphinx.ext.todo",
22 |     "sphinx.ext.viewcode",
23 |     "sphinx.ext.autodoc",
24 |     "sphinx.ext.napoleon",
25 | ]
26 | 
27 | templates_path = ["_templates"]
28 | exclude_patterns = []
29 | 
30 | root_doc = "index"
31 | 
32 | 
33 | # -- Options for HTML output -------------------------------------------------
34 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
35 | 
36 | html_theme = "bizstyle"
37 | html_static_path = ["_static"]
38 | 


--------------------------------------------------------------------------------
/docs/dev.rst:
--------------------------------------------------------------------------------
 1 | Development and contributing
 2 | ==============
 3 | 
 4 | This page documents how to compile curl-impersonate and curl-cffi from source. If binary
 5 | package is not available on your platform, you may refer to this page for some inspirations.
 6 | 
 7 | First, you need to check if there are libcurl-impersonate binaries for you platform. If
 8 | so, you can simply download and install them.
 9 | 
10 | For now, a pre-compiled `libcurl-impersonate` is downloaded from github and built
11 | into a bdist wheel, which is a binary package format used by PyPI. However, the
12 | right way is to download curl and curl-impersonate sources on our side and compile
13 | them all together.
14 | 
15 | macOS
16 | 
17 | To install the local editable build:
18 | 
19 | .. code-block:: shell
20 | 
21 |     # This is for using the libcurl-impersonate built by GitHub actions
22 | 
23 |     sudo mkdir /Users/runner
24 |     sudo chmod 777 /Users/runner
25 | 
26 |     # Dependencies
27 | 
28 |     brew install libidn2 zstd
29 | 
30 |     # Then install
31 | 
32 |     pip install -e .[test]
33 |     pip install -e .[dev]
34 | 
35 | Contributing
36 | ------------
37 | 
38 | When opening PR, please do not use the ``main`` branch in your fork, otherwise I cannot
39 | add my modification, such as unittests.
40 | 


--------------------------------------------------------------------------------
/docs/vs-requests.rst:
--------------------------------------------------------------------------------
 1 | Compatibility with requests
 2 | ***************************
 3 | 
 4 | Although we try our best to mimic the requests API, some functionality is not easy to implement and left out.
 5 | Here are a list of known incompatibilities:
 6 | 
 7 | - files API are slightly different, but more error-proof.
 8 | - retries are not supported yet, tracked in [#24](https://github.com/lexiforest/curl_cffi/issues/24)
 9 | - redirect history are not supported, tracked in [#82](https://github.com/lexiforest/curl_cffi/issues/82)
10 | - empty-domains cookies may lost during redirects, tracked in [#55](https://github.com/lexiforest/curl_cffi/issues/55)
11 | - response object can not be pickled.
12 | - The ``requests`` proxies dict is supported, but we prefer ``proxy=...``, unless you really use different proxies for http and https.
13 | - You can use use transports/adapters, instead, you can use ``curl_cffi`` as adapter for ``reuqests``.
14 | 
15 | 
16 | Transports and Adapters
17 | =======================
18 | 
19 | ``curl_cffi`` is deeply coupled with ``libcurl-impersonate``. Unlike ``requests`` or ``httpx``,
20 | There is no way to use a different networking library or mount different adapters.
21 | 
22 | Alternatively, you can use ``curl-cffi`` as a requests adapter via `curl-adapter <https://github.com/el1s7/curl-adapter>`_.
23 | In this way, you get the full functionality of requests.
24 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Bug report
 3 | about: Report a bug in curl_cffi
 4 | title: ""
 5 | labels: bug
 6 | 
 7 | ---
 8 | 
 9 | <!--
10 | If you want to ask for why a certain site blocks you, please ask in the discussion section.
11 | 
12 | Issues are for bug reports and feature requests, documentation improvements, not how to use curl_cffi.
13 | -->
14 | 
15 | Please check the following items and answer all the questions when reporting a bug, 
16 | otherwise it will be closed immediately.
17 | 
18 | - [ ] **This is NOT a site-related "bugs"**, e.g. some site blocks me when using ``curl_cffi``,
19 |     UNLESS you have verified that the reason is imperfect impersonation.
20 | - [ ] A code snippet that can reproduce this bug will be provided, even if it's a one-liner.
21 | - [ ] Version and environment information will be pasted as below.
22 | 
23 | **Describe the bug**
24 | A clear and concise description of what the bug is.
25 | 
26 | **To Reproduce**
27 | ```py
28 | # Minimal reproducible code, like target websites, and request parameters, etc.
29 | ```
30 | 
31 | **Expected behavior**
32 | A clear and concise description of what you expected to happen.
33 | 
34 | **Versions**
35 |  - OS: [e.g. linux x64, Windows 7, macOS Sequoia]
36 |  - curl_cffi version [e.g. 0.5.7, 0.7.3]
37 |  - `pip freeze` dump
38 | 
39 | **Additional context**
40 | - Which session are you using? async or sync?
41 | - If using async session, which loop implementation are you using?
42 | - If you have tried, does this work with other http clients, e.g. `requests`, `httpx` or real browsers.
43 | 


--------------------------------------------------------------------------------
/examples/upload.py:
--------------------------------------------------------------------------------
 1 | """
 2 | We do not support requests.post(url, files=...), for 2 reasons.
 3 | 
 4 | - Curl's mime struct need to be freed manually after each request.
 5 | - requests' files parameter is quite a mess, it's just not worth it.
 6 | 
 7 | You use the multipart instead, it's very simple and straightforward.
 8 | """
 9 | 
10 | import curl_cffi
11 | 
12 | mp = curl_cffi.CurlMime()
13 | mp.addpart(
14 |     name="image",  # form field name
15 |     content_type="image/png",  # mime type
16 |     filename="image.png",  # filename seen by remote server
17 |     local_path="./image.png",  # local file to upload
18 | )
19 | 
20 | with open("./image.jpg", "rb") as file:
21 |     data = file.read()
22 | 
23 | # you can add multiple files under the same field name
24 | mp.addpart(
25 |     name="image",
26 |     content_type="image/jpg",
27 |     filename="image.jpg",
28 |     data=data,  # note the difference vs above
29 | )
30 | 
31 | # from a list
32 | mp = curl_cffi.CurlMime.from_list(
33 |     [
34 |         {
35 |             "name": "text",
36 |             "content_type": "text/plain",
37 |             "filename": "test.txt",
38 |             "local_path": "./test.txt",
39 |         },
40 |         {
41 |             "name": "foo",
42 |             "content_type": "text/plain",
43 |             "filename": "another.txt",
44 |             "data": "bar",
45 |         },
46 |     ]
47 | )
48 | 
49 | r = curl_cffi.post("https://httpbin.org/post", data={"foo": "bar"}, multipart=mp)
50 | print(r.json())
51 | 
52 | # close the form object, otherwise you have to wait for GC to recycle it. If you files
53 | # are too large, you may run out of memory quickly.
54 | mp.close()
55 | 


--------------------------------------------------------------------------------
/docs/community.rst:
--------------------------------------------------------------------------------
 1 | Community
 2 | =========
 3 | 
 4 | Scrapy integrations
 5 | ------
 6 | 
 7 | If you are using scrapy, check out these middlewares:
 8 | 
 9 | - [divtiply/scrapy-curl-cffi](https://github.com/divtiply/scrapy-curl-cffi)
10 | - [tieyongjie/scrapy-fingerprint](https://github.com/tieyongjie/scrapy-fingerprint)
11 | - [jxlil/scrapy-impersonate](https://github.com/jxlil/scrapy-impersonate)
12 | 
13 | 
14 | Using with eventlet/gevent
15 | ------
16 | 
17 | Just set ``thread`` to eventlet or gevent.
18 | 
19 | .. code-block:: python
20 | 
21 |    from curl_cffi import requests
22 | 
23 |    s = requests.Session(thread="eventlet")
24 |    s.get(url)
25 | 
26 | 
27 | As a urllib3/requests adapter
28 | ------
29 | 
30 | You can also use curl-cffi as a requests adapter via `curl-adapter <https://github.com/el1s7/curl-adapter>`_.
31 | In this way, you get the full functionality of requests.
32 | 
33 | .. code-block:: python
34 | 
35 |    import requests
36 |    from curl_adapter import CurlCffiAdapter
37 | 
38 |    session = requests.Session()
39 |    session.mount("http://", CurlCffiAdapter())
40 |    session.mount("https://", CurlCffiAdapter())
41 | 
42 |    # just use requests session like you normally would
43 |    session.get("https://example.com")
44 | 
45 | 
46 | As a httpx transport
47 | ------
48 | 
49 | You can also use curl-cffi as a httpx transport via `httpx-curl-cffi <https://github.com/vgavro/httpx-curl-cffi>`_.
50 | With this, you get the full functionality of httpx.
51 | 
52 | .. code-block:: python
53 | 
54 |    from httpx import Client, AsyncClient
55 |    from httpx_curl_cffi import CurlTransport, AsyncCurlTransport, CurlOpt
56 | 
57 |    client = Client(transport=CurlTransport(impersonate="chrome", default_headers=True))
58 |    client.get("https://tools.scrapfly.io/api/fp/ja3")
59 | 
60 |    async_client = AsyncClient(transport=AsyncCurlTransport(
61 |        impersonate="chrome",
62 |        default_headers=True,
63 |        # required for parallel requests, see curl_cffi issues below
64 |        curl_options={CurlOpt.FRESH_CONNECT: True}
65 |    ))
66 | 
67 | 
68 | 


--------------------------------------------------------------------------------
/docs/impersonate/fingerprint.rst:
--------------------------------------------------------------------------------
 1 | What is TLS and http/2, http/3 fingerprinting?
 2 | ----------------------------------------------
 3 | 
 4 | TLS and http/2
 5 | ~~~~~~~~~~~~~~
 6 | 
 7 | TLS is the ``s`` in ``https``. ``https`` has been uniformly deployed across the world.
 8 | There are many extension and cipher suites a implementation can choose to use. According to
 9 | the RFC, there are many valid combinations. But in reality, browser vendors tend to use
10 | fixed combinations, and these combinations can be used identify if the request is from a
11 | certain browser or an automated script. The digest of this combination is called a TLS
12 | fingerprints. The most common digesting method is called `JA3`.
13 | 
14 | Similar to TLS, there are a few settings in http/2 connection can be used to identify the
15 | source of a request. The most commonly used digesting method is proposed by Akamai, and called
16 | the Akamai http2 fingerprint.
17 | 
18 | To learn the details of TLS and http2 fingerprinting, you can read these great articles from lwthiker:
19 | 
20 | 1. https://lwthiker.com/networks/2022/06/17/tls-fingerprinting.html
21 | 2. https://lwthiker.com/networks/2022/06/17/http2-fingerprinting.html
22 | 
23 | The format of JA3 and Akamai digest is briefly discussed below.
24 | 
25 | http/3
26 | ~~~~~~
27 | 
28 | As of http/3, the newest version of http. Basically, it's http/2 reimplemented over QUIC,
29 | thus it can be fingerprinted in a similar way with http/2.
30 | 
31 | Http3 fingerprints has not yet been publicly exploited and reported. But given the rapidly increasing
32 | marketshare of http/3(35% of internet traffic), it is expected that some strict WAF vendors have begun
33 | to utilize http/3 fingerprinting.
34 | 
35 | It has also been noticed by many users, that, for a lot of sites, there is less or even none
36 | detection when using http/3.
37 | 
38 | ``curl_cffi`` provides TLS and http/2 impersonation in the open source version.
39 | 
40 | For http/3 impersonation and http/3 proxy support, please head over to `impersonate.pro <https://impersonate.pro>`_
41 | for the commercial version of ``curl_cffi``.
42 | 
43 | 


--------------------------------------------------------------------------------
/curl_cffi/__init__.py:
--------------------------------------------------------------------------------
 1 | __all__ = [
 2 |     "Curl",
 3 |     "AsyncCurl",
 4 |     "CurlMime",
 5 |     "CurlError",
 6 |     "CurlInfo",
 7 |     "CurlOpt",
 8 |     "CurlMOpt",
 9 |     "CurlECode",
10 |     "CurlHttpVersion",
11 |     "CurlSslVersion",
12 |     "CurlWsFlag",
13 |     "config_warnings",
14 |     "ffi",
15 |     "is_pro",
16 |     "lib",
17 |     "Session",
18 |     "AsyncSession",
19 |     "BrowserType",
20 |     "BrowserTypeLiteral",
21 |     "request",
22 |     "head",
23 |     "get",
24 |     "post",
25 |     "put",
26 |     "patch",
27 |     "delete",
28 |     "options",
29 |     "Cookies",
30 |     "Headers",
31 |     "Request",
32 |     "Response",
33 |     "AsyncWebSocket",
34 |     "WebSocket",
35 |     "WebSocketError",
36 |     "WebSocketClosed",
37 |     "WebSocketTimeout",
38 |     "WsCloseCode",
39 |     "ExtraFingerprints",
40 |     "CookieTypes",
41 |     "HeaderTypes",
42 |     "ProxySpec",
43 |     "exceptions",
44 | ]
45 | 
46 | import _cffi_backend  # noqa: F401  # required by _wrapper
47 | 
48 | from .__version__ import __curl_version__, __description__, __title__, __version__  # noqa: F401
49 | 
50 | # This line includes _wrapper.so into the wheel
51 | from ._wrapper import ffi, lib
52 | from .aio import AsyncCurl
53 | from .const import (
54 |     CurlECode,
55 |     CurlHttpVersion,
56 |     CurlInfo,
57 |     CurlMOpt,
58 |     CurlOpt,
59 |     CurlSslVersion,
60 |     CurlWsFlag,
61 | )
62 | from .curl import Curl, CurlError, CurlMime
63 | 
64 | from .requests import (
65 |     AsyncSession,
66 |     AsyncWebSocket,
67 |     BrowserType,
68 |     BrowserTypeLiteral,
69 |     Cookies,
70 |     CookieTypes,
71 |     ExtraFingerprints,
72 |     Headers,
73 |     HeaderTypes,
74 |     ProxySpec,
75 |     Request,
76 |     Response,
77 |     Session,
78 |     WebSocket,
79 |     WebSocketClosed,
80 |     WebSocketError,
81 |     WebSocketTimeout,
82 |     WsCloseCode,
83 |     delete,
84 |     exceptions,
85 |     get,
86 |     head,
87 |     options,
88 |     patch,
89 |     post,
90 |     put,
91 |     request,
92 | )
93 | 
94 | from .utils import config_warnings, is_pro
95 | 
96 | config_warnings(on=False)
97 | 


--------------------------------------------------------------------------------
/examples/impersonate.py:
--------------------------------------------------------------------------------
 1 | import curl_cffi
 2 | 
 3 | # OKHTTP impersonatation examples
 4 | # credits: https://github.com/bogdanfinn/tls-client/blob/master/profiles/contributed_custom_profiles.go
 5 | 
 6 | url = "https://tls.browserleaks.com/json"
 7 | 
 8 | okhttp4_android10_ja3 = ",".join(
 9 |     [
10 |         "771",
11 |         "4865-4866-4867-49195-49196-52393-49199-49200-52392-49171-49172-156-157-47-53",
12 |         "0-23-65281-10-11-35-16-5-13-51-45-43-21",
13 |         "29-23-24",
14 |         "0",
15 |     ]
16 | )
17 | 
18 | okhttp4_android10_akamai = "4:16777216|16711681|0|m,p,a,s"
19 | 
20 | extra_fp = {
21 |     "tls_signature_algorithms": [
22 |         "ecdsa_secp256r1_sha256",
23 |         "rsa_pss_rsae_sha256",
24 |         "rsa_pkcs1_sha256",
25 |         "ecdsa_secp384r1_sha384",
26 |         "rsa_pss_rsae_sha384",
27 |         "rsa_pkcs1_sha384",
28 |         "rsa_pss_rsae_sha512",
29 |         "rsa_pkcs1_sha512",
30 |         "rsa_pkcs1_sha1",
31 |     ]
32 |     # other options:
33 |     # tls_min_version: int = CurlSslVersion.TLSv1_2
34 |     # tls_grease: bool = False
35 |     # tls_permute_extensions: bool = False
36 |     # tls_cert_compression: Literal["zlib", "brotli"] = "brotli"
37 |     # tls_signature_algorithms: Optional[List[str]] = None
38 |     # http2_stream_weight: int = 256
39 |     # http2_stream_exclusive: int = 1
40 |     # See requests/impersonate.py and tests/unittest/test_impersonate.py for more
41 |     # examples
42 | }
43 | 
44 | 
45 | r = curl_cffi.get(
46 |     url, ja3=okhttp4_android10_ja3, akamai=okhttp4_android10_akamai, extra_fp=extra_fp
47 | )
48 | 
49 | print(r.json())
50 | 
51 | 
52 | # Special firefox extension
53 | 
54 | 
55 | # ruff: noqa: E501
56 | extra_fp = {
57 |     "tls_delegated_credential": "ecdsa_secp256r1_sha256:ecdsa_secp384r1_sha384:ecdsa_secp521r1_sha512:ecdsa_sha1",
58 |     "tls_record_size_limit": 4001,
59 | }
60 | 
61 | # Note that the ja3 string also includes extensiion: 28 and 34
62 | # ruff: noqa: E501
63 | ja3 = "771,4865-4867-4866-49195-49199-52393-52392-49196-49200-49162-49161-49171-49172-156-157-47-53,0-23-65281-10-11-35-16-5-34-18-51-43-13-45-28-27-65037,4588-29-23-24-25-256-257,0"
64 | 
65 | r = curl_cffi.get(url, ja3=ja3, extra_fp=extra_fp)
66 | print(r.json())
67 | 


--------------------------------------------------------------------------------
/tests/unittest/test_headers.py:
--------------------------------------------------------------------------------
 1 | from curl_cffi.requests import Headers
 2 | from curl_cffi.requests.utils import update_header_line
 3 | 
 4 | 
 5 | def test_headers():
 6 |     headers = Headers()
 7 |     headers["foo"] = "bar"
 8 |     headers["foo"] = "baz"
 9 |     assert headers["foo"] == "baz"
10 |     assert headers.get("foo") == "baz"
11 |     assert headers.get("bar") is None
12 |     assert headers
13 | 
14 | 
15 | def test_headers_none_value():
16 |     headers = Headers({"foo": None, "bar": ""})
17 |     assert headers.get("foo") is None
18 |     assert headers["bar"] == ""
19 | 
20 | 
21 | def test_header_output():
22 |     headers = Headers({"X-Foo": "bar"})
23 |     header_list = headers.multi_items()
24 |     assert header_list[0][0] == "X-Foo"
25 | 
26 | 
27 | def test_replace_header():
28 |     header_lines = []
29 |     update_header_line(header_lines, "content-type", "image/png")
30 |     assert header_lines == ["content-type: image/png"]
31 |     update_header_line(header_lines, "Content-Type", "application/json")
32 |     assert header_lines == ["content-type: image/png"]
33 |     update_header_line(header_lines, "Content-Type", "application/json", replace=True)
34 |     assert header_lines == ["Content-Type: application/json"]
35 |     update_header_line(header_lines, "Host", "example.com", replace=True)
36 |     assert header_lines == ["Content-Type: application/json", "Host: example.com"]
37 | 
38 | 
39 | def test_none_headers():
40 |     """Allow using None to explictly remove headers"""
41 |     headers = Headers({"Content-Type": None})
42 |     assert headers["content-type"] is None
43 | 
44 | 
45 | def test_wrapped_headers_preserve_encoding():
46 |     headers = Headers({"foo": "bar"}, encoding="utf-8")
47 |     wrapped_headers = Headers(headers)
48 |     assert wrapped_headers.encoding == "utf-8"
49 | 
50 | 
51 | def test_wrapped_empty_headers_preserve_encoding():
52 |     headers = Headers({}, encoding="utf-8")
53 |     wrapped_headers = Headers(headers)
54 |     assert wrapped_headers.encoding == "utf-8"
55 | 
56 | 
57 | def test_wrapped_headers_change_encoding():
58 |     headers = Headers({"foo": "bar"}, encoding="utf-8")
59 |     wrapped_headers = Headers(headers, encoding="ascii")
60 |     assert wrapped_headers.encoding == "ascii"
61 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | .ONESHELL:
 2 | SHELL := bash
 3 | 
 4 | # this is the upstream libcurl-impersonate version
 5 | VERSION := 1.2.5
 6 | CURL_VERSION := curl-8_15_0
 7 | 
 8 | $(CURL_VERSION):
 9 | 	curl -L https://github.com/curl/curl/archive/$(CURL_VERSION).zip -o curl.zip
10 | 	unzip -q -o curl.zip
11 | 	mv curl-$(CURL_VERSION) $(CURL_VERSION)
12 | 
13 | curl-impersonate-$(VERSION)/patches: $(CURL_VERSION)
14 | 	curl -L "https://github.com/lexiforest/curl-impersonate/archive/refs/tags/v$(VERSION).tar.gz" \
15 | 		-o "curl-impersonate-$(VERSION).tar.gz"
16 | 	tar -xf curl-impersonate-$(VERSION).tar.gz
17 | 
18 | .preprocessed: curl-impersonate-$(VERSION)/patches
19 | 	cd $(CURL_VERSION)
20 | 	# for p in $</curl*.patch; do patch -p1 < ../$$p; done
21 | 	patch -p1 < ../$</curl.patch
22 | 	# Re-generate the configure script
23 | 	autoreconf -fi
24 | 	mkdir -p ../include/curl
25 | 	cp -R include/curl/* ../include/curl/
26 | 	# Sentinel files: https://tech.davis-hansson.com/p/make/
27 | 	cd ..
28 | 	touch .preprocessed
29 | 
30 | local-curl: $(CURL_VERSION)
31 | 	cp /usr/local/lib/libcurl-impersonate* /Users/runner/work/_temp/install/lib/
32 | 	cd $(CURL_VERSION)
33 | 	for p in ../curl-impersonate/patches/curl*.patch; do patch -p1 < ../$$p; done
34 | 	# Re-generate the configure script
35 | 	autoreconf -fi
36 | 	mkdir -p ../include/curl
37 | 	cp -R include/curl/* ../include/curl/
38 | 	# Sentinel files: https://tech.davis-hansson.com/p/make/
39 | 	touch .preprocessed
40 | 
41 | gen-const:
42 | 	python scripts/generate_consts.py $(CURL_VERSION)
43 | 
44 | preprocess: .preprocessed
45 | 	@echo generating patched libcurl header files
46 | 
47 | install-editable:
48 | 	pip install -e .
49 | 
50 | build: .preprocessed
51 | 	rm -rf dist/
52 | 	pip install build
53 | 	python -m build --wheel
54 | 
55 | lint:
56 | 	ruff check --exclude issues
57 | 
58 | format:
59 | 	ruff format --exclude issues
60 | 
61 | test:
62 | 	python -bb -m pytest tests/unittest
63 | 
64 | clean:
65 | 	rm -rf build/ dist/ curl_cffi.egg-info/ $(CURL_VERSION)/ curl-impersonate-$(VERSION)/
66 | 	rm -rf curl_cffi/*.o curl_cffi/*.so curl_cffi/_wrapper.c
67 | 	rm -rf .preprocessed $(CURL_VERSION).tar.xz curl-impersonate-$(VERSION).tar.gz
68 | 	rm -rf include/
69 | 
70 | .PHONY: clean build test install-editable preprocess gen-const
71 | 


--------------------------------------------------------------------------------
/docs/cookies.rst:
--------------------------------------------------------------------------------
 1 | Cookies
 2 | =======
 3 | 
 4 | Save and load cookies
 5 | ------
 6 | 
 7 | Do not use ``get_dict`` to dump and load cookies. Cookies are more than just plain
 8 | key-value pairs.
 9 | 
10 | Using pickle:
11 | 
12 | .. code-block:: python
13 | 
14 |     # example from: https://github.com/encode/httpx/issues/895
15 |     import pickle
16 |     # import httpx
17 |     import curl_cffi
18 | 
19 |     def save_cookies(client):
20 |         with open("cookies.pk", "wb") as f:
21 |             pickle.dump(client.cookies.jar._cookies, f)
22 | 
23 |     def load_cookies():
24 |         if not os.path.isfile("cookies.pk"):
25 |             return None
26 |         with open("cookies.pk", "rb") as f:
27 |             return pickle.load(f)
28 | 
29 |     # client = httpx.Client(cookies=load_cookies())
30 |     client = curl_cffi.Session()
31 |     client.get("https://httpbin.org/cookies/set/foo/bar")
32 |     save_cookies(client)
33 | 
34 |     client = curl_cffi.Session()
35 |     client.cookies.jar._cookies.update(load_cookies())
36 |     print(client.cookies.get("foo"))
37 | 
38 | 
39 | Using mozilla cookie jar:
40 | 
41 | See: https://github.com/lexiforest/curl_cffi/issues/381
42 | 
43 | TODO: expose libcurl's native cookies.txt support.
44 | 
45 | 
46 | Discard cookies when using Session
47 | ----------------------------------
48 | 
49 | You may need to discard cookies when using sessions, especially when using ``AsyncSession``.
50 | 
51 | Use the ``discard_cookies`` option.
52 | 
53 | 
54 | .. code-block:: python
55 | 
56 |     s = requests.Session()
57 | 
58 |     set_url = "https://httpbin.org/cookies"
59 | 
60 |     r = s.get(set_url)
61 |     assert r.cookies["foo"] == s.cookies["foo"]
62 |     old_cookie = r.cookies["foo"]
63 | 
64 |     # Let's start discarding cookies
65 |     s.discard_cookies = True
66 |     r = s.get(set_url)
67 |     assert r.cookies["foo"] != s.cookies["foo"]
68 |     assert old_cookie == s.cookies["foo"]
69 | 
70 |     # The behavior can be reverted
71 |     s.discard_cookies = False
72 |     r = s.get(set_url)
73 |     assert r.cookies["foo"] == s.cookies["foo"]
74 |     old_cookie = r.cookies["foo"]
75 | 
76 |     # Also works as request parameter
77 |     r = s.get(set_url, discard_cookies=True)
78 |     assert r.cookies["foo"] != s.cookies["foo"]
79 |     assert old_cookie == s.cookies["foo"]
80 | 


--------------------------------------------------------------------------------
/tests/unittest/test_cookies.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from curl_cffi.requests.cookies import Cookies, CurlMorsel
 4 | from curl_cffi.requests.errors import CookieConflict, RequestsError
 5 | 
 6 | 
 7 | def test_cookies_conflict():
 8 |     c = Cookies()
 9 |     c.set("foo", "bar", domain="example.com")
10 |     c.set("foo", "baz", domain="test.local")
11 |     with pytest.raises(CookieConflict):
12 |         c.get("foo")
13 | 
14 | 
15 | def test_cookies_conflict_on_subdomain():
16 |     c = Cookies()
17 |     c.set("foo", "bar", domain=".example.com")
18 |     c.set("foo", "baz", domain="a.example.com")
19 |     assert c.get("foo") in ("bar", "baz")
20 | 
21 | 
22 | def test_cookies_conflict_but_same():
23 |     c = Cookies()
24 |     c = Cookies()
25 |     c.set("foo", "bar", domain="example.com")
26 |     c.set("foo", "bar", domain="test.local")
27 |     assert c.get("foo") == "bar"
28 | 
29 | 
30 | def test_curl_format_with_hostname():
31 |     m = CurlMorsel(name="foo", value="bar", hostname="example.com")
32 |     assert m.to_curl_format() == "example.com\tFALSE\t/\tFALSE\t0\tfoo\tbar"
33 |     m = CurlMorsel(name="foo", value="bar", hostname="example.com", secure=True)
34 |     assert m.to_curl_format() == "example.com\tFALSE\t/\tTRUE\t0\tfoo\tbar"
35 |     m = CurlMorsel(name="foo", value="bar", hostname="example.com", path="/path")
36 |     assert m.to_curl_format() == "example.com\tFALSE\t/path\tFALSE\t0\tfoo\tbar"
37 | 
38 | 
39 | def test_curl_format_without_hostname():
40 |     m = CurlMorsel(name="foo", value="bar")
41 |     with pytest.raises(RequestsError):
42 |         m.to_curl_format()
43 | 
44 | 
45 | def test_get_dict():
46 |     c = Cookies({"foo": "bar"})
47 |     d = c.get_dict()
48 |     assert d == {"foo": "bar"}
49 | 
50 |     c = Cookies({"foo": "bar", "hello": "world", "a": "b"})
51 |     d = c.get_dict()
52 |     assert len(d) == 3
53 |     assert d["foo"] == "bar"
54 |     assert d["hello"] == "world"
55 |     assert d["a"] == "b"
56 | 
57 |     c = Cookies()
58 |     c.set("foo", "bar", domain="example.com")
59 |     c.set("hello", "world", domain="example.com")
60 |     c.set("foo", "bar", domain="test.local")
61 |     d_example = c.get_dict("example.com")
62 |     d_test = c.get_dict("test.local")
63 |     assert len(d_example) == 2
64 |     assert d_example["foo"] == "bar"
65 |     assert d_example["hello"] == "world"
66 |     assert len(d_test) == 1
67 |     assert d_test["foo"] == "bar"
68 | 


--------------------------------------------------------------------------------
/tests/integration/test_httpbin.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from curl_cffi import CurlHttpVersion, CurlOpt, requests
 4 | 
 5 | #######################################################################################
 6 | # testing httpbin
 7 | #######################################################################################
 8 | 
 9 | 
10 | def test_gzip():
11 |     r = requests.get("https://httpbin.org/gzip")
12 |     assert r.status_code == 200
13 |     assert r.json()["gzipped"]
14 | 
15 | 
16 | def test_brotli():
17 |     r = requests.get("https://httpbin.org/brotli")
18 |     assert r.status_code == 200
19 |     assert r.json()["brotli"]
20 | 
21 | 
22 | def test_redirect_n():
23 |     r = requests.get("https://httpbin.org/absolute-redirect/3")
24 |     assert r.status_code == 200
25 | 
26 | 
27 | def test_relative_redirect_n():
28 |     r = requests.get("https://httpbin.org/relative-redirect/3")
29 |     assert r.status_code == 200
30 | 
31 | 
32 | def test_redirect_max():
33 |     with pytest.raises(requests.RequestsError, match="Failed"):
34 |         requests.get("https://httpbin.org/redirect/5", max_redirects=3)
35 | 
36 | 
37 | def test_imperonsate_default_headers():
38 |     r = requests.get("http://postman-echo.com/headers", impersonate="chrome110")
39 |     headers = r.json()
40 |     assert "Mozilla" in headers["headers"]["user-agent"]
41 |     r = requests.get(
42 |         "http://httpbin.org/headers",
43 |         impersonate="chrome110",
44 |         default_headers=False,
45 |     )
46 |     headers = r.json()
47 |     assert "user-agent" not in headers["headers"]
48 | 
49 | 
50 | def test_curl_options():
51 |     url = "https://postman-echo.com/headers"
52 |     r = requests.get(url, impersonate="chrome110", default_headers=False)
53 |     headers = r.json()
54 |     print(headers)
55 |     assert r.http_version == CurlHttpVersion.V2_0
56 |     r = requests.get(
57 |         url,
58 |         curl_options={CurlOpt.HTTP_VERSION: CurlHttpVersion.V1_1},
59 |         impersonate="chrome110",
60 |         default_headers=False,
61 |     )
62 |     headers = r.json()
63 |     assert r.http_version == CurlHttpVersion.V1_1
64 |     print(headers)
65 | 
66 | 
67 | def test_http_version():
68 |     url = "https://postman-echo.com/headers"
69 |     r = requests.get(url, impersonate="chrome110", http_version=CurlHttpVersion.V1_1)
70 |     headers = r.json()
71 |     assert r.http_version == CurlHttpVersion.V1_1
72 |     print(headers)
73 | 


--------------------------------------------------------------------------------
/docs/impersonate/targets.rst:
--------------------------------------------------------------------------------
 1 | Supported browser impersonate targets
 2 | -------------------------------------
 3 | 
 4 | ``curl_cffi`` supports the same browser versions as supported by our `fork of curl-impersonate <https://github.com/lexiforest/curl-impersonate>`_.
 5 | 
 6 | Browser versions will be added **only** when their fingerprints change. If you see a version, e.g.
 7 | ``chrome122``, was skipped, you can simply impersonate it with your own headers and the previous version.
 8 | 
 9 | If you are too busy to look up those details, you can try our commercial version at `impersonate.pro <https://impersonate.pro>`_,
10 | which has a weekly updated list of browser profiles and even more browser types.
11 | 
12 | If you are trying to impersonate a target other than a browser, use ``ja3=...``, ``akamai=...`` and ``extra_fp=...``
13 | to specify your own customized fingerprints. See below for details.
14 | 
15 | - chrome99
16 | - chrome100
17 | - chrome101
18 | - chrome104
19 | - chrome107
20 | - chrome110
21 | - chrome116 :sup:`1`
22 | - chrome119 :sup:`1`
23 | - chrome120 :sup:`1`
24 | - chrome123 :sup:`3`
25 | - chrome124 :sup:`3`
26 | - chrome131 :sup:`4`
27 | - chrome133a :sup:`5` :sup:`6`
28 | - chrome136 :sup:`7`
29 | - chrome99_android
30 | - chrome131_android :sup:`4`
31 | - edge99
32 | - edge101
33 | - safari153 :sup:`2`
34 | - safari155 :sup:`2`
35 | - safari170 :sup:`1`
36 | - safari172_ios :sup:`1`
37 | - safari180 :sup:`4`
38 | - safari180_ios :sup:`4`
39 | - safari184 :sup:`7`
40 | - safari184_ios :sup:`7`
41 | - safari260 :sup:`8`
42 | - safari260_ios :sup:`8`
43 | - firefox133 :sup:`5`
44 | - tor145 :sup:`7`
45 | 
46 | Notes:
47 | 
48 | 1. Added in version ``0.6.0``.
49 | 2. Fixed in version ``0.6.0``, previous http2 fingerprints were `not correct <https://github.com/lwthiker/curl-impersonate/issues/215>`_.
50 | 3. Added in version ``0.7.0``.
51 | 4. Added in version ``0.8.0``.
52 | 5. Added in version ``0.9.0``.
53 | 6. The version postfix ``-a``(e.g. ``chrome133a``) means that this is an alternative version, i.e. the fingerprint has not been officially updated by browser, but has been observed because of A/B testing.
54 | 7. Added in version ``0.11.0``
55 | 8. Added in version ``0.12.0``
56 | 
57 | Which target version to use?
58 | ----------------------------
59 | 
60 | Generally speaking, you should use the latest Chrome or Safari versions. As of v0.11, they're
61 | ``chrome136``, ``safari184`` and ``safari184_ios``. To always impersonate the latest available
62 | browser versions, you can simply use ``chrome``, ``firefox``, ``safari`` and ``chrome_android``, ``safari_ios``.
63 | 
64 | .. code-block:: python
65 | 
66 |     import curl_cffi
67 | 
68 |     curl_cffi.get(url, impersonate="chrome")
69 | 
70 | 
71 | Tips:
72 | 
73 | iOS has restrictions on WebView and TLS libs, so ``safari_*_ios`` should work for a lot of apps.
74 | If you encountered an android app with custom fingerprints, you can try the ``safari_ios``
75 | fingerprints, given that this app should have an iOS version.
76 | 


--------------------------------------------------------------------------------
/benchmark/ws_bench_1_server.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | """
  3 | Websocket server example - TLS (WSS)
  4 | """
  5 | 
  6 | from asyncio import (
  7 |     FIRST_COMPLETED,
  8 |     AbstractEventLoop,
  9 |     CancelledError,
 10 |     Task,
 11 |     get_running_loop,
 12 |     wait,
 13 | )
 14 | 
 15 | from aiohttp import web
 16 | from ws_bench_utils import binary_data_generator, config, get_loop, logger
 17 | 
 18 | 
 19 | async def recv(ws: web.WebSocketResponse) -> None:
 20 |     """Just receive the data in a tight loop. Do nothing else.
 21 | 
 22 |     Args:
 23 |         ws (web.WebSocketResponse): The WebSocket server object.
 24 |     """
 25 |     async for msg in ws:
 26 |         if msg.type == web.WSMsgType.BINARY:
 27 |             continue
 28 | 
 29 |         if msg.type == web.WSMsgType.ERROR:
 30 |             break
 31 | 
 32 | 
 33 | async def send(ws: web.WebSocketResponse) -> None:
 34 |     """Send the generated chunks until total size is hit.
 35 | 
 36 |     Args:
 37 |         ws (web.WebSocketResponse): The WebSocket server object.
 38 |     """
 39 |     try:
 40 |         async for binary_data in binary_data_generator(
 41 |             config.total_gb, config.chunk_size
 42 |         ):
 43 |             await ws.send_bytes(binary_data)
 44 |     except ConnectionError as exc:
 45 |         logger.warning(exc)
 46 | 
 47 | 
 48 | async def ws_handler(request: web.Request) -> web.WebSocketResponse:
 49 |     """Handle the connection and run the relevant server action, send/recv/both.
 50 | 
 51 |     Args:
 52 |         request (web.Request): Web request object.
 53 | 
 54 |     Returns:
 55 |         web.WebSocketResponse: Response object
 56 |     """
 57 |     loop: AbstractEventLoop = get_running_loop()
 58 |     waiters: set[Task[None]] = set()
 59 |     ws: web.WebSocketResponse = web.WebSocketResponse()
 60 |     _ = await ws.prepare(request)
 61 |     logger.info("Secure client connected.")
 62 | 
 63 |     try:
 64 |         # NOTE: Uncomment for send/recv or both concurrently
 65 |         # waiters.add(loop.create_task(recv(ws)))
 66 |         waiters.add(loop.create_task(send(ws)))
 67 | 
 68 |         _, _ = await wait(waiters, return_when=FIRST_COMPLETED)
 69 | 
 70 |     except Exception:
 71 |         logger.exception("Connection closed with exception")
 72 | 
 73 |     finally:
 74 |         for wait_task in waiters:
 75 |             try:
 76 |                 if not wait_task.done():
 77 |                     _ = wait_task.cancel()
 78 |                     await wait_task
 79 |             except CancelledError:
 80 |                 ...
 81 | 
 82 |     logger.info("Client disconnected.")
 83 |     return ws
 84 | 
 85 | 
 86 | def main() -> None:
 87 |     """Entrypoint"""
 88 | 
 89 |     # Create and start the aiohttp server
 90 |     app: web.Application = web.Application()
 91 |     _ = app.add_routes(routes=[web.get("/ws", ws_handler)])
 92 |     logger.info("Starting server on %s", config.srv_path)
 93 |     web.run_app(
 94 |         app,
 95 |         host=config.srv_host.exploded,
 96 |         port=config.srv_port,
 97 |         loop=get_loop(),
 98 |         ssl_context=config.ssl_ctx,
 99 |         access_log=logger,
100 |         print=logger.debug,
101 |     )
102 | 
103 | 
104 | if __name__ == "__main__":
105 |     main()
106 | 


--------------------------------------------------------------------------------
/ffi/cdef.c:
--------------------------------------------------------------------------------
 1 | // easy interfaces
 2 | void *curl_easy_init();
 3 | int _curl_easy_setopt(void *curl, int option, void *param);
 4 | int curl_easy_getinfo(void *curl, int option, void *ret);
 5 | int curl_easy_perform(void *curl);
 6 | void curl_easy_cleanup(void *curl);
 7 | void curl_easy_reset(void *curl);
 8 | int curl_easy_impersonate(void *curl, char *target, int default_headers);
 9 | void *curl_easy_duphandle(void *curl);
10 | int curl_easy_upkeep(void *curl);
11 | 
12 | char *curl_version();
13 | 
14 | // slist interfaces
15 | struct curl_slist {
16 |    char *data;
17 |    struct curl_slist *next;
18 | };
19 | struct curl_slist *curl_slist_append(struct curl_slist *list, char *string);
20 | void curl_slist_free_all(struct curl_slist *list);
21 | 
22 | // callbacks
23 | extern "Python" size_t buffer_callback(void *ptr, size_t size, size_t nmemb, void *userdata);
24 | extern "Python" size_t write_callback(void *ptr, size_t size, size_t nmemb, void *userdata);
25 | extern "Python" int debug_function(void *curl, int type, char *data, size_t size, void *clientp);
26 | 
27 | // multi interfaces
28 | struct CURLMsg {
29 |    int msg;       /* what this message means */
30 |    void *easy_handle; /* the handle it concerns */
31 |    union {
32 |      void *whatever;    /* message-specific data */
33 |      int result;   /* return code for transfer */
34 |    } data;
35 | };
36 | void *curl_multi_init();
37 | int curl_multi_cleanup(void *curlm);
38 | int curl_multi_add_handle(void *curlm, void *curl);
39 | int curl_multi_remove_handle(void *curlm, void *curl);
40 | int curl_multi_socket_action(void *curlm, int sockfd, int ev_bitmask, int *running_handle);
41 | int curl_multi_setopt(void *curlm, int option, void* param);
42 | int curl_multi_assign(void *curlm, int sockfd, void *sockptr);
43 | int curl_multi_perform(void *curlm, int *running_handle);
44 | int curl_multi_timeout(void *curlm, long *timeout_ms);
45 | int curl_multi_wait(void *curlm, void *extra_fds, unsigned int extra_nfds, int timeout_ms, int *numfds);
46 | int curl_multi_poll(void *curlm, void *extra_fds, unsigned int extra_nfds, int timeout_ms, int *numfds);
47 | int curl_multi_wakeup(void *curlm);
48 | const char *curl_multi_strerror(int code);
49 | struct CURLMsg *curl_multi_info_read(void* curlm, int *msg_in_queue);
50 | 
51 | // multi callbacks
52 | extern "Python" int socket_function(void *curl, int sockfd, int what, void *clientp, void *socketp);
53 | extern "Python" int timer_function(void *curlm, int timeout_ms, void *clientp);
54 | 
55 | // websocket
56 | struct curl_ws_frame {
57 |   int age;              /* zero */
58 |   int flags;            /* See the CURLWS_* defines */
59 |   uint64_t offset;    /* the offset of this data into the frame */
60 |   uint64_t bytesleft; /* number of pending bytes left of the payload */
61 |   size_t len;
62 |   ...;
63 | };
64 | 
65 | int curl_ws_recv(void *curl, void *buffer, size_t buflen, size_t *recv, const struct curl_ws_frame **meta);
66 | int curl_ws_send(void *curl, const void *buffer, size_t buflen, size_t *sent, int fragsize, unsigned int sendflags);
67 | 
68 | // mime
69 | void *curl_mime_init(void* curl);  // -> form
70 | void *curl_mime_addpart(void *form);  // -> part/field
71 | int curl_mime_name(void *field, char *name);
72 | int curl_mime_data(void *field, char *name, int datasize);
73 | int curl_mime_type(void *field, char *type);
74 | int curl_mime_filename(void *field, char *filename);
75 | int curl_mime_filedata(void *field, char *filename);
76 | void curl_mime_free(void *form);
77 | 


--------------------------------------------------------------------------------
/tests/unittest/test_websockets.py:
--------------------------------------------------------------------------------
  1 | from curl_cffi.requests import AsyncSession, WebSocket, Session
  2 | from curl_cffi.requests.websockets import CurlWsFlag
  3 | 
  4 | 
  5 | def test_websocket(ws_server):
  6 |     ws = WebSocket()
  7 |     ws.connect(ws_server.url)
  8 | 
  9 |     # deprecated
 10 |     with Session() as s:
 11 |         s.ws_connect(ws_server.url)
 12 | 
 13 | 
 14 | def test_hello(ws_server):
 15 |     ws = WebSocket()
 16 |     ws.connect(ws_server.url)
 17 |     ws.send(b"Foo me once")
 18 |     content, _ = ws.recv()
 19 |     assert content == b"Foo me once"
 20 | 
 21 |     # deprecated
 22 |     with Session() as s:
 23 |         ws = s.ws_connect(ws_server.url)
 24 |         ws.send(b"Foo me once")
 25 |         content, _ = ws.recv()
 26 |         assert content == b"Foo me once"
 27 | 
 28 | 
 29 | def test_hello_twice(ws_server):
 30 |     ws = WebSocket()
 31 |     ws.connect(ws_server.url)
 32 | 
 33 |     ws.send(b"Bar")
 34 |     reply, _ = ws.recv()
 35 | 
 36 |     for _ in range(10):
 37 |         ws.send_str("Bar")
 38 |         reply = ws.recv_str()
 39 |         assert reply == "Bar"
 40 | 
 41 |     with Session() as s:
 42 |         ws = s.ws_connect(ws_server.url)
 43 |         ws.send(b"Foo me once")
 44 |         content, _ = ws.recv()
 45 |         assert content == b"Foo me once"
 46 | 
 47 | 
 48 | def test_receive_large_messages(ws_server):
 49 |     ws = WebSocket()
 50 |     ws.connect(ws_server.url)
 51 |     for _ in range(10):
 52 |         ws.send("*" * 10000)
 53 |     for _ in range(10):
 54 |         buffer, _ = ws.recv()
 55 |         assert len(buffer) == 10000
 56 |     ws.close()
 57 | 
 58 | 
 59 | def test_receive_large_messages_run_forever(ws_server):
 60 |     def on_open(ws: WebSocket):
 61 |         ws.send("*" * 10000)
 62 | 
 63 |     chunk_counter = 0
 64 | 
 65 |     def on_data(ws: WebSocket, data, frame):
 66 |         nonlocal chunk_counter
 67 |         if frame.flags & CurlWsFlag.CLOSE:
 68 |             return
 69 |         chunk_counter += 1
 70 | 
 71 |     message = ""
 72 | 
 73 |     def on_message(ws: WebSocket, msg):
 74 |         nonlocal message
 75 |         message = msg
 76 |         # Gracefully close the connection to exit the run_forever loop
 77 |         ws.send("", CurlWsFlag.CLOSE)
 78 | 
 79 |     ws = WebSocket(
 80 |         on_open=on_open,
 81 |         on_data=on_data,
 82 |         on_message=on_message,
 83 |     )
 84 |     ws.run_forever(ws_server.url)
 85 | 
 86 |     assert chunk_counter >= 1
 87 |     assert len(message) == 10000
 88 | 
 89 | 
 90 | def test_on_data_callback(ws_server):
 91 |     on_data_called = False
 92 | 
 93 |     def on_data(ws: WebSocket, data, frame):
 94 |         nonlocal on_data_called
 95 |         on_data_called = True
 96 | 
 97 |     ws = WebSocket(on_data=on_data)
 98 |     ws.connect(ws_server.url)
 99 | 
100 |     ws.send("Hello")
101 |     ws.recv()
102 |     assert on_data_called is False
103 |     ws.close()
104 | 
105 | 
106 | async def test_hello_twice_async(ws_server):
107 |     ws = None
108 |     async with AsyncSession() as s:
109 |         try:
110 |             ws = await s.ws_connect(ws_server.url)
111 |             await ws.send(b"Bar")
112 |             reply, _ = await ws.recv()
113 | 
114 |             for _ in range(10):
115 |                 await ws.send_str("Bar")
116 |                 reply = await ws.recv_str()
117 |                 assert reply == "Bar"
118 |         finally:
119 |             if ws:
120 |                 await ws.close()
121 | 


--------------------------------------------------------------------------------
/tests/unittest/test_upload.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | from pathlib import Path
  3 | 
  4 | from curl_cffi import CurlMime, requests
  5 | 
  6 | ASSET_FOLDER = Path(__file__).parent.parent.parent / "assets"
  7 | 
  8 | 
  9 | def test_upload_single_file(file_server):
 10 |     multipart = CurlMime.from_list(
 11 |         [
 12 |             {
 13 |                 "name": "image",
 14 |                 "content_type": "image/jpg",
 15 |                 "filename": "scrapfly.png",
 16 |                 "local_path": str(ASSET_FOLDER / "scrapfly.png"),
 17 |             },
 18 |         ]
 19 |     )
 20 | 
 21 |     r = requests.post(file_server.url + "/file", multipart=multipart)
 22 |     data = r.json()
 23 |     assert data["filename"] == "scrapfly.png"
 24 |     assert data["content_type"] == "image/jpg"
 25 |     assert data["size"] == os.path.getsize(ASSET_FOLDER / "scrapfly.png")
 26 |     multipart.close()
 27 | 
 28 | 
 29 | def test_upload_with_text_fields(file_server):
 30 |     multipart = CurlMime.from_list(
 31 |         [
 32 |             {
 33 |                 "name": "image",
 34 |                 "content_type": "image/jpg",
 35 |                 "filename": "scrapfly.png",
 36 |                 "local_path": str(ASSET_FOLDER / "scrapfly.png"),
 37 |             },
 38 |             {"name": "foo", "data": b"bar"},
 39 |         ]
 40 |     )
 41 | 
 42 |     r = requests.post(
 43 |         file_server.url + "/file", data={"foo": "bar"}, multipart=multipart
 44 |     )
 45 |     data = r.json()
 46 |     assert data["filename"] == "scrapfly.png"
 47 |     assert data["content_type"] == "image/jpg"
 48 |     assert data["size"] == os.path.getsize(ASSET_FOLDER / "scrapfly.png")
 49 |     assert data["foo"] == "bar"
 50 |     multipart.close()
 51 | 
 52 | 
 53 | def test_upload_multiple_files(file_server):
 54 |     multipart = CurlMime.from_list(
 55 |         [
 56 |             {
 57 |                 "name": "images",
 58 |                 "content_type": "image/jpg",
 59 |                 "filename": "scrapfly.png",
 60 |                 "local_path": str(ASSET_FOLDER / "scrapfly.png"),
 61 |             },
 62 |             {
 63 |                 "name": "images",
 64 |                 "content_type": "image/jpg",
 65 |                 "filename": "scrapfly.png",
 66 |                 "local_path": str(ASSET_FOLDER / "scrapfly.png"),
 67 |             },
 68 |         ]
 69 |     )
 70 | 
 71 |     r = requests.post(file_server.url + "/files", multipart=multipart)
 72 |     data = r.json()
 73 |     assert len(data["files"]) == 2
 74 |     assert data["files"][0]["filename"] == "scrapfly.png"
 75 |     assert data["files"][0]["content_type"] == "image/jpg"
 76 |     assert data["files"][0]["size"] == os.path.getsize(ASSET_FOLDER / "scrapfly.png")
 77 |     multipart.close()
 78 | 
 79 | 
 80 | def test_upload_multiple_files_different_name(file_server):
 81 |     multipart = CurlMime.from_list(
 82 |         [
 83 |             {
 84 |                 "name": "image1",
 85 |                 "content_type": "image/jpg",
 86 |                 "filename": "scrapfly.png",
 87 |                 "local_path": str(ASSET_FOLDER / "scrapfly.png"),
 88 |             },
 89 |             {
 90 |                 "name": "image2",
 91 |                 "content_type": "image/jpg",
 92 |                 "filename": "scrapfly.png",
 93 |                 "local_path": str(ASSET_FOLDER / "yescaptcha.png"),
 94 |             },
 95 |         ]
 96 |     )
 97 | 
 98 |     r = requests.post(file_server.url + "/two-files", multipart=multipart)
 99 |     data = r.json()
100 |     assert data["size1"] == os.path.getsize(ASSET_FOLDER / "scrapfly.png")
101 |     assert data["size2"] == os.path.getsize(ASSET_FOLDER / "yescaptcha.png")
102 |     multipart.close()
103 | 


--------------------------------------------------------------------------------
/docs/impersonate/faq.rst:
--------------------------------------------------------------------------------
 1 | Impersonation FAQ
 2 | =================
 3 | 
 4 | 
 5 | How to check if my impersonation is working?
 6 | --------------------------------------------
 7 | 
 8 | The most reliable way is to use WireShark, and compare packets from ``curl_cffi`` and your
 9 | targets.
10 | 
11 | If it's challenging for you to use WireShark, you can use the following sites for JA3 and Akamai fingerprints:
12 | 
13 | 1. https://tls.browserleaks.com/json
14 | 2. https://tls.peet.ws/api/all
15 | 3. https://scrapfly.io/web-scraping-tools/browser-fingerprint
16 | 
17 | For http/3 fingerprints, use our service:
18 | 
19 | 1. https://fp.impersonate.pro/api/http3
20 | 
21 | 
22 | I'm still being detected even if I impersonated correctly
23 | ---------------------------------------------------------
24 | 
25 | First, JA3 and akamai fingerprints are not comprehensive, there are other fields that can
26 | be detected, we have a few more options listed in ``extra_fp``. Be sure to also check them.
27 | 
28 | .. note::
29 | 
30 |     Since ``curl-impersonate`` was posted on `Hacker News <https://news.ycombinator.com/item?id=42547820>`_,
31 |     some features and behaviors of ``curl_cffi`` is being detected by professional players.
32 |     If we continue to fix these niche behavior in public, it would soon be noticed by those providers.
33 | 
34 |     In short, if you are using curl_cffi in production and you are sure about being blocked by TLS or http
35 |     detection, try the `curl_cffi pro version <https://impersonate.pro>`_.
36 | 
37 | 
38 | Should I randomize my fingerprints for each request?
39 | ----------------------------------------------------
40 | 
41 | You can choose a random version from the list above, like:
42 | 
43 | .. code-block:: python
44 | 
45 |     random.choice(["chrome119", "chrome120", ...])
46 | 
47 | However, be aware of the browser market share, very old versions are not good choices.
48 | 
49 | Generally, you should not try to generate a customized random fingerprints. The reason
50 | is that, for a given browser version, the fingerprints are fixed. If you create a new
51 | random fingerprints, the server is easy to know that you are not using a typical browser.
52 | 
53 | If you were thinking about ``ja3``, and not ``ja3n``, then the fingerprints is already
54 | randomized, due to the ``extension permutation`` feature introduced in Chrome 110.
55 | 
56 | As far as we know, most websites use an allowlist, not a blocklist to filter out bot
57 | traffic. So do not expect random ja3 fingerprints would work in the wild.
58 | 
59 | Moreover, do not generate random ja3 strings. There are certain limits for a valid ja3 string.
60 | For example:
61 | 
62 | * TLS 1.3 ciphers must be at the front.
63 | * GREASE extension must be the first.
64 | * etc.
65 | 
66 | You should copy ja3 strings from sniffing tools, not generate them, unless you can make
67 | sure all the requirements are met.
68 | 
69 | Can I change JavaScript fingerprints with this library?
70 | -------------------------------------------------------
71 | 
72 | No, you can not. As the name suggests, JavaScript fingerprints are generated using JavaScript
73 | APIs provided by real browsers. ``curl_cffi`` is a python binding to a C library, with no
74 | browser or JavaScript runtime under the hood.
75 | 
76 | If you need to impersonate browsers on the JavaScript perspective, you can search for
77 | "Anti-detect Browser", "Playwright stealth" and similar keywords. Or simply use a
78 | commercial plan from our sponsors.
79 | 
80 | 
81 | Why are all the User-Agents macOS?
82 | ----------------------------------
83 | 
84 | Simple, because I primarily use macOS and I copied the headers from my own browser. Fingerprints
85 | are generally the same across desktop OSes, if you want it to look like Windows, just update the
86 | user-agent and other related headers to Windows.
87 | 
88 | 


--------------------------------------------------------------------------------
/docs/advanced.rst:
--------------------------------------------------------------------------------
  1 | Advanced Topics
  2 | **************
  3 | 
  4 | Proxies
  5 | =======
  6 | 
  7 | You can use the ``proxy`` parameter:
  8 | 
  9 | .. code-block:: python
 10 | 
 11 |     import curl_cffi
 12 | 
 13 |     curl_cffi.get(url, proxy="http://user:pass@example.com:3128")
 14 | 
 15 | You can also use the ``http_proxy``, ``https_proxy``, and ``ws_proxy``, ``wss_proxy``
 16 | environment variables, respectively.
 17 | 
 18 | .. warning::
 19 | 
 20 |    For beginners, a very common mistake is to add ``https://`` prefix to the ``https`` proxy.
 21 | 
 22 |    For explanation of differences between ``http_proxy`` and ``https_proxy``, please see
 23 |    `#6 <https://github.com/lexiforest/curl_cffi/issues/6>`_.
 24 | 
 25 | For compatibility with ``requests``, we also support using dicts.
 26 | 
 27 | .. code-block:: python
 28 | 
 29 |     import curl_cffi
 30 | 
 31 |     proxies = {
 32 |         "http": "http://localhost:3128",
 33 |         "https": "http://localhost:3128"
 34 |     }
 35 |     curl_cffi.get(url, proxies=proxies)
 36 | 
 37 | 
 38 | .. note::
 39 | 
 40 |    Prefer the single `proxy` parameter, unless you do have different proxies for http and https
 41 | 
 42 | 
 43 | Low-level curl API
 44 | =========
 45 | 
 46 | Although we provide an easy to use ``requests``-like API, sometimes, you may prefer to use the ``curl``-like API.
 47 | 
 48 | The curl API is very much like what you may have used -- ``pycurl``, with extra impersonation support.
 49 | 
 50 | 
 51 | .. code-block:: python
 52 | 
 53 |     from curl_cffi import Curl, CurlOpt
 54 |     from io import BytesIO
 55 | 
 56 |     buffer = BytesIO()
 57 |     c = Curl()
 58 |     c.setopt(CurlOpt.URL, b'https://tls.browserleaks.com/json')
 59 |     c.setopt(CurlOpt.WRITEDATA, buffer)
 60 | 
 61 |     c.impersonate("chrome124")
 62 | 
 63 |     c.perform()
 64 |     c.close()
 65 |     body = buffer.getvalue()
 66 |     print(body.decode())
 67 | 
 68 | For a complete list of options, see :doc:`api`
 69 | 
 70 | 
 71 | Using ``CURLOPT_*`` in requests API
 72 | ===================================
 73 | 
 74 | Sometimes, you know an option from libcurl, but we haven't exposed it in the requests API.
 75 | You can simply add the ``curl_options`` dict to apply the option.
 76 | 
 77 | .. code-block:: python
 78 | 
 79 | 
 80 | .. note::
 81 | 
 82 |    Using curl_options is preferred over using ``session.curl.setopt``, the latter may get
 83 |    overriden internally, while the former is executed after all options have been set.
 84 | 
 85 | 
 86 | Selecting http version
 87 | ======================
 88 | 
 89 | The recommended and default http version is http/2, the present and most widely used http version
 90 | as of 2025.
 91 | 
 92 | According to `Wikipedia <https://en.wikipedia.org/wiki/HTTP>`_, the marketshare is:
 93 | 
 94 | - HTTP/1.1, 33.8%
 95 | - HTTP/2, 35.3%
 96 | - HTTP/3, 30.9%
 97 | 
 98 | To change http versions, use the ``http_version`` parameter.
 99 | 
100 | .. code-block:: python
101 | 
102 |    import curl_cffi
103 |    curl_cffi.get("https://cloudflare-quic.com", http_version="v3")
104 | 
105 | Common values are: ``v1``, ``v2``, ``v3`` and ``v3only``.
106 | 
107 | To get the actual used http version, you need to compare the response field with const from libcurl:
108 | 
109 | .. code-block:: python
110 | 
111 |     >>> from curl_cffi import CurlHttpVersion
112 |     >>> r = curl_cffi.get("https://example.com", http_version="v2")
113 |     >>> r.http_version == CurlHttpVersion.V2_0
114 |     True
115 | 
116 | 
117 | Keeping session alive in http/2
118 | ======
119 | 
120 | With http/2, you can optionally send a ping frame to keep the connection alive when not actively using it.
121 | 
122 | 
123 | .. code-block:: python
124 | 
125 |    import curl_cffi
126 | 
127 |    s = Session()
128 |    s.get("https://example.com")
129 |    s.upkeep()
130 | 
131 | 


--------------------------------------------------------------------------------
/libs.json:
--------------------------------------------------------------------------------
  1 | [
  2 |     {
  3 |         "system": "Windows",
  4 |         "machine": "AMD64",
  5 |         "pointer_size": 64,
  6 |         "libdir": "./lib64",
  7 |         "sysname": "win32",
  8 |         "so_name": "libcurl.dll",
  9 |         "so_arch": "x86_64"
 10 |     },
 11 |     {
 12 |         "system": "Windows",
 13 |         "machine": "AMD64",
 14 |         "pointer_size": 32,
 15 |         "libdir": "./lib32",
 16 |         "sysname": "win32",
 17 |         "so_name": "libcurl.dll",
 18 |         "so_arch": "i686"
 19 |     },
 20 |     {
 21 |         "system": "Windows",
 22 |         "machine": "ARM64",
 23 |         "pointer_size": 64,
 24 |         "libdir": "./libarm64",
 25 |         "sysname": "win32",
 26 |         "so_name": "libcurl.dll",
 27 |         "so_arch": "arm64"
 28 |     },
 29 |     {
 30 |         "system": "Darwin",
 31 |         "machine": "x86_64",
 32 |         "pointer_size": 64,
 33 |         "libdir": "/Users/runner/work/_temp/install/lib",
 34 |         "sysname": "macos",
 35 |         "so_name": "libcurl-impersonate.4.dylib",
 36 |         "so_arch": "x86_64"
 37 |     },
 38 |     {
 39 |         "system": "Darwin",
 40 |         "machine": "arm64",
 41 |         "pointer_size": 64,
 42 |         "libdir": "/Users/runner/work/_temp/install/lib",
 43 |         "sysname": "macos",
 44 |         "so_name": "libcurl-impersonate.4.dylib",
 45 |         "so_arch": "arm64"
 46 |     },
 47 |     {
 48 |         "system": "Linux",
 49 |         "machine": "x86_64",
 50 |         "pointer_size": 64,
 51 |         "libdir": "",
 52 |         "sysname": "linux",
 53 |         "link_type": "static",
 54 |         "libc": "gnu",
 55 |         "so_name": "libcurl-impersonate.so",
 56 |         "so_arch": "x86_64"
 57 |     },
 58 |     {
 59 |         "system": "Linux",
 60 |         "machine": "x86_64",
 61 |         "pointer_size": 64,
 62 |         "libdir": "",
 63 |         "sysname": "linux",
 64 |         "link_type": "static",
 65 |         "libc": "musl",
 66 |         "so_name": "libcurl-impersonate.so",
 67 |         "so_arch": "x86_64"
 68 |     },
 69 |     {
 70 |         "system": "Linux",
 71 |         "machine": "i686",
 72 |         "pointer_size": 32,
 73 |         "libdir": "",
 74 |         "sysname": "linux",
 75 |         "link_type": "static",
 76 |         "libc": "gnu",
 77 |         "so_name": "libcurl-impersonate.so",
 78 |         "so_arch": "i386"
 79 |     },
 80 |     {
 81 |         "system": "Linux",
 82 |         "machine": "aarch64",
 83 |         "pointer_size": 64,
 84 |         "libdir": "",
 85 |         "sysname": "linux",
 86 |         "link_type": "static",
 87 |         "libc": "gnu",
 88 |         "so_name": "libcurl-impersonate.so",
 89 |         "so_arch": "aarch64"
 90 |     },
 91 |     {
 92 |         "system": "Linux",
 93 |         "machine": "riscv64",
 94 |         "pointer_size": 64,
 95 |         "libdir": "",
 96 |         "sysname": "linux",
 97 |         "link_type": "static",
 98 |         "libc": "gnu",
 99 |         "so_name": "libcurl-impersonate.so",
100 |         "so_arch": "riscv64"
101 |     },
102 |     {
103 |         "system": "Linux",
104 |         "machine": "aarch64",
105 |         "pointer_size": 64,
106 |         "libdir": "~/.local/lib",
107 |         "sysname": "linux",
108 |         "link_type": "dynamic",
109 |         "libc": "musl",
110 |         "so_name": "libcurl-impersonate.so",
111 |         "so_arch": "aarch64"
112 |     },
113 |     {
114 |         "system": "Linux",
115 |         "machine": "armv6l",
116 |         "pointer_size": 32,
117 |         "libdir": "",
118 |         "sysname": "linux",
119 |         "link_type": "static",
120 |         "libc": "gnueabihf",
121 |         "so_name": "libcurl-impersonate.so",
122 |         "so_arch": "arm"
123 |     },
124 |     {
125 |         "system": "Linux",
126 |         "machine": "armv7l",
127 |         "pointer_size": 32,
128 |         "libdir": "",
129 |         "sysname": "linux",
130 |         "link_type": "static",
131 |         "libc": "gnueabihf",
132 |         "so_name": "libcurl-impersonate.so",
133 |         "so_arch": "arm"
134 |     }
135 | ]
136 | 


--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
  1 | .. curl_cffi documentation master file, created by
  2 |    sphinx-quickstart on Sat Feb 17 22:22:59 2024.
  3 |    You can adapt this file completely to your liking, but it should at least
  4 |    contain the root `toctree` directive.
  5 | 
  6 | curl_cffi's documentation
  7 | =========================
  8 | 
  9 | .. toctree::
 10 |    :maxdepth: 2
 11 |    :caption: Contents:
 12 |    :glob:
 13 | 
 14 |    quick_start
 15 |    impersonate/_index
 16 |    advanced
 17 |    vs-requests
 18 |    cookies
 19 |    community
 20 |    api
 21 |    faq
 22 |    changelog
 23 |    dev
 24 | 
 25 | `Discuss on Telegram`_
 26 | 
 27 | .. _Discuss on Telegram: https://t.me/+lL9n33eZp480MGM1
 28 | 
 29 | curl_cffi is a Python binding for `curl-impersonate fork`_ via `cffi`_. For commercial
 30 | support, visit `impersonate.pro <https://impersonate.pro>`_.
 31 | 
 32 | .. _curl-impersonate fork: https://github.com/lexiforest/curl-impersonate
 33 | .. _cffi: https://cffi.readthedocs.io/en/latest/
 34 | 
 35 | Unlike other pure Python http clients like ``httpx`` or ``requests``, ``curl_cffi`` can
 36 | impersonate browsers' TLS signatures or JA3 fingerprints. If you are blocked by some
 37 | website for no obvious reason, you can give this package a try.
 38 | 
 39 | If you are looking for Python http3 clients, curl_cffi added http3 support since ``v0.11``.
 40 | 
 41 | Sponsors
 42 | --------
 43 | 
 44 | 
 45 | Bypass Cloudflare with API
 46 | ~~~~~~~~~~~~~~~~~~~~~~~~~~
 47 | 
 48 | .. image:: https://raw.githubusercontent.com/lexiforest/curl_cffi/main/assets/yescaptcha.png
 49 |    :width: 149
 50 |    :alt: YesCaptcha
 51 |    :target: https://yescaptcha.com/i/stfnIO
 52 | 
 53 | `Yescaptcha <https://yescaptcha.com/i/stfnIO>`_ is a proxy service that bypasses Cloudflare and uses the API interface to
 54 | obtain verified cookies (e.g. ``cf_clearance``). Click `here <https://yescaptcha.com/i/stfnIO>`_
 55 | to register.
 56 | 
 57 | 
 58 | You can also click `here <https://buymeacoffee.com/yifei>`_ to buy me a coffee.
 59 | 
 60 | 
 61 | Features
 62 | --------
 63 | 
 64 | - Supports JA3/TLS and http2 fingerprints impersonation, including recent browsers and custom fingerprints.
 65 | - Much faster than requests/httpx, on par with aiohttp/pycurl, see `benchmarks <https://github.com/lexiforest/curl_cffi/tree/main/benchmark>`_.
 66 | - Mimics requests API, no need to learn another one.
 67 | - Pre-compiled, so you don't have to compile on your machine.
 68 | - Supports ``asyncio`` with proxy rotation on each request.
 69 | - Supports http 2.0 & 3.0, which requests does not.
 70 | - Supports websocket.
 71 | 
 72 | .. list-table:: Feature matrix
 73 |    :widths: 20 16 16 16 16 16
 74 |    :header-rows: 1
 75 | 
 76 |    * - 
 77 |      - requests
 78 |      - aiohttp
 79 |      - httpx
 80 |      - pycurl
 81 |      - curl_cffi
 82 |    * - http2
 83 |      - ❌
 84 |      - ❌
 85 |      - ✅
 86 |      - ✅
 87 |      - ✅
 88 |    * - http3
 89 |      - ❌
 90 |      - ❌
 91 |      - ❌
 92 |      - ✅
 93 |      - ✅
 94 |    * - sync
 95 |      - ✅
 96 |      - ❌
 97 |      - ✅
 98 |      - ✅
 99 |      - ✅
100 |    * - async
101 |      - ❌
102 |      - ✅
103 |      - ✅
104 |      - ❌
105 |      - ✅
106 |    * - websocket
107 |      - ❌
108 |      - ✅
109 |      - ❌
110 |      - ❌
111 |      - ✅
112 |    * - fingerprints
113 |      - ❌
114 |      - ❌
115 |      - ❌
116 |      - ❌
117 |      - ✅
118 |    * - speed
119 |      - 🐇
120 |      - 🐇🐇
121 |      - 🐇
122 |      - 🐇🐇
123 |      - 🐇🐇
124 | 
125 | Notes:
126 | 
127 | 1. For pycurl, you need a http/3 enabled libcurl, while curl_cffi packages libcurl-impersonate inside Python wheels.
128 | 2. Full http/3 supported was added in v0.12.0.
129 | 
130 | Install
131 | -------
132 | 
133 | .. code-block:: sh
134 | 
135 |     pip install curl_cffi --upgrade
136 | 
137 | For more details, see :doc:`quick_start`.
138 | 
139 | Documentation
140 | -------------
141 | 
142 | You can first check out :doc:`quick_start`. Then the :doc:`impersonate`.
143 | 
144 | For advanced topics, checkout :doc:`cookies`, :doc:`asyncio` and :doc:`websockets`.
145 | 
146 | You can also find common use cases in the `examples <https://github.com/lexiforest/curl_cffi/tree/main/examples>`_ directory.
147 | 
148 | Finally, if something is missing from the tutorial, you can always find them in the :doc:`api`.
149 | 
150 | If you have any questions, be sure to check out the :doc:`faq` section before opening an issue.
151 | 
152 | 
153 | Indices and tables
154 | ==================
155 | 
156 | * :ref:`genindex`
157 | * :ref:`modindex`
158 | * :ref:`search`
159 | 


--------------------------------------------------------------------------------
/docs/impersonate/psk.rst:
--------------------------------------------------------------------------------
 1 | TLS PSK(41) Extension
 2 | =====================
 3 | 
 4 | 
 5 | What is the TLS PSK(41) extension, how to deal with it?
 6 | 
 7 | PSK is short for ``Pre-Shared Key``, as defined in `RFC 8446 <https://www.rfc-editor.org/rfc/rfc8446.html#section-2.2>`_,
 8 | 
 9 |   Once a handshake has completed, the server can send the client a PSK
10 |   identity that corresponds to a unique key derived from the initial
11 |   handshake (see Section 4.6.1).  The client can then use that PSK
12 |   identity in future handshakes to negotiate the use of the associated
13 |   PSK.
14 | 
15 | Usually, when you first visit a website, the PSK extension is not present in the extension
16 | list. But when you visit the same website for the second time, in a relatively short time,
17 | the client may offer a PSK extension with the key from the server.
18 | 
19 | For example, you can visit ``https://tls.peet.ws/api/all``, and then refresh the page,
20 | the PSK extension will be there.
21 | 
22 | To correctly implement the PSK extension, the client must have some kind of a session
23 | cache held in memory or persisted on disk. All the major browsers have this feature for
24 | a very long time. ``curl_cffi`` added this feature in version ``0.11.0``, with libcurl
25 | ``8.13.0``.
26 | 
27 | The mechanism and behavior of a PSK looks like an http session cookie, where the server sent
28 | a cryptographic value as a key to resume a previous disconnected session. When the server generates
29 | a PSK, it is possible that the server keeps the mapping between the incoming IP and key.
30 | Thus, it can be problematic if you reuse a TLS session with rotating proxies.
31 | 
32 | .. code-block::
33 | 
34 |     ┌───────────┐                                    ┌───────────┐
35 |     │           │                                    │           │
36 |     │           │             IP: 10.0.0.1           │           │
37 |     │           ┼─────────────TLS─Hello──────────────►           │
38 |     │           │                                    │           │
39 |     │           ◄─────────────PSK:─xxx───────────────┼           │
40 |     │           │                                    │           │
41 |     │           │                                    │           │
42 |     │           │                                    │           │
43 |     │           │                                    │  Server   │
44 |     │  Client   │             IP: 10.0.0.2           │           │
45 |     │           ┼─────────────TLS─with─PSK───────────►           │
46 |     │           │                                    │           │
47 |     │           ◄─────────────Blocked────────────────┼           │
48 |     │           │                                    │           │
49 |     │           │             PSK: xxx was           │           │
50 |     │           │             associated with        │           │
51 |     │           │             10.0.0.1, not          │           │
52 |     └───────────┘             10.0.0.2               └───────────┘
53 | 
54 | 
55 | Luckily, since curl_cffi ``0.12.0``, we added a new option called ``proxy_credential_no_reuse``,
56 | when enabled, the TLS session cache will be bound based on the proxy username and IP,
57 | such that the session can only be reused when the proxy username and IP matches. From the
58 | server's viewpoint, the ``Pre-Shared Key`` will be locked to the same source IP, not
59 | bouncing around among different exit nodes.
60 | 
61 | 
62 | .. code-block:: python
63 | 
64 |    # Python example to be added.
65 |    # We might enable this by default when proxies are used.
66 | 
67 | 
68 | How do I enable PSK extension anyway?
69 | -------------------------------------
70 | 
71 | You don't. If you haven't, please read the explanation above first. Generally speaking,
72 | the client should manage this extension, and it should automatically offer this extension
73 | on the second request.
74 | 
75 | From the server's perspective, if you forcefully add a PSK extension with random value,
76 | it's an obvious sign that you are not a valid visitor, just like you providing an invalid cookie
77 | value.
78 | 
79 | However, it's reasonable that you don't want the PSK extension to be sent, i.e. pretending
80 | to be a first time visitor. We don't support this for now, your option is to use an older
81 | version of curl_cffi or create a new session on each request.
82 | 
83 | Note, some other impersonation-oriented http clients give you the control over adding the
84 | PSK or not, but you should let the client decide, if you are trying to impersonating browsers.
85 | 


--------------------------------------------------------------------------------
/.github/workflows/build-and-test.yaml:
--------------------------------------------------------------------------------
  1 | name: Build, test and release
  2 | on:
  3 |   pull_request:
  4 |     branches:
  5 |       - main
  6 |   push:
  7 |     branches:
  8 |       - main
  9 |       - bugfix/*
 10 |       - feature/*
 11 |       - release/*
 12 |       - chore/*
 13 |     tags:
 14 |       - v*
 15 | 
 16 | permissions:
 17 |   contents: write
 18 | 
 19 | jobs:
 20 |   lint:
 21 |     name: Lint
 22 |     runs-on: ubuntu-latest
 23 |     steps:
 24 |     - uses: actions/checkout@v4
 25 |     - uses: actions/setup-python@v5
 26 |       with:
 27 |         python-version: '3.9'
 28 |     - name: Lint
 29 |       run: |
 30 |         pip install mypy ruff
 31 |         make lint
 32 | 
 33 |   sdist:
 34 |     name: Build sdist wheel
 35 |     runs-on: ubuntu-latest
 36 |     steps:
 37 |     - uses: actions/checkout@v4
 38 | 
 39 |     - uses: actions/setup-python@v5
 40 |       with:
 41 |         python-version: '3.10'
 42 | 
 43 |     - name: build sdist
 44 |       run: |
 45 |         make preprocess
 46 |         pip install build
 47 |         python -m build --sdist
 48 |         pip install ./dist/*.tar.gz
 49 | 
 50 |     - name: upload artifacts
 51 |       uses: actions/upload-artifact@v4  # https://github.com/actions/upload-artifact/issues/478
 52 |       with:
 53 |         name: curl_cffi-${{ github.sha }}-sdist.zip
 54 |         path: ./dist/*.tar.gz
 55 | 
 56 |   bdist:
 57 |     name: Build bdist wheels and test
 58 |     runs-on: ${{ matrix.os }}
 59 |     strategy:
 60 |       matrix:
 61 |         os: [ubuntu-24.04, macos-15-intel, macos-14, windows-2022, windows-11-arm]
 62 |     steps:
 63 |     - uses: actions/checkout@v4
 64 | 
 65 |     - uses: actions/setup-python@v5
 66 |       with:
 67 |         python-version: '3.11'
 68 | 
 69 |     - if: runner.os == 'Linux'
 70 |       uses: docker/setup-qemu-action@v3
 71 |       with:
 72 |         platforms: all
 73 | 
 74 |     # macOS make is too old
 75 |     - if: runner.os == 'macOS'
 76 |       run: |
 77 |         brew install make automake libtool
 78 | 
 79 |     # When we build all dependencies with the 11.0 target, we can downgrade this back.
 80 |     - if: ${{ matrix.os == 'macos-14' }}
 81 |       run: echo "MACOSX_DEPLOYMENT_TARGET=14.0" >> "$GITHUB_ENV"
 82 | 
 83 |     # TODO: fix the target
 84 |     - if: ${{ matrix.os == 'macos-15-intel' }}
 85 |       run: echo "MACOSX_DEPLOYMENT_TARGET=15.0" >> "$GITHUB_ENV"
 86 | 
 87 |     - name: Build and test wheels
 88 |       uses: pypa/cibuildwheel@v3.1.3
 89 | 
 90 |     # - name: Setup tmate session
 91 |     #   uses: mxschmitt/action-tmate@v3
 92 | 
 93 |     - uses: actions/upload-artifact@v4  # https://github.com/actions/upload-artifact/issues/478
 94 |       with:
 95 |         name: curl_cffi-${{ github.sha }}-${{ matrix.os }}.zip
 96 |         path: ./wheelhouse/*.whl
 97 | 
 98 |   build_latest:
 99 |     name: Build bdist on latest OSes
100 |     runs-on: ${{ matrix.os }}
101 |     strategy:
102 |       matrix:
103 |         # For linux, it's built inside a container, no need to test for latest versions.
104 |         os: [macos-latest, windows-latest, windows-11-arm]
105 |     steps:
106 |     - uses: actions/checkout@v4
107 | 
108 |     - uses: actions/setup-python@v5
109 |       with:
110 |         python-version: '3.11'
111 | 
112 |     - if: runner.os == 'Linux'
113 |       uses: docker/setup-qemu-action@v3
114 |       with:
115 |         platforms: all
116 | 
117 |     # macOS make is too old
118 |     - if: runner.os == 'macOS'
119 |       run: |
120 |         brew install make automake libtool
121 | 
122 |     - if: ${{ matrix.os == 'macos-latest' }}
123 |       run: echo "MACOSX_DEPLOYMENT_TARGET=15.0" >> "$GITHUB_ENV"
124 | 
125 |     - name: Build and test wheels
126 |       uses: pypa/cibuildwheel@v3.1.3
127 | 
128 | 
129 |   upload_all:
130 |     needs: [bdist, sdist]
131 |     runs-on: ubuntu-latest
132 |     steps:
133 |     - uses: actions/download-artifact@v4.1.7  # https://github.com/actions/upload-artifact/issues/478
134 |       if: startsWith(github.ref, 'refs/tags/')
135 |       with:
136 |         pattern: curl_cffi-*
137 |         merge-multiple: true
138 |         path: dist
139 | 
140 |     - uses: pypa/gh-action-pypi-publish@v1.12.4
141 |       if: startsWith(github.ref, 'refs/tags/')
142 |       with:
143 |         password: ${{ secrets.PYPI_TOKEN }}
144 |         packages-dir: dist/
145 | 
146 |     - name: Upload release files
147 |       if: startsWith(github.ref, 'refs/tags/')
148 |       uses: softprops/action-gh-release@v2
149 |       with:
150 |         files: |
151 |           ./dist/*.whl
152 |           ./dist/*.tar.gz
153 | 


--------------------------------------------------------------------------------
/scripts/generate_consts.py:
--------------------------------------------------------------------------------
  1 | import platform
  2 | import re
  3 | import subprocess
  4 | import sys
  5 | 
  6 | CONST_FILE = "curl_cffi/const.py"
  7 | CURL_VERSION = sys.argv[1]
  8 | 
  9 | uname = platform.uname()
 10 | 
 11 | 
 12 | print("extract consts from curl.h")
 13 | with open(CONST_FILE, "w") as f:
 14 |     f.write("# This file is automatically generated, do not modify it directly.\n\n")
 15 |     f.write("from enum import IntEnum\n\n\n")
 16 |     f.write("class CurlOpt(IntEnum):\n")
 17 |     f.write('    """``CULROPT_`` constancs extracted from libcurl,\n')
 18 |     f.write('    see: https://curl.se/libcurl/c/curl_easy_setopt.html"""\n\n')
 19 |     cmd = rf"""
 20 |         echo '#include "{CURL_VERSION}/include/curl/curl.h"' | gcc -E - | grep -i "CURLOPT_.\+ =" | sed "s/  CURLOPT_/    /g" | sed "s/,//g"
 21 |     """  # noqa E501
 22 |     output = subprocess.check_output(cmd, shell=True)
 23 |     clean_output = re.sub(
 24 |         r"__attribute__\(.*\) ", "", output.decode(), flags=re.MULTILINE
 25 |     )
 26 |     f.write(clean_output)
 27 |     f.write(
 28 |         """
 29 |     if locals().get("WRITEDATA"):
 30 |         FILE = locals().get("WRITEDATA")
 31 |     if locals().get("READDATA"):
 32 |         INFILE = locals().get("READDATA")
 33 |     if locals().get("HEADERDATA"):
 34 |         WRITEHEADER = locals().get("HEADERDATA")\n\n
 35 | """
 36 |     )
 37 | 
 38 |     f.write("class CurlInfo(IntEnum):\n")
 39 |     f.write('    """``CURLINFO_`` constancs extracted from libcurl,\n')
 40 |     f.write('    see: https://curl.se/libcurl/c/curl_easy_getinfo.html"""\n\n')
 41 |     cmd = rf"""
 42 |         echo '#include "{CURL_VERSION}/include/curl/curl.h"' | gcc -E - | grep -i "CURLINFO_.\+ =" | sed "s/  CURLINFO_/    /g" | sed "s/,//g"
 43 |     """  # noqa E501
 44 |     output = subprocess.check_output(cmd, shell=True)
 45 |     f.write(output.decode())
 46 |     f.write(
 47 |         """
 48 |     if locals().get("RESPONSE_CODE"):
 49 |         HTTP_CODE = locals().get("RESPONSE_CODE")\n\n
 50 | """
 51 |     )
 52 | 
 53 |     f.write("class CurlMOpt(IntEnum):\n")
 54 |     f.write('    """``CURLMOPT_`` constancs extracted from libcurl,\n')
 55 |     f.write('    see: https://curl.se/libcurl/c/curl_multi_setopt.html"""\n\n')
 56 |     cmd = rf"""
 57 |         echo '#include "{CURL_VERSION}/include/curl/curl.h"' | gcc -E - | grep -i "CURLMOPT_.\+ =" | sed "s/  CURLMOPT_/    /g" | sed "s/,//g"
 58 |     """  # noqa E501
 59 |     output = subprocess.check_output(cmd, shell=True)
 60 |     f.write(output.decode())
 61 |     f.write("\n\n")
 62 | 
 63 |     f.write("class CurlECode(IntEnum):\n")
 64 |     f.write('    """``CURLECODE_`` constancs extracted from libcurl,\n')
 65 |     f.write('    see: https://curl.se/libcurl/c/libcurl-errors.html"""\n\n')
 66 |     cmd = rf"""
 67 |         echo '#include "{CURL_VERSION}/include/curl/curl.h"' | gcc -E - | grep -i CURLE_ | sed "s/[, ][=0]*//g" | sed "s/CURLE_/    /g" | awk '{{print $0 " = " NR-1}}'
 68 |     """  # noqa E501
 69 |     output = subprocess.check_output(cmd, shell=True)
 70 |     f.write(output.decode())
 71 |     f.write("\n")
 72 | 
 73 |     # These lines are not easy to be extracted automatically
 74 |     f.write(
 75 |         '''
 76 | class CurlHttpVersion(IntEnum):
 77 |     """``CURL_HTTP_VERSION`` constants from libcurl, see comments for details."""
 78 | 
 79 |     NONE = 0
 80 |     V1_0 = 1  # please use HTTP 1.0 in the request */
 81 |     V1_1 = 2  # please use HTTP 1.1 in the request */
 82 |     V2_0 = 3  # please use HTTP 2 in the request */
 83 |     V2TLS = 4  # use version 2 for HTTPS, version 1.1 for HTTP */
 84 |     V2_PRIOR_KNOWLEDGE = 5  # please use HTTP 2 without HTTP/1.1 Upgrade */
 85 |     V3 = 30  # Makes use of explicit HTTP/3 with fallback.
 86 |     V3ONLY = 31  # No fallback
 87 | 
 88 | 
 89 | class CurlWsFlag(IntEnum):
 90 |     """``CURL_WS_FLAG`` constants from libcurl, see comments for details."""
 91 | 
 92 |     TEXT = 1 << 0
 93 |     BINARY = 1 << 1
 94 |     CONT = 1 << 2
 95 |     CLOSE = 1 << 3
 96 |     PING = 1 << 4
 97 |     OFFSET = 1 << 5
 98 | 
 99 | 
100 | class CurlSslVersion(IntEnum):
101 |     """``CURL_SSLVERSION`` constants from libcurl, see comments for details."""
102 | 
103 |     DEFAULT = 0
104 |     TLSv1 = 1
105 |     SSLv2 = 2
106 |     SSLv3 = 3
107 |     TLSv1_0 = 4
108 |     TLSv1_1 = 5
109 |     TLSv1_2 = 6
110 |     TLSv1_3 = 7
111 |     MAX_DEFAULT = 1 << 16
112 | 
113 | 
114 | class CurlIpResolve(IntEnum):
115 |     """``CURL_IPRESOLVE`` constants from libcurl, see comments for details."""
116 | 
117 |     WHATEVER = 0  # default, uses addresses to all IP versions that your system allows
118 |     V4 = 1  # uses only IPv4 addresses/connections
119 |     V6 = 2  # uses only IPv6 addresses/connections
120 | 
121 | '''
122 |     )
123 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
  1 | [project]
  2 | name = "curl_cffi"
  3 | version = "0.14.0"
  4 | authors = [{ name = "lexiforest", email = "infinitesheldon@gmail.com" }]
  5 | description = "libcurl ffi bindings for Python, with impersonation support."
  6 | license = { text = "MIT License" }
  7 | dependencies = [
  8 |     "cffi>=1.12.0",
  9 |     "certifi>=2024.2.2",
 10 | ]
 11 | readme = "README.md"
 12 | requires-python = ">=3.10"
 13 | urls = { "repository" = "https://github.com/lexiforest/curl_cffi" }
 14 | classifiers = [
 15 |     "Development Status :: 4 - Beta",
 16 |     "Intended Audience :: Developers",
 17 |     "Programming Language :: Python :: 3",
 18 |     "Programming Language :: Python :: 3.10",
 19 |     "Programming Language :: Python :: 3.11",
 20 |     "Programming Language :: Python :: 3.12",
 21 |     "Programming Language :: Python :: 3.13",
 22 |     "Programming Language :: Python :: 3.14",
 23 | ]
 24 | 
 25 | 
 26 | [project.optional-dependencies]
 27 | extra = [
 28 |     "readability-lxml>=0.8.1",
 29 |     "markdownify>=1.1.0",
 30 |     "lxml_html_clean",
 31 | ]
 32 | dev = [
 33 |     "charset_normalizer>=3.3.2,<4.0",
 34 |     "coverage>=6.4.1,<7.0",
 35 |     "cryptography>=42.0.5,<43.0",
 36 |     "httpx==0.23.1",  # don't change, tests will raise "httpx.InvalidURL: Invalid URL component 'path'"
 37 |     "mypy>=1.9.0,<2.0",
 38 |     "pytest>=8.1.1,<9.0",
 39 |     "pytest-asyncio>=0.23.6,<1.0",
 40 |     "pytest-trio>=0.8.0,<1.0",
 41 |     "ruff>=0.3.5,<1.0",
 42 |     "trio>=0.25.0,<1.0",
 43 |     "trustme>=1.1.0,<2.0",
 44 |     "uvicorn>=0.29.0,<1.0",
 45 |     "websockets>=14.0",
 46 |     "typing_extensions",
 47 | ]
 48 | build = [
 49 |     "cibuildwheel",
 50 |     "wheel",
 51 | ]
 52 | test = [
 53 |     "charset_normalizer>=3.3.2,<4.0",
 54 |     "cryptography>=42.0.5,<43.0",
 55 |     "fastapi>=0.110.0,<1.0",
 56 |     "httpx==0.23.1",  # don't change, tests will raise "httpx.InvalidURL: Invalid URL component 'path'"
 57 |     "proxy.py>=2.4.3,<3.0",
 58 |     "pytest>=8.1.1,<9.0",
 59 |     "pytest-asyncio>=0.23.6,<1.0",
 60 |     "pytest-trio>=0.8.0,<1.0",
 61 |     "python-multipart>=0.0.9,<1.0",
 62 |     "trio>=0.25.0,<1.0",
 63 |     "trustme>=1.1.0,<2.0",
 64 |     "uvicorn>=0.29.0,<1.0",
 65 |     "websockets>=14.0",
 66 |     "typing_extensions",
 67 | ]
 68 | 
 69 | [project.scripts]
 70 | curl-cffi = "curl_cffi.cli:main"
 71 | 
 72 | [build-system]
 73 | requires = ["wheel", "setuptools", "cffi>=1.12.0"]
 74 | build-backend = "setuptools.build_meta"
 75 | 
 76 | 
 77 | [tool.setuptools]
 78 | packages = ["curl_cffi", "curl_cffi.requests"]
 79 | package-data = { curl_cffi = ["libcurl.dll"] }
 80 | 
 81 | 
 82 | [tool.cibuildwheel]
 83 | # Building for these platforms is enough since we are using abi3 packages
 84 | build = [
 85 |     "cp310-macosx_x86_64",
 86 |     "cp310-macosx_arm64",
 87 |     "cp310-win_amd64",
 88 |     "cp310-win_arm64",
 89 |     # "cp310-win32",
 90 |     "cp310-manylinux_x86_64",
 91 |     "cp310-manylinux_aarch64",
 92 |     "cp310-manylinux_riscv64",
 93 |     "cp310-manylinux_i686",
 94 |     "cp310-manylinux_armv7l",
 95 |     "cp310-musllinux_x86_64",
 96 |     "cp310-musllinux_aarch64",
 97 | ]
 98 | before-all = "make preprocess"
 99 | test-requires = "pytest"
100 | test-command = "python -bb -m pytest {project}/tests/unittest"
101 | test-extras = ["test"]
102 | # trustme not available for these images
103 | test-skip = [
104 |     "cp310-manylinux_i686",
105 |     "cp310-win_arm64",
106 |     "cp310-manylinux_armv7l",
107 |     "cp310-manylinux_riscv64",
108 | ]
109 | build-verbosity = 1
110 | 
111 | 
112 | # configure cibuildwheel to build native archs ('auto'), and some emulated ones
113 | [tool.cibuildwheel.linux]
114 | archs = ["auto", "aarch64", "riscv64", "i686", "armv7l"]
115 | environment = { LD_LIBRARY_PATH="$HOME/.local/lib" }
116 | environment-pass = ["LD_LIBRARY_PATH"]
117 | 
118 | 
119 | [tool.cibuildwheel.macos]
120 | before-all = "gmake preprocess"
121 | 
122 | [tool.cibuildwheel.windows]
123 | before-build = "pip install delvewheel"
124 | repair-wheel-command = "delvewheel repair --add-path ./lib64;./lib32 -w {dest_dir} {wheel}"
125 | 
126 | 
127 | [tool.pytest.ini_options]
128 | # pythonpath = [ "." ]
129 | asyncio_mode = "auto"
130 | 
131 | 
132 | [tool.ruff]
133 | line-length = 88
134 | 
135 | [tool.ruff.lint]
136 | select = [
137 |     "E",  # pycodestyle
138 |     "F",  # Pyflakes
139 |     "UP",  # pyupgrade
140 |     "B",  # flake8-bugbear
141 |     "SIM",  # flake8-simplify
142 | ]
143 | ignore = [
144 |     "UP007",
145 |     "UP045",  # X | None
146 | ]
147 | 
148 | [tool.isort]
149 | profile = "black"
150 | line_length = 88
151 | 
152 | [tool.mypy]
153 | python_version = "3.9"
154 | ignore_missing_imports = true
155 | #warn_unused_ignores = true
156 | #strict = true
157 | exclude = ["benchmark/", "docs/", "examples/", "scripts/", "tests/", "build/"]
158 | 


--------------------------------------------------------------------------------
/docs/changelog.rst:
--------------------------------------------------------------------------------
  1 | Change Log
  2 | ==========
  3 | 
  4 | Please see the `GitHub Releases <https://github.com/lexiforest/curl_cffi/releases>`_ page for details.
  5 | 
  6 | - v0.13
  7 |     - Added support for Windows on Arm
  8 |     - Improved support for websockets
  9 | 
 10 | - v0.12
 11 |     - Added support for safari 26
 12 |     - Improved support for websockets
 13 | 
 14 | - v0.11
 15 |     - Added support for http3
 16 |     - Added tor145, new safari and chrome targets
 17 | 
 18 | 
 19 | - v0.10.0
 20 |     - Added support for using curl_cffi directly
 21 | 
 22 | 
 23 | - v0.9.0
 24 |     - Brought back Windows support
 25 |     - Added support for Firefox
 26 |     - Added support for Chrome 133a
 27 | 
 28 | 
 29 | - v0.8.0
 30 |     - Added more recent impersonate versions, Safari 18.0 for iOS and macOS, Chrome 131.
 31 |     - Added ``quote`` parameter for setting which letter should be quoted in URL.
 32 |     - Added ``response_class`` parameter for using a customized ``Response`` class.
 33 | 
 34 | 
 35 | - v0.7.3
 36 |     - Bugfixes.
 37 | - v0.7.2
 38 |     - Added requests-like exception hierarchy.
 39 | - v0.7.1
 40 |     - Added ``Cookies.get_dict()``, for compatibility with ``requests``.
 41 |     - Fixed type conversion in C shim, by @qishipai.
 42 |     - Fixed cookie ``subdomains`` attribute.
 43 | - v0.7.0
 44 |     - Added more recent impersonate versions, up to Chrome 124.
 45 |     - Upgraded ``libcurl`` to 8.7.1.
 46 |     - Supported custom impersonation.
 47 |     - Added support for list of tuple in post fields.
 48 |     - Updated header strategy: always exclude empty headers, never send Expect header.
 49 |     - Changed default redirect limit to 30.
 50 |     - Prefer not sending CONNECT for plain http proxy.
 51 |     - Fix Windows build.
 52 |     - Fix Safari Stream priority.
 53 | 
 54 | 
 55 | The minimum Python version is now 3.8. Windows fingerprints are wrong in 0.6.x.
 56 | 
 57 | - v0.6.1
 58 |     - ``AsyncSession.close`` is now a coroutine.
 59 |     - This is a bugfix release.
 60 | - v0.6.0
 61 |     - Added more recent impersonate versions, up to Chrome 120 and Safari 17.0
 62 |     - Upgraded libcurl to 8.1.1
 63 |     - Added experimental websocket support
 64 |     - Supported proactive eventloop on Windows
 65 |     - Added win32 and macOS arm64 build targets
 66 |     - Added `allow_redirects` to Session parameters
 67 |     - Use certifi to replace packaged cacert.pem
 68 |     - Improved proxy support by accepting `proxy=...`
 69 |     - Bumped minimum python versiont to 3.8
 70 |     - Added files support
 71 |     - Added client certs support
 72 |     - Incorporated build time files for sdist
 73 |     - Bugfix: async curl timer leak
 74 | 
 75 | 
 76 | - v0.5.10
 77 |     - Add stream support
 78 |     - Add support for secure cookies
 79 |     - Add curl_infos to extract extra info after performing
 80 |     - Bugfix: `timeout=None` not working
 81 | - v0.5.9
 82 |     - Add interface support
 83 |     - Make POST work as in real world
 84 |     - Add support for custom resolve
 85 |     - Switched to libcurl's COOKIELIST to sync cookies between python and curl
 86 |     - Add default_headers option for sessions like in curl-impersonate
 87 |     - Add curl_options for extra curl_options in Session
 88 |     - Add http_version option for limiting http version to 1.1 or whatever
 89 |     - Add debug option for extra curl debug info
 90 |     - Add CurlError.code
 91 |     - Bugfix: duplicated header lines for the same header
 92 |     - Bugfix: clearing headers when request fails
 93 |     - Bugfix: fix HEAD request
 94 |     - Bugfix: reset curl options when errors occur
 95 | - v0.5.7
 96 |     - Refactor JSON serialization to mimic browser behavior (#66)
 97 |     - Add http options to Session classes (#72)
 98 |     - Add Windows eventloop warning
 99 | - v0.5.6
100 |     - Make Session.curl a thread-local variable (#50)
101 |     - Add support for eventlet and gevent with threadpool
102 |     - Bugfix: Only close future if it's not done or cancelled
103 | - 0.5.5
104 |     - Bugfix: Fix high CPU usage (#46)
105 | - 0.5.4
106 |     - Bugfix: Fix cert and error buffer when calling curl_easy_reset
107 | - 0.5.3
108 |     - Bugfix: Reset curl after performing, fix #39
109 | - 0.5.2
110 |     - Bugfix: Clear headers after async perform
111 | - 0.5.1
112 |     - Bugfix: Clean up timerfunction when curl already closed
113 | - 0.5.0
114 |     - Added asyncio support
115 | 
116 | 
117 | - 0.4.0
118 |     - Removed c shim callback function, use cffi native callback function
119 | 
120 | 
121 | - 0.3.6
122 |     - Updated to curl-impersonate v0.5.4, supported chrome107 and chrome110
123 | - 0.3.0, copied more code from `httpx` to support session
124 |     - Add `requests.Session`
125 |     - Breaking change: `Response.cookies` changed from `http.cookies.SimpleCookie` to `curl_cffi.requests.Cookies`
126 |     - Using ABI3 wheels to reduce package size.
127 | 
128 | 


--------------------------------------------------------------------------------
/benchmark/benchmark.py:
--------------------------------------------------------------------------------
  1 | import asyncio
  2 | import queue
  3 | import threading
  4 | import time
  5 | from io import BytesIO
  6 | 
  7 | import aiohttp
  8 | import httpx
  9 | import pandas as pd
 10 | import pycurl
 11 | import requests
 12 | import tls_client
 13 | 
 14 | import curl_cffi
 15 | import curl_cffi.requests
 16 | 
 17 | # import uvloop
 18 | # uvloop.install()
 19 | 
 20 | results = []
 21 | 
 22 | 
 23 | class FakePycurlSession:
 24 |     def __init__(self):
 25 |         self.c = pycurl.Curl()
 26 | 
 27 |     def get(self, url):
 28 |         buffer = BytesIO()
 29 |         self.c.setopt(pycurl.URL, url)
 30 |         self.c.setopt(pycurl.WRITEDATA, buffer)
 31 |         self.c.perform()
 32 | 
 33 |     def __del__(self):
 34 |         self.c.close()
 35 | 
 36 | 
 37 | class FakeCurlCffiSession:
 38 |     def __init__(self):
 39 |         self.c = curl_cffi.Curl()
 40 | 
 41 |     def get(self, url):
 42 |         buffer = BytesIO()
 43 |         self.c.setopt(curl_cffi.CurlOpt.URL, url)
 44 |         self.c.setopt(curl_cffi.CurlOpt.WRITEDATA, buffer)
 45 |         self.c.perform()
 46 | 
 47 |     def __del__(self):
 48 |         self.c.close()
 49 | 
 50 | 
 51 | for size in ["1k", "20k", "200k"]:
 52 |     stats = {}
 53 |     url = "http://localhost:8000/" + size
 54 | 
 55 |     for name, SessionClass in [
 56 |         ("requests", requests.Session),
 57 |         ("httpx_sync", httpx.Client),
 58 |         ("tls_client", tls_client.Session),
 59 |         ("curl_cffi_sync", curl_cffi.requests.Session),
 60 |         ("curl_cffi_raw", FakeCurlCffiSession),
 61 |         ("pycurl", FakePycurlSession),
 62 |     ]:
 63 |         s = SessionClass()
 64 |         start = time.time()
 65 |         for _ in range(1000):
 66 |             s.get(url)
 67 |         dur = time.time() - start
 68 |         stats[name] = dur
 69 |         results.append({"name": name, "size": size, "duration": dur})
 70 | 
 71 |     print(f"One worker, {size}: {stats}")
 72 | 
 73 | df = pd.DataFrame(results)
 74 | df.to_csv("single_worker.csv", index=False, float_format="%.4f")
 75 | 
 76 | results = []
 77 | 
 78 | 
 79 | def worker(q, done, SessionClass):
 80 |     s = SessionClass()
 81 |     while not done.is_set():
 82 |         try:
 83 |             url = q.get_nowait()
 84 |         except Exception:
 85 |             continue
 86 |         s.get(url)
 87 |         q.task_done()
 88 | 
 89 | 
 90 | async def aiohttp_worker(q, done, s):
 91 |     while not done.is_set():
 92 |         url = await q.get()
 93 |         async with s.get(url) as response:
 94 |             await response.read()
 95 |         q.task_done()
 96 | 
 97 | 
 98 | async def httpx_worker(q, done, s):
 99 |     while not done.is_set():
100 |         url = await q.get()
101 |         await s.get(url)
102 |         q.task_done()
103 | 
104 | 
105 | for size in ["1k", "20k", "200k"]:
106 |     url = "http://localhost:8000/" + size
107 |     stats = {}
108 |     for name, SessionClass in [
109 |         ("requests", requests.Session),
110 |         ("httpx_sync", httpx.Client),
111 |         ("tls_client", tls_client.Session),
112 |         ("curl_cffi_sync", curl_cffi.requests.Session),
113 |         ("curl_cffi_raw", FakeCurlCffiSession),
114 |         ("pycurl", FakePycurlSession),
115 |     ]:
116 |         q = queue.Queue()
117 |         for _ in range(1000):
118 |             q.put(url)
119 |         done = threading.Event()
120 |         start = time.time()
121 |         threads = []
122 |         for _ in range(10):
123 |             t = threading.Thread(target=worker, args=(q, done, SessionClass))
124 |             threads.append(t)
125 |             t.start()
126 |         q.join()
127 |         done.set()
128 |         dur = time.time() - start
129 |         stats[name] = dur
130 |         results.append({"name": name, "size": size, "duration": dur})
131 |         for t in threads:
132 |             t.join()
133 |     # print(stats)
134 | 
135 |     async def test_asyncs_workers(url, size, stats):
136 |         for name, worker, SessionClass in [
137 |             ("aiohttp", aiohttp_worker, aiohttp.ClientSession),
138 |             ("httpx_async", httpx_worker, httpx.AsyncClient),
139 |             ("curl_cffi_async", httpx_worker, curl_cffi.requests.AsyncSession),
140 |         ]:
141 |             q = asyncio.Queue()
142 |             for _ in range(1000):
143 |                 await q.put(url)
144 |             done = asyncio.Event()
145 |             start = time.time()
146 |             workers = []
147 |             async with SessionClass() as s:
148 |                 for _ in range(10):
149 |                     w = asyncio.create_task(worker(q, done, s))
150 |                     workers.append(w)
151 |                 await q.join()
152 |                 done.set()
153 |             dur = time.time() - start
154 |             stats[name] = dur
155 |             results.append({"name": name, "size": size, "duration": dur})
156 |             for w in workers:
157 |                 w.cancel()
158 | 
159 |     asyncio.run(test_asyncs_workers(url, size, stats))
160 |     print(f"10 Workers, {size}: {stats}")
161 | 
162 | df = pd.DataFrame(results)
163 | df.to_csv("multiple_workers.csv", index=False, float_format="%.4f")
164 | 


--------------------------------------------------------------------------------
/benchmark/ws_bench_utils.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | """
  3 | Cross-platform utility code for the WebSocket benchmarks.
  4 | """
  5 | 
  6 | import asyncio
  7 | import os
  8 | from collections.abc import AsyncGenerator, Generator
  9 | from dataclasses import dataclass
 10 | from ipaddress import IPv4Address
 11 | from logging import DEBUG, Formatter, Logger, StreamHandler, getLogger
 12 | from pathlib import Path
 13 | from ssl import PROTOCOL_TLS_SERVER, SSLContext
 14 | from typing import TextIO
 15 | 
 16 | 
 17 | def get_logger() -> Logger:
 18 |     """Setup the logger.
 19 | 
 20 |     Returns:
 21 |         Logger: Initialized logger object
 22 |     """
 23 |     console: Logger = getLogger(name=__name__)
 24 |     console_handler: StreamHandler[TextIO] = StreamHandler()
 25 |     console.setLevel(level=DEBUG)
 26 |     console_handler.setLevel(level=DEBUG)
 27 |     formatter: Formatter = Formatter(
 28 |         fmt="[%(asctime)s] [%(levelname)s] %(message)s", datefmt="%d-%m-%Y %H:%M:%S"
 29 |     )
 30 |     console_handler.setFormatter(fmt=formatter)
 31 |     console.addHandler(hdlr=console_handler)
 32 |     return console
 33 | 
 34 | 
 35 | # Initialize logger
 36 | logger: Logger = get_logger()
 37 | 
 38 | 
 39 | def get_ssl_ctx(cert_file: Path, cert_key: Path) -> SSLContext | None:
 40 |     """Load in the SSL context if cert files present and host is non-Windows.
 41 | 
 42 |     Returns:
 43 |         `SSLContext | None`: The SSL context or `None`.
 44 |     """
 45 | 
 46 |     if not (cert_file.is_file() and cert_key.is_file()):
 47 |         logger.warning(
 48 |             "Certificate file(s) %s or %s not found, disabling TLS",
 49 |             cert_file,
 50 |             cert_key,
 51 |         )
 52 |         return None
 53 | 
 54 |     ssl_context: SSLContext = SSLContext(PROTOCOL_TLS_SERVER)
 55 |     ssl_context.load_cert_chain(cert_file, cert_key)
 56 |     return ssl_context
 57 | 
 58 | 
 59 | @dataclass
 60 | class TestConfig:
 61 |     """
 62 |     Configuration values, should be changed as needed.
 63 |     """
 64 | 
 65 |     total_gb: int = 10
 66 |     chunk_size: int = 65536
 67 |     large_chunk_size: int = 4 * 1024**2
 68 |     total_bytes: int = total_gb * 1024**3
 69 |     recv_queue: int = 512
 70 |     send_queue: int = 128
 71 |     cert_file: Path = Path("localhost.crt")
 72 |     cert_key: Path = Path("localhost.key")
 73 |     data_filename: Path = Path("testdata.bin")
 74 |     hash_filename: Path = data_filename.with_suffix(".hash")
 75 |     srv_host: IPv4Address = IPv4Address("127.0.0.1")
 76 |     srv_port: int = 4443
 77 |     ssl_ctx: SSLContext | None = get_ssl_ctx(cert_file, cert_key)
 78 |     proto: str = "wss://" if ssl_ctx else "ws://"
 79 |     srv_path: str = f"{proto}{srv_host}:{srv_port}/ws"
 80 | 
 81 | 
 82 | # Initialize config object
 83 | config: TestConfig = TestConfig()
 84 | 
 85 | 
 86 | async def binary_data_generator(
 87 |     total_gb: float, chunk_size: int
 88 | ) -> AsyncGenerator[bytes]:
 89 |     """An asynchronous generator that yields chunks of binary data efficiently
 90 |     for benchmarking. It generates one chunk of random data and reuses it to
 91 |     eliminate chunk generation overhead as a performance factor.
 92 | 
 93 |     Args:
 94 |         total_gb (`float`): The total amount of data to generate.
 95 |         chunk_size (`int`): Data should be yielded in chunks of this size.
 96 | 
 97 |     Yields:
 98 |         `Iterator[AsyncGenerator[bytes, None]]`: Chunks until total size is reached.
 99 |     """
100 |     bytes_to_send: int = int(total_gb * 1024**3)
101 |     bytes_sent = 0
102 | 
103 |     # Create one reusable chunk of random data to avoid calling os.urandom() in a loop.
104 |     reusable_chunk = os.urandom(chunk_size)
105 |     while bytes_sent < bytes_to_send:
106 |         # Calculate the size of the next chunk to send
107 |         current_chunk_size = min(chunk_size, bytes_to_send - bytes_sent)
108 | 
109 |         # If it's a full-sized chunk, yield the reusable one. Otherwise, yield a slice.
110 |         if current_chunk_size == chunk_size:
111 |             yield reusable_chunk
112 |         else:
113 |             yield reusable_chunk[:current_chunk_size]
114 |         bytes_sent += current_chunk_size
115 | 
116 | 
117 | def get_loop() -> asyncio.AbstractEventLoop:
118 |     """Returns the correct event loop for the platform and what's installed.
119 | 
120 |     Returns:
121 |         asyncio.AbstractEventLoop: The created and installed event loop.
122 |     """
123 | 
124 |     try:
125 |         # pylint: disable-next=import-outside-toplevel
126 |         import uvloop
127 | 
128 |         loop: asyncio.AbstractEventLoop = uvloop.new_event_loop()
129 |     except ImportError:
130 |         loop = asyncio.new_event_loop()
131 | 
132 |     asyncio.set_event_loop(loop)
133 |     return loop
134 | 
135 | 
136 | def generate_random_chunks() -> Generator[bytes]:
137 |     """Generate chunks of random data up to a total size.
138 | 
139 |     Returns:
140 |         Generator[bytes]: Generator that yields random chunks.
141 |     """
142 |     num_chunks: int = config.total_bytes // config.large_chunk_size
143 |     for _ in range(num_chunks):
144 |         yield os.urandom(config.large_chunk_size)
145 | 
146 |     remaining_size = config.total_bytes % config.large_chunk_size
147 |     if remaining_size > 0:
148 |         yield os.urandom(remaining_size)
149 | 


--------------------------------------------------------------------------------
/benchmark/README.md:
--------------------------------------------------------------------------------
  1 | Benchmark
  2 | ======
  3 | 
  4 | benchmark between curl_cffi and other python http clients
  5 | 
  6 | Sync clients
  7 | ------
  8 | 
  9 | - curl_cffi
 10 | - requests
 11 | - pycurl
 12 | - [python-tls-client](https://github.com/FlorianREGAZ/Python-Tls-Client.git)
 13 | - httpx
 14 | 
 15 | Async clients
 16 | ------
 17 | 
 18 | - curl_cffi
 19 | - httpx
 20 | - aiohttp
 21 | 
 22 | Target
 23 | ------
 24 | 
 25 | All the clients run with session/client enabled.
 26 | 
 27 | Async WebSocket
 28 | ------
 29 | 
 30 | Two distinct benchmarks are provided to evaluate the performance of the `AsyncWebSocket` implementation under different conditions.
 31 | 
 32 | 1. Simple Throughput Test ([`client`](ws_bench_1_client.py), [`server`](ws_bench_1_server.py))
 33 | 
 34 |     This is a lightweight, in-memory benchmark designed to measure the raw throughput and overhead of the WebSocket client. The server sends a repeating chunk of random bytes from memory, and the client receives it. This test is useful for quick sanity checks and detecting performance regressions under ideal, CPU-cached conditions.
 35 | 
 36 | 2. Verified Streaming Test ([`benchmark`](ws_bench_2.py))
 37 | 
 38 |     This is a rigorous, end-to-end test. It first generates a multi-gigabyte file of random data and its SHA256 hash. The benchmark then streams this file from disk over the WebSocket connection. The receiving end calculates the hash of the incoming stream and verifies it against the original, ensuring complete data integrity.
 39 | 
 40 |     **Important**: This test requires enough RAM free on the system equal to the size of the random data. It measures the performance of the entire system pipeline, including Disk I/O speed, CPU hashing speed, and network transfer. On modern systems, it is likely to be bottlenecked by the CPU's hashing performance or the disk's read speed.
 41 | 
 42 | ### Prerequisites
 43 | 
 44 | - Python 3.10+
 45 | - Pip packages
 46 | 
 47 | ```bash
 48 | pip install aiohttp curl_cffi
 49 | ```
 50 | 
 51 | > `uvloop` is highly recommended for performance on Linux and macOS. The benchmarks will automatically fall back to the standard asyncio event loop if it is not installed or on Windows.
 52 | 
 53 | ### Setup
 54 | 
 55 | 1. TLS certificate (optional)
 56 | 
 57 |     These benchmarks are configured to use WSS (secure WebSockets) by default on Linux and macOS. To generate a self-signed certificate:
 58 | 
 59 |     ```bash
 60 |     openssl req -x509 -newkey rsa:2048 -nodes -keyout localhost.key -out localhost.crt -days 365 -subj "/CN=localhost"
 61 |     ```
 62 | 
 63 |     > **Note**: If you are on any platform and skip certificate generation, the benchmarks will use the insecure `ws://` instead.
 64 | 
 65 | 2. Configuration
 66 | 
 67 |     The benchmark parameters (total data size, chunk size) can be modified by editing the `TestConfig` class within the [`ws_bench_utils.py`](ws_bench_utils.py) file. By default, both benchmarks are configured for `10 GiB` of data transfer.
 68 | 
 69 | ### Running the Benchmarks
 70 | 
 71 | It is recommended to run the server and client in separate terminal windows.
 72 | 
 73 | #### Benchmark 1: Simple Throughput Test
 74 | 
 75 | 1. Start the Server:
 76 | 
 77 |     ```bash
 78 |     python ws_bench_1_server.py
 79 |     ```
 80 | 
 81 | 2. Run the Client:
 82 | 
 83 |     ```bash
 84 |     python ws_bench_1_client.py
 85 |     ```
 86 | 
 87 | #### Benchmark 2: Verified Streaming Test
 88 | 
 89 | 1. Generate Test File (Initial Setup):
 90 | 
 91 |     This command will create a large (`10 GiB`) file named `testdata.bin` and its hash. Ensure you have sufficient disk space:
 92 | 
 93 |     ```bash
 94 |     python ws_bench_2.py generate
 95 |     ```
 96 | 
 97 | 2. Start the Server:
 98 | 
 99 |     ```bash
100 |     python ws_bench_2.py server
101 |     ```
102 | 
103 | 3. Run the Client (Choose one):
104 | 
105 |     - To test download speed (server sends, client receives):
106 | 
107 |     ```bash
108 |     python ws_bench_2.py client --test download
109 |     ```
110 | 
111 |     - To test upload speed (client sends, server receives):
112 | 
113 |     ```bash
114 |     python ws_bench_2.py client --test upload
115 |     ```
116 | 
117 | ### Performance Considerations
118 | 
119 | Benchmark results can vary significantly based on system-level factors. The following should be kept in mind:
120 | 
121 | - **Loopback Interface**: These tests run on a local loopback interface (`127.0.0.1`), which does not represent real-world internet conditions (latency, packet loss, etc.).
122 | 
123 | - **CPU Affinity**: For maximum consistency, especially on multi-core or multi-CPU (NUMA) systems, you can pin the server and client processes to specific CPU cores. This avoids performance penalties from processes migrating between cores or crossing CPU socket boundaries.
124 | 
125 | **On Linux:**
126 | Use `taskset` to specify a CPU core (e.g., core 0 for the server, core 1 for the client).
127 | 
128 | ```bash
129 | # Terminal 1
130 | taskset -c 0 python ws_bench_1_server.py
131 | 
132 | # Terminal 2
133 | taskset -c 1 python ws_bench_1_client.py
134 | ```
135 | 
136 | **On Windows:**
137 | Use the `start /affinity` command. The affinity mask is a hexadecimal number (`1` for CPU 0, `2` for CPU 1, `4` for CPU 2, etc.).
138 | 
139 | ```powershell
140 | # PowerShell/CMD 1
141 | start /affinity 1 python ws_bench_1_server.py
142 | 
143 | # PowerShell/CMD 2
144 | start /affinity 2 python ws_bench_1_client.py
145 | ```
146 | 
147 | - **Concurrent Tests**: The first benchmark code (`ws_bench_1_client.py`) can be uncommented to run upload and download tests concurrently. Note that a concurrent test will terminate as soon as the faster of the two directions (typically download) completes.
148 | 
149 | - **Queue Sizes**: Adjust the `send_queue` and `recv_queue` sizes within the [`TestConfig`](ws_bench_utils.py) class to observe the impact on performance and backpressure.
150 | 


--------------------------------------------------------------------------------
/scripts/build.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import os
  3 | import platform
  4 | import shutil
  5 | import struct
  6 | import tempfile
  7 | from glob import glob
  8 | from pathlib import Path
  9 | from urllib.request import urlretrieve
 10 | 
 11 | from cffi import FFI
 12 | 
 13 | # this is the upstream libcurl-impersonate version
 14 | __version__ = "1.2.5"
 15 | 
 16 | 
 17 | def detect_arch():
 18 |     with open(Path(__file__).parent.parent / "libs.json") as f:
 19 |         archs = json.loads(f.read())
 20 | 
 21 |     uname = platform.uname()
 22 |     glibc_flavor = "gnueabihf" if uname.machine in ["armv7l", "armv6l"] else "gnu"
 23 | 
 24 |     libc, _ = platform.libc_ver()
 25 |     # https://github.com/python/cpython/issues/87414
 26 |     libc = glibc_flavor if libc == "glibc" else "musl"
 27 |     pointer_size = struct.calcsize("P") * 8
 28 | 
 29 |     for arch in archs:
 30 |         if (
 31 |             arch["system"] == uname.system
 32 |             and arch["machine"] == uname.machine
 33 |             and arch["pointer_size"] == pointer_size
 34 |             and ("libc" not in arch or arch.get("libc") == libc)
 35 |         ):
 36 |             if arch["libdir"]:
 37 |                 arch["libdir"] = os.path.expanduser(arch["libdir"])
 38 |             else:
 39 |                 global tmpdir
 40 |                 if "CI" in os.environ:
 41 |                     tmpdir = "./tmplibdir"
 42 |                     os.makedirs(tmpdir, exist_ok=True)
 43 |                     arch["libdir"] = tmpdir
 44 |                 else:
 45 |                     tmpdir = tempfile.TemporaryDirectory()
 46 |                     arch["libdir"] = tmpdir.name
 47 |             return arch
 48 |     raise Exception(f"Unsupported arch: {uname}")
 49 | 
 50 | 
 51 | arch = detect_arch()
 52 | print(f"Using {arch['libdir']} to store libcurl-impersonate")
 53 | 
 54 | 
 55 | def download_libcurl():
 56 |     if (Path(arch["libdir"]) / arch["so_name"]).exists():
 57 |         print(".so files already downloaded.")
 58 |         return
 59 | 
 60 |     file = "libcurl-impersonate.tar.gz"
 61 |     sysname = "linux-" + arch["libc"] if arch["system"] == "Linux" else arch["sysname"]
 62 | 
 63 |     url = (
 64 |         f"https://github.com/lexiforest/curl-impersonate/releases/download/"
 65 |         f"v{__version__}/libcurl-impersonate-v{__version__}"
 66 |         f".{arch['so_arch']}-{sysname}.tar.gz"
 67 |     )
 68 | 
 69 |     print(f"Downloading libcurl-impersonate from {url}...")
 70 |     urlretrieve(url, file)
 71 | 
 72 |     print("Unpacking downloaded files...")
 73 |     os.makedirs(arch["libdir"], exist_ok=True)
 74 |     shutil.unpack_archive(file, arch["libdir"])
 75 | 
 76 |     if arch["system"] == "Windows":
 77 |         for file in glob(os.path.join(arch["libdir"], "lib/*.lib")):
 78 |             shutil.move(file, arch["libdir"])
 79 |         for file in glob(os.path.join(arch["libdir"], "bin/*.dll")):
 80 |             shutil.move(file, arch["libdir"])
 81 | 
 82 |     print("Files after unpacking")
 83 |     print(os.listdir(arch["libdir"]))
 84 | 
 85 | 
 86 | def get_curl_archives():
 87 |     print("Files for linking")
 88 |     print(os.listdir(arch["libdir"]))
 89 |     if arch["system"] == "Linux" and arch.get("link_type") == "static":
 90 |         # note that the order of libraries matters
 91 |         # https://stackoverflow.com/a/36581865
 92 |         return [
 93 |             f"{arch['libdir']}/libcurl-impersonate.a",
 94 |             f"{arch['libdir']}/libssl.a",
 95 |             f"{arch['libdir']}/libcrypto.a",
 96 |             f"{arch['libdir']}/libz.a",
 97 |             f"{arch['libdir']}/libzstd.a",
 98 |             f"{arch['libdir']}/libnghttp2.a",
 99 |             f"{arch['libdir']}/libngtcp2.a",
100 |             f"{arch['libdir']}/libngtcp2_crypto_boringssl.a",
101 |             f"{arch['libdir']}/libnghttp3.a",
102 |             f"{arch['libdir']}/libbrotlidec.a",
103 |             f"{arch['libdir']}/libbrotlienc.a",
104 |             f"{arch['libdir']}/libbrotlicommon.a",
105 |             f"{arch['libdir']}/libcares.a",
106 |         ]
107 |     else:
108 |         return []
109 | 
110 | 
111 | def get_curl_libraries():
112 |     if arch["system"] == "Windows":
113 |         return [
114 |             "Crypt32",
115 |             "Secur32",
116 |             "wldap32",
117 |             "Normaliz",
118 |             "libcurl",
119 |             "zstd",
120 |             "zlib",
121 |             "ssl",
122 |             "nghttp2",
123 |             "nghttp3",
124 |             "ngtcp2",
125 |             "ngtcp2_crypto_boringssl",
126 |             "crypto",
127 |             "brotlienc",
128 |             "brotlidec",
129 |             "brotlicommon",
130 |             "iphlpapi",
131 |             "cares",
132 |         ]
133 |     elif arch["system"] == "Darwin" or (
134 |         arch["system"] == "Linux" and arch.get("link_type") == "dynamic"
135 |     ):
136 |         return ["curl-impersonate"]
137 |     else:
138 |         return []
139 | 
140 | 
141 | ffibuilder = FFI()
142 | system = platform.system()
143 | root_dir = Path(__file__).parent.parent
144 | download_libcurl()
145 | 
146 | 
147 | ffibuilder.set_source(
148 |     "curl_cffi._wrapper",
149 |     """
150 |         #include "shim.h"
151 |     """,
152 |     # FIXME from `curl-impersonate`
153 |     libraries=get_curl_libraries(),
154 |     extra_objects=get_curl_archives(),
155 |     library_dirs=[arch["libdir"]],
156 |     source_extension=".c",
157 |     include_dirs=[
158 |         str(root_dir / "include"),
159 |         str(root_dir / "ffi"),
160 |         str(Path(arch["libdir"]) / "include"),
161 |     ],
162 |     sources=[
163 |         str(root_dir / "ffi/shim.c"),
164 |     ],
165 |     extra_compile_args=(
166 |         ["-Wno-implicit-function-declaration"] if system == "Darwin" else []
167 |     ),
168 |     extra_link_args=(["-lstdc++"] if system != "Windows" else []),
169 | )
170 | 
171 | with open(root_dir / "ffi/cdef.c") as f:
172 |     cdef_content = f.read()
173 |     ffibuilder.cdef(cdef_content)
174 | 
175 | 
176 | if __name__ == "__main__":
177 |     ffibuilder.compile(verbose=False)
178 | 


--------------------------------------------------------------------------------
/benchmark/ws_bench_1_client.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | """
  3 | Websocket client simple benchmark - TLS (WSS)
  4 | """
  5 | 
  6 | import time
  7 | from asyncio import (
  8 |     FIRST_COMPLETED,
  9 |     AbstractEventLoop,
 10 |     CancelledError,
 11 |     Task,
 12 |     sleep,
 13 |     wait,
 14 | )
 15 | 
 16 | from typing_extensions import Never
 17 | from ws_bench_utils import binary_data_generator, config, get_loop, logger
 18 | 
 19 | from curl_cffi import AsyncSession, AsyncWebSocket, WebSocketClosed
 20 | 
 21 | 
 22 | def calculate_stats(start_time: float, total_len: int) -> tuple[float, float]:
 23 |     """Calculate the amount of time it took and the throughput average.
 24 | 
 25 |     Args:
 26 |         start_time (`float`): The start time from the performance counter
 27 | 
 28 |     Returns:
 29 |         `tuple[float, float]`: The duration and rate in Gbps
 30 |     """
 31 |     end_time: float = time.perf_counter()
 32 |     duration: float = end_time - start_time
 33 |     rate_gbps: float = (total_len * 8) / duration / (1024**3)
 34 |     return duration, rate_gbps
 35 | 
 36 | 
 37 | async def health_check() -> Never:
 38 |     """A simple coroutine that continuously prints a dot to prove that the event loop
 39 |     is alive and not starved from being able to run this task.
 40 | 
 41 |     Returns:
 42 |         Never: Keeps printing dots until the task is cancelled.
 43 |     """
 44 |     counter = 0
 45 |     logger.info("Starting sanity check. You should see a continuous stream of dots '.'")
 46 |     logger.info("If the dots stop for a long time, the event loop is blocked.")
 47 |     try:
 48 |         while True:
 49 |             await sleep(0.05)
 50 |             print(".", end="", flush=True)
 51 |             counter += 1
 52 |             if counter % 100 == 0:
 53 |                 print("")
 54 |     finally:
 55 |         print("\r\x1b[K", end="")
 56 |         logger.info("Sanity check complete.")
 57 | 
 58 | 
 59 | async def ws_counter(ws: AsyncWebSocket) -> None:
 60 |     """Simple coroutine which counts how many bytes were received.
 61 | 
 62 |     Args:
 63 |         ws (`AsyncWebSocket`): Instantiated Curl CFFI AsyncWebSocket object.
 64 |     """
 65 |     recvd_len: int = 0
 66 |     start_time: float = time.perf_counter()
 67 |     logger.info("Receiving data from server")
 68 |     try:
 69 |         async for msg in ws:
 70 |             recvd_len += len(msg)
 71 | 
 72 |     except WebSocketClosed as exc:
 73 |         logger.debug(exc)
 74 | 
 75 |     finally:
 76 |         duration, avg_rate = calculate_stats(start_time, recvd_len)
 77 |         print("\r\x1b[K", end="")
 78 |         logger.info(
 79 |             "Received: %.2f GB in %.2f seconds", recvd_len / (1024**3), duration
 80 |         )
 81 |         logger.info("Average throughput (recv): %.2f Gbps", avg_rate)
 82 | 
 83 | 
 84 | async def ws_sender(ws: AsyncWebSocket) -> None:
 85 |     """Simple coroutine which just sends the same chunk of bytes until exhausted.
 86 | 
 87 |     Args:
 88 |         ws (`AsyncWebSocket`): Instantiated Curl CFFI AsyncWebSocket object.
 89 |     """
 90 |     sent_len: int = 0
 91 |     start_time: float = time.perf_counter()
 92 |     logger.info("Sending data to server")
 93 |     try:
 94 |         async for data_chunk in binary_data_generator(
 95 |             total_gb=config.total_gb, chunk_size=min(65535, config.chunk_size)
 96 |         ):
 97 |             _ = await ws.send(payload=data_chunk)
 98 |             sent_len += len(data_chunk)
 99 | 
100 |     except WebSocketClosed as exc:
101 |         logger.debug(exc)
102 | 
103 |     finally:
104 |         duration, avg_rate = calculate_stats(start_time, sent_len)
105 |         print("\r\x1b[K", end="")
106 |         logger.info("Sent: %.2f GB in %.2f seconds", sent_len / (1024**3), duration)
107 |         logger.info("Average throughput (send): %.2f Gbps", avg_rate)
108 | 
109 | 
110 | async def run_benchmark(loop: AbstractEventLoop) -> None:
111 |     """
112 |     Simple client benchmark which sends/receives binary messages using curl-cffi.
113 |     """
114 |     logger.info("Starting curl-cffi benchmark")
115 |     ws: AsyncWebSocket | None = None
116 |     waiters: set[Task[None]] = set()
117 |     try:
118 |         async with AsyncSession(impersonate="chrome", verify=False) as session:
119 |             ws = await session.ws_connect(
120 |                 config.srv_path,
121 |                 recv_queue_size=config.recv_queue,
122 |                 send_queue_size=config.send_queue,
123 |             )
124 |             logger.info("Connection established to %s", config.srv_path)
125 | 
126 |             # NOTE: Uncomment for send/recv benchmark or both
127 |             waiters.add(loop.create_task(ws_counter(ws)))
128 |             # waiters.add(loop.create_task(ws_sender(ws)))
129 | 
130 |             _, _ = await wait(waiters, return_when=FIRST_COMPLETED)
131 | 
132 |     except Exception:
133 |         logger.exception("curl-cffi benchmark failed")
134 |         raise
135 | 
136 |     finally:
137 |         for wait_task in waiters:
138 |             try:
139 |                 if not wait_task.done():
140 |                     _ = wait_task.cancel()
141 |                     await wait_task
142 | 
143 |             except CancelledError:
144 |                 ...
145 |         if ws:
146 |             await ws.close(timeout=2)
147 | 
148 | 
149 | async def main(loop: AbstractEventLoop) -> None:
150 |     """Entrypoint"""
151 |     waiters: set[Task[None]] = set()
152 | 
153 |     try:
154 |         # Create the health check and benchmark tasks
155 |         waiters.update(
156 |             {loop.create_task(health_check()), loop.create_task(run_benchmark(loop))}
157 |         )
158 |         _, _ = await wait(waiters, return_when=FIRST_COMPLETED)
159 | 
160 |     except (KeyboardInterrupt, CancelledError):
161 |         logger.debug("Cancelling benchmark")
162 | 
163 |     finally:
164 |         for wait_task in waiters:
165 |             try:
166 |                 if not wait_task.done():
167 |                     _ = wait_task.cancel()
168 |                     await wait_task
169 |             except CancelledError:
170 |                 ...
171 | 
172 | 
173 | if __name__ == "__main__":
174 |     evt_loop: AbstractEventLoop = get_loop()
175 |     try:
176 |         evt_loop.run_until_complete(main(evt_loop))
177 |     finally:
178 |         evt_loop.close()
179 | 


--------------------------------------------------------------------------------
/examples/stream.py:
--------------------------------------------------------------------------------
  1 | import asyncio
  2 | from contextlib import closing
  3 | 
  4 | import curl_cffi
  5 | 
  6 | try:
  7 |     # Python 3.10+
  8 |     from contextlib import aclosing  # pyright: ignore
  9 | except ImportError:
 10 |     from contextlib import asynccontextmanager
 11 | 
 12 |     @asynccontextmanager
 13 |     async def aclosing(thing):
 14 |         try:
 15 |             yield thing
 16 |         finally:
 17 |             await thing.aclose()
 18 | 
 19 | 
 20 | URL = "https://httpbin.org/stream/20"
 21 | 
 22 | with curl_cffi.Session() as s:
 23 |     print("\n======================================================================")
 24 |     print("Iterating over chunks")
 25 |     print("=====================================================================\n")
 26 |     r = s.get(URL, stream=True)
 27 |     for chunk in r.iter_content():
 28 |         print("Status: ", r.status_code)
 29 |         assert r.status_code == 200
 30 |         print("CHUNK", chunk)
 31 |     r.close()
 32 | 
 33 |     print("\n====================================================================")
 34 |     print("Empty body is fine.")
 35 |     print("====================================================================\n")
 36 |     response = s.get("https://httpbin.org/status/202", stream=True)
 37 |     print(response.status_code)
 38 |     response.close()
 39 | 
 40 |     print("\n====================================================================")
 41 |     print("Using with stream")
 42 |     print("====================================================================\n")
 43 |     with s.stream("GET", URL) as r:
 44 |         print("Status: ", r.status_code)
 45 |         for chunk in r.iter_content():
 46 |             assert r.status_code == 200
 47 |             print("CHUNK", chunk)
 48 | 
 49 |     print("\n=====================================================================")
 50 |     print("Iterating on a line basis")
 51 |     print("=====================================================================\n")
 52 |     r = s.get(URL, stream=True)
 53 |     print("Status: ", r.status_code)
 54 |     for line in r.iter_lines():
 55 |         assert r.status_code == 200
 56 |         print("LINE", line.decode())
 57 |     r.close()
 58 | 
 59 |     print("\n======================================================================")
 60 |     print("Break when reading")
 61 |     print("=====================================================================\n")
 62 |     r = s.get("https://httpbin.org/drip", stream=True)
 63 |     for idx, chunk in enumerate(r.iter_content()):
 64 |         print(f"{idx}={chunk.decode()}", end="#", flush=True)
 65 |         if idx == 3:
 66 |             break
 67 |     r.close()
 68 | 
 69 |     print("\n=====================================================================")
 70 |     print("Better, using closing to ensure the response is closed")
 71 |     print("=====================================================================\n")
 72 |     with closing(s.get(URL, stream=True)) as r:
 73 |         for chunk in r.iter_content():
 74 |             print("Status: ", r.status_code)
 75 |             assert r.status_code == 200
 76 |             print("CHUNK", chunk)
 77 | 
 78 | 
 79 | async def async_examples():
 80 |     async with curl_cffi.AsyncSession() as s:
 81 |         print("\n====================================================================")
 82 |         print("Using asyncio")
 83 |         print("====================================================================\n")
 84 |         r = await s.get(URL, stream=True)
 85 |         async for chunk in r.aiter_content():
 86 |             print("Status: ", r.status_code)
 87 |             assert r.status_code == 200
 88 |             print("CHUNK", chunk)
 89 |         await r.aclose()
 90 | 
 91 |         print("\n====================================================================")
 92 |         print("Empty body is fine.")
 93 |         print("====================================================================\n")
 94 |         response = await s.get("https://httpbin.org/status/202", stream=True)
 95 |         print(response.status_code)
 96 |         await response.aclose()
 97 | 
 98 |         print("\n====================================================================")
 99 |         print("Using asyncio async with stream")
100 |         print("====================================================================\n")
101 |         async with s.stream("GET", URL) as r:
102 |             async for chunk in r.aiter_content():
103 |                 print("Status: ", r.status_code)
104 |                 assert r.status_code == 200
105 |                 print("CHUNK", chunk)
106 | 
107 |         print(
108 |             "\n======================================================================"
109 |         )
110 |         print("Break when reading")
111 |         print("=====================================================================\n")
112 |         async with s.stream("GET", "https://httpbin.org/drip") as r:
113 |             idx = 0
114 |             async for chunk in r.aiter_content():
115 |                 idx += 1
116 |                 print(f"{idx}={chunk.decode()}", end="#", flush=True)
117 |                 if idx == 3:
118 |                     break
119 | 
120 |         print("\n====================================================================")
121 |         print("Stream, but not stream, await atext")
122 |         print("====================================================================\n")
123 |         async with s.stream("GET", URL) as r:
124 |             print(await r.atext())
125 | 
126 |         print("\n====================================================================")
127 |         print("Using asyncio async with stream")
128 |         print("====================================================================\n")
129 |         async with s.stream("GET", URL) as r:
130 |             async for chunk in r.aiter_content():
131 |                 print("Status: ", r.status_code)
132 |                 assert r.status_code == 200
133 |                 print("CHUNK", chunk)
134 | 
135 |         print("\n====================================================================")
136 |         print("Better, using aclosing to ensure the response is closed")
137 |         print("====================================================================\n")
138 |         async with aclosing(await s.get(URL.replace("20", "100"), stream=True)) as r:
139 |             async for chunk in r.aiter_content():
140 |                 print("Status: ", r.status_code)
141 |                 assert r.status_code == 200
142 |                 print("CHUNK", chunk)
143 | 
144 | 
145 | asyncio.run(async_examples())
146 | 


--------------------------------------------------------------------------------
/curl_cffi/requests/__init__.py:
--------------------------------------------------------------------------------
  1 | __all__ = [
  2 |     "Session",
  3 |     "AsyncSession",
  4 |     "BrowserType",
  5 |     "BrowserTypeLiteral",
  6 |     "CurlWsFlag",
  7 |     "request",
  8 |     "head",
  9 |     "get",
 10 |     "post",
 11 |     "put",
 12 |     "patch",
 13 |     "delete",
 14 |     "options",
 15 |     "RequestsError",
 16 |     "Cookies",
 17 |     "Headers",
 18 |     "Request",
 19 |     "Response",
 20 |     "AsyncWebSocket",
 21 |     "WebSocket",
 22 |     "WebSocketError",
 23 |     "WebSocketClosed",
 24 |     "WebSocketTimeout",
 25 |     "WsCloseCode",
 26 |     "ExtraFingerprints",
 27 |     "CookieTypes",
 28 |     "HeaderTypes",
 29 |     "ProxySpec",
 30 | ]
 31 | 
 32 | from typing import Optional, TYPE_CHECKING, TypedDict
 33 | 
 34 | from ..const import CurlWsFlag
 35 | from .cookies import Cookies, CookieTypes
 36 | from .errors import RequestsError
 37 | from .headers import Headers, HeaderTypes
 38 | from .impersonate import BrowserType, BrowserTypeLiteral, ExtraFingerprints
 39 | from .models import Request, Response
 40 | from .session import (
 41 |     AsyncSession,
 42 |     HttpMethod,
 43 |     ProxySpec,
 44 |     Session,
 45 |     ThreadType,
 46 |     RequestParams,
 47 |     Unpack,
 48 | )
 49 | from .websockets import (
 50 |     AsyncWebSocket,
 51 |     WebSocket,
 52 |     WebSocketClosed,
 53 |     WebSocketError,
 54 |     WebSocketTimeout,
 55 |     WsCloseCode,
 56 | )
 57 | 
 58 | if TYPE_CHECKING:
 59 | 
 60 |     class SessionRequestParams(RequestParams, total=False):
 61 |         thread: Optional[ThreadType]
 62 |         curl_options: Optional[dict]
 63 |         debug: Optional[bool]
 64 | else:
 65 |     SessionRequestParams = TypedDict
 66 | 
 67 | 
 68 | def request(
 69 |     method: HttpMethod,
 70 |     url: str,
 71 |     thread: Optional[ThreadType] = None,
 72 |     curl_options: Optional[dict] = None,
 73 |     debug: Optional[bool] = None,
 74 |     **kwargs: Unpack[RequestParams],
 75 | ) -> Response:
 76 |     """Send an http request.
 77 | 
 78 |     Parameters:
 79 |         method: http method for the request: GET/POST/PUT/DELETE etc.
 80 |         url: url for the requests.
 81 |         params: query string for the requests.
 82 |         data: form values(dict/list/tuple) or binary data to use in body,
 83 |             ``Content-Type: application/x-www-form-urlencoded`` will be added if a dict
 84 |             is given.
 85 |         json: json values to use in body, `Content-Type: application/json` will be added
 86 |             automatically.
 87 |         headers: headers to send.
 88 |         cookies: cookies to use.
 89 |         files: not supported, use ``multipart`` instead.
 90 |         auth: HTTP basic auth, a tuple of (username, password), only basic auth is
 91 |             supported.
 92 |         timeout: how many seconds to wait before giving up.
 93 |         allow_redirects: whether to allow redirection.
 94 |         max_redirects: max redirect counts, default 30, use -1 for unlimited.
 95 |         proxies: dict of proxies to use, prefer to use ``proxy`` if they are the same.
 96 |             format: ``{"http": proxy_url, "https": proxy_url}``.
 97 |         proxy: proxy to use, format: "http://user@pass:proxy_url".
 98 |             Can't be used with `proxies` parameter.
 99 |         proxy_auth: HTTP basic auth for proxy, a tuple of (username, password).
100 |         verify: whether to verify https certs.
101 |         referer: shortcut for setting referer header.
102 |         accept_encoding: shortcut for setting accept-encoding header.
103 |         content_callback: a callback function to receive response body.
104 |             ``def callback(chunk: bytes) -> None:``
105 |         impersonate: which browser version to impersonate.
106 |         ja3: ja3 string to impersonate.
107 |         akamai: akamai string to impersonate.
108 |         extra_fp: extra fingerprints options, in complement to ja3 and akamai strings.
109 |         thread: thread engine to use for working with other thread implementations.
110 |             choices: eventlet, gevent.
111 |         default_headers: whether to set default browser headers when impersonating.
112 |         default_encoding: encoding for decoding response content if charset is not found
113 |             in headers. Defaults to "utf-8". Can be set to a callable for automatic
114 |             detection.
115 |         quote: Set characters to be quoted, i.e. percent-encoded. Default safe string
116 |             is ``!#$%&'()*+,/:;=?@[]~``. If set to a sting, the character will be
117 |             removed from the safe string, thus quoted. If set to False, the url will be
118 |             kept as is, without any automatic percent-encoding, you must encode the URL
119 |             yourself.
120 |         curl_options: extra curl options to use.
121 |         http_version: limiting http version, defaults to http2.
122 |         debug: print extra curl debug info.
123 |         interface: which interface to use.
124 |         cert: a tuple of (cert, key) filenames for client cert.
125 |         stream: streaming the response, default False.
126 |         max_recv_speed: maximum receive speed, bytes per second.
127 |         multipart: upload files using the multipart format, see examples for details.
128 |         discard_cookies: discard cookies from server. Default to False.
129 | 
130 |     Returns:
131 |         A ``Response`` object.
132 |     """
133 |     debug = False if debug is None else debug
134 |     with Session(thread=thread, curl_options=curl_options, debug=debug) as s:
135 |         return s.request(method=method, url=url, **kwargs)
136 | 
137 | 
138 | def head(url: str, **kwargs: Unpack[SessionRequestParams]):
139 |     return request(method="HEAD", url=url, **kwargs)
140 | 
141 | 
142 | def get(url: str, **kwargs: Unpack[SessionRequestParams]):
143 |     return request(method="GET", url=url, **kwargs)
144 | 
145 | 
146 | def post(url: str, **kwargs: Unpack[SessionRequestParams]):
147 |     return request(method="POST", url=url, **kwargs)
148 | 
149 | 
150 | def put(url: str, **kwargs: Unpack[SessionRequestParams]):
151 |     return request(method="PUT", url=url, **kwargs)
152 | 
153 | 
154 | def patch(url: str, **kwargs: Unpack[SessionRequestParams]):
155 |     return request(method="PATCH", url=url, **kwargs)
156 | 
157 | 
158 | def delete(url: str, **kwargs: Unpack[SessionRequestParams]):
159 |     return request(method="DELETE", url=url, **kwargs)
160 | 
161 | 
162 | def options(url: str, **kwargs: Unpack[SessionRequestParams]):
163 |     return request(method="OPTIONS", url=url, **kwargs)
164 | 
165 | 
166 | def trace(url: str, **kwargs: Unpack[SessionRequestParams]):
167 |     return request(method="TRACE", url=url, **kwargs)
168 | 
169 | 
170 | def query(url: str, **kwargs: Unpack[SessionRequestParams]):
171 |     return request(method="QUERY", url=url, **kwargs)
172 | 


--------------------------------------------------------------------------------
/docs/api.rst:
--------------------------------------------------------------------------------
  1 | API References
  2 | ======
  3 | 
  4 | curl low levl APIs
  5 | ------------------
  6 | 
  7 | Curl
  8 | ~~~~~~
  9 | 
 10 | .. autoclass:: curl_cffi.Curl
 11 | 
 12 |    .. automethod:: __init__
 13 |    .. automethod:: debug
 14 |    .. automethod:: setopt
 15 |    .. automethod:: getinfo
 16 |    .. automethod:: version
 17 |    .. automethod:: impersonate
 18 |    .. automethod:: perform
 19 |    .. automethod:: duphandle
 20 |    .. automethod:: reset
 21 |    .. automethod:: parse_cookie_headers
 22 |    .. automethod:: get_reason_phrase
 23 |    .. automethod:: parse_status_line
 24 |    .. automethod:: close
 25 |    .. automethod:: ws_recv
 26 |    .. automethod:: ws_send
 27 |    .. automethod:: ws_close
 28 | 
 29 | AsyncCurl
 30 | ~~~~~~
 31 | 
 32 | .. autoclass:: curl_cffi.AsyncCurl
 33 | 
 34 |    .. automethod:: __init__
 35 |    .. automethod:: add_handle
 36 |    .. automethod:: remove_handle
 37 |    .. automethod:: set_result
 38 |    .. automethod:: set_exception
 39 |    .. automethod:: setopt
 40 |    .. automethod:: socket_action
 41 |    .. automethod:: process_data
 42 |    .. automethod:: close
 43 | 
 44 | CurlMime
 45 | ~~~~~~
 46 | 
 47 | .. autoclass:: curl_cffi.CurlMime
 48 | 
 49 |    .. automethod:: __init__
 50 |    .. automethod:: addpart
 51 |    .. automethod:: from_list
 52 |    .. automethod:: attach
 53 |    .. automethod:: close
 54 | 
 55 | Constants
 56 | ~~~~~~~~~
 57 | 
 58 | Enum values used by ``setopt`` and ``getinfo`` can be accessed from ``CurlOpt`` and
 59 | ``CurlInfo``.
 60 | 
 61 | .. autoclass:: curl_cffi.CurlOpt
 62 | .. autoclass:: curl_cffi.CurlInfo
 63 | .. autoclass:: curl_cffi.CurlMOpt
 64 | .. autoclass:: curl_cffi.CurlECode
 65 | .. autoclass:: curl_cffi.CurlHttpVersion
 66 | .. autoclass:: curl_cffi.CurlWsFlag
 67 | .. autoclass:: curl_cffi.CurlSslVersion
 68 | 
 69 | requests-like API
 70 | -----------------
 71 | 
 72 | request method
 73 | ~~~~~~~~~~~~~~
 74 | 
 75 | ``requests.get``, ``requests.post``, etc are just aliases of ``.request(METHOD, ...)``
 76 | 
 77 | .. autofunction:: curl_cffi.requests.request
 78 | 
 79 | 
 80 | Sessions
 81 | ~~~~~~~
 82 | 
 83 | .. autoclass:: curl_cffi.requests.Session
 84 | 
 85 |    .. automethod:: __init__
 86 |    .. automethod:: request
 87 |    .. automethod:: stream
 88 |    .. automethod:: ws_connect
 89 | 
 90 | 
 91 | .. autoclass:: curl_cffi.requests.AsyncSession
 92 | 
 93 |    .. automethod:: __init__
 94 |    .. automethod:: request
 95 |    .. automethod:: stream
 96 |    .. automethod:: close
 97 |    .. automethod:: ws_connect
 98 | 
 99 | Headers
100 | ~~~~~~~
101 | 
102 | .. autoclass:: curl_cffi.requests.Headers
103 | 
104 |    .. autoproperty:: encoding
105 |    .. automethod:: raw
106 |    .. automethod:: multi_items
107 |    .. automethod:: get
108 |    .. automethod:: get_list
109 |    .. automethod:: update
110 |    .. automethod:: __getitem__
111 |    .. automethod:: __setitem__
112 |    .. automethod:: __delitem__
113 | 
114 | Cookies
115 | ~~~~~~~
116 | 
117 | .. autoclass:: curl_cffi.requests.Cookies
118 | 
119 |    .. automethod:: set
120 |    .. automethod:: get
121 |    .. automethod:: delete
122 |    .. automethod:: clear
123 |    .. automethod:: update
124 |    .. automethod:: __getitem__
125 |    .. automethod:: __setitem__
126 |    .. automethod:: __delitem__
127 | 
128 | Request, Response
129 | ~~~~~~
130 | 
131 | .. autoclass:: curl_cffi.requests.Request
132 | 
133 | .. autoclass:: curl_cffi.requests.Response
134 | 
135 |    .. automethod:: raise_for_status
136 |    .. automethod:: iter_lines
137 |    .. automethod:: iter_content
138 |    .. automethod:: json
139 |    .. automethod:: close
140 |    .. automethod:: aiter_lines
141 |    .. automethod:: aiter_content
142 |    .. automethod:: atext
143 |    .. automethod:: acontent
144 |    .. automethod:: aclose
145 | 
146 | Asyncio
147 | -------
148 | 
149 | WebSocket
150 | ---------
151 | 
152 | .. autoclass:: curl_cffi.requests.WebSocket
153 | 
154 |    .. automethod:: __init__
155 |    .. automethod:: connect
156 |    .. automethod:: recv_fragment
157 |    .. automethod:: recv
158 |    .. automethod:: recv_str
159 |    .. automethod:: recv_json
160 |    .. automethod:: send
161 |    .. automethod:: send_binary
162 |    .. automethod:: send_bytes
163 |    .. automethod:: send_str
164 |    .. automethod:: send_json
165 |    .. automethod:: ping
166 |    .. automethod:: run_forever
167 |    .. automethod:: close
168 | 
169 | .. autoclass:: curl_cffi.requests.AsyncWebSocket
170 | 
171 |    .. automethod:: __init__
172 |    .. automethod:: recv_fragment
173 |    .. automethod:: recv
174 |    .. automethod:: recv_str
175 |    .. automethod:: recv_json
176 |    .. automethod:: send
177 |    .. automethod:: send_binary
178 |    .. automethod:: send_bytes
179 |    .. automethod:: send_str
180 |    .. automethod:: send_json
181 |    .. automethod:: ping
182 |    .. automethod:: close
183 | 
184 | Exceptions and Warnings
185 | -----------------------
186 | 
187 | Exceptions
188 | ~~~~~~~~~~~~~~
189 | 
190 | We try to follow the `requests` exception hirearchy, however, some are missing, while
191 | some are added.
192 | 
193 | If an exception is marked as "not used", please catch the base exception.
194 | 
195 | 
196 | .. autoclass:: curl_cffi.requests.exceptions.RequestException
197 | .. autoclass:: curl_cffi.requests.exceptions.CookieConflict
198 | .. autoclass:: curl_cffi.requests.exceptions.SessionClosed
199 | .. autoclass:: curl_cffi.requests.exceptions.ImpersonateError
200 | .. autoclass:: curl_cffi.requests.exceptions.InvalidJSONError
201 | .. autoclass:: curl_cffi.requests.exceptions.HTTPError
202 | .. autoclass:: curl_cffi.requests.exceptions.IncompleteRead
203 | .. autoclass:: curl_cffi.requests.exceptions.ConnectionError
204 | .. autoclass:: curl_cffi.requests.exceptions.DNSError
205 | .. autoclass:: curl_cffi.requests.exceptions.ProxyError
206 | .. autoclass:: curl_cffi.requests.exceptions.SSLError
207 | .. autoclass:: curl_cffi.requests.exceptions.CertificateVerifyError
208 | .. autoclass:: curl_cffi.requests.exceptions.Timeout
209 | .. autoclass:: curl_cffi.requests.exceptions.ConnectTimeout
210 | .. autoclass:: curl_cffi.requests.exceptions.ReadTimeout
211 | .. autoclass:: curl_cffi.requests.exceptions.URLRequired
212 | .. autoclass:: curl_cffi.requests.exceptions.TooManyRedirects
213 | .. autoclass:: curl_cffi.requests.exceptions.MissingSchema
214 | .. autoclass:: curl_cffi.requests.exceptions.InvalidSchema
215 | .. autoclass:: curl_cffi.requests.exceptions.InvalidURL
216 | .. autoclass:: curl_cffi.requests.exceptions.InvalidHeader
217 | .. autoclass:: curl_cffi.requests.exceptions.InvalidProxyURL
218 | .. autoclass:: curl_cffi.requests.exceptions.ChunkedEncodingError
219 | .. autoclass:: curl_cffi.requests.exceptions.ContentDecodingError
220 | .. autoclass:: curl_cffi.requests.exceptions.StreamConsumedError
221 | .. autoclass:: curl_cffi.requests.exceptions.RetryError
222 | .. autoclass:: curl_cffi.requests.exceptions.UnrewindableBodyError
223 | .. autoclass:: curl_cffi.requests.exceptions.InterfaceError
224 | 
225 | Warnings
226 | ~~~~~~~~~~~~~~
227 | 
228 | .. autoclass:: curl_cffi.requests.exceptions.RequestsWarning
229 | .. autoclass:: curl_cffi.requests.exceptions.FileModeWarning
230 | .. autoclass:: curl_cffi.requests.exceptions.RequestsDependencyWarning
231 | 


--------------------------------------------------------------------------------
/docs/faq.rst:
--------------------------------------------------------------------------------
  1 | FAQ
  2 | ==========================
  3 | 
  4 | What does the pro version offer? Is the open source project still maintained?
  5 | ----------------------------------------------------------------------------
  6 | 
  7 | Yes, the open source project is maintained as before.
  8 | 
  9 | In the `pro version <https://impersonate.pro>`_, we provide:
 10 | 
 11 | - weekly update of targets
 12 | - profiles for mobile browsers and apps
 13 | - some private detection fields
 14 | - http/3 fingerprints and proxy support
 15 | 
 16 | And, a better financial situation will help the open source version better maintained.
 17 | 
 18 | Why does the JA3 fingerprints change for Chrome 110+ impersonation?
 19 | ------
 20 | 
 21 | This is intended.
 22 | 
 23 | Chrome introduces ``ClientHello`` permutation in version 110, which means the order of
 24 | extensions will be random, thus JA3 fingerprints will be random. So, when comparing
 25 | JA3 fingerprints of ``curl_cffi`` and a browser, they may differ. However, this does not
 26 | mean that TLS fingerprints will not be a problem, ``ClientHello`` extension order is just
 27 | one factor of how servers can tell automated requests from browsers.
 28 | 
 29 | Roughly, this can be mitigated like:
 30 | 
 31 | .. code-block::
 32 | 
 33 |     ja3 = md5(list(extensions), ...other arguments)
 34 |     ja3n = md5(set(extensions), ...other arguments)
 35 | 
 36 | See more from `this article <https://www.fastly.com/blog/a-first-look-at-chromes-tls-clienthello-permutation-in-the-wild>`_
 37 | and `curl-impersonate notes <https://github.com/lwthiker/curl-impersonate/pull/148>`_.
 38 | 
 39 | Can I bypass Cloudflare with this project? or any other specific site.
 40 | ------
 41 | 
 42 | Short answer is: it depends.
 43 | 
 44 | TLS and http2 fingerprints are just one of the many factors Cloudflare considers. Other
 45 | factors include but are not limited to: IP quality, request rate, JS fingerprints, etc.
 46 | 
 47 | There are different protection levels for website owners to choose. For the most basic
 48 | ones, TLS fingerprints alone maybe enough, but for higher levels, you may need to find
 49 | a better proxy IP provider and use browser automation tools like playwright.
 50 | 
 51 | If you are in a hurry or just want the professionals to take care of the hard parts,
 52 | you can consider the commercial solutions from our sponsors:
 53 | 
 54 | - `Yescaptcha <https://yescaptcha.com/i/stfnIO>`_, captcha resolver and proxy service for bypassing Cloudflare.
 55 | - `ScrapeNinja <https://scrapeninja.net/?utm_source=github&utm_medium=banner&utm_campaign=cffi>`_, Managed web scraping API.
 56 | 
 57 | For details, see the `Sponsor` section on front page.
 58 | 
 59 | 
 60 | I'm getting certs errors
 61 | ------
 62 | 
 63 | The simplest way is to turn off cert verification by ``verify=False``:
 64 | 
 65 | .. code-block:: python
 66 | 
 67 |     r = curl_cffi.get("https://example.com", verify=False)
 68 | 
 69 | 
 70 | ErrCode: 77, Reason: error setting certificate verify locations
 71 | ------
 72 | 
 73 | On Windows, if your Python environment or CA bundle path contains non-ASCII characters
 74 | (e.g. accents), libcurl may fail to open the CA file when passed as a narrow ``char*``.
 75 | ``curl_cffi`` now encodes file-path options (e.g. ``CAINFO``, ``PROXY_CAINFO``,
 76 | ``SSLCERT``) using the system's preferred ANSI code page on Windows to ensure correct
 77 | file access. This fixes most occurrences of error 77.
 78 | 
 79 | How to use with fiddler/charles to intercept content
 80 | ------
 81 | 
 82 | Fiddler and Charles uses man-in-the-middle self-signed certs to intercept TLS traffic,
 83 | to use with them, simply set ``verify=False``.
 84 | 
 85 | 
 86 | ErrCode: 92, Reason: 'HTTP/2 stream 0 was not closed cleanly: PROTOCOL_ERROR (err 1)'
 87 | ------
 88 | 
 89 | This error(http/2 stream 0) has been reported many times ever since `curl_cffi` was
 90 | published, but I still can not find a reproducible way to trigger it. Given that the
 91 | majority users are behind proxies, the situation is even more difficult to deal with.
 92 | 
 93 | I'm even not sure it's a bug introduced in libcurl, curl-impersonate or curl_cffi, or
 94 | it's just a server error. Depending on your context, here are some general suggestions
 95 | for you:
 96 | 
 97 | - First, try removing the ``Content-Length`` header from you request.
 98 | - Try to see if this error was caused by proxies, if so, use better proxies.
 99 | - If it stops working after a while, maybe you're just being blocked by, such as, Akamai.
100 | - Force http/1.1 mode. Some websites' h2 implementation is simply broken.
101 | - See if the url works in your real browser.
102 | - Find a stable way to reproduce it, so we can finally fix, or at least bypass it.
103 | 
104 | To force curl to use http 1.1 only.
105 | 
106 | .. code-block:: python
107 | 
108 |     import curl_cffi
109 | 
110 |     r = curl_cffi.get("https://postman-echo.com", http_version=curl_cffi.CurlHttpVersion.V1_1)
111 | 
112 | Related issues:
113 | 
114 | - `#19 <https://github.com/lexiforest/curl_cffi/issues/19>`_, 
115 | - `#42 <https://github.com/lexiforest/curl_cffi/issues/42>`_, 
116 | - `#79 <https://github.com/lexiforest/curl_cffi/issues/79>`_, 
117 | - `#165 <https://github.com/lexiforest/curl_cffi/issues/165>`_, 
118 | 
119 | 
120 | Packaging with PyInstaller
121 | ------
122 | 
123 | If you encountered any issue with PyInstaller, here are a list of options provided by the
124 | community:
125 | 
126 | Add the ``--hidden-import`` option.
127 | 
128 | .. code-block::
129 | 
130 |    pyinstaller -F .\example.py --hidden-import=_cffi_backend --collect-all curl_cffi
131 | 
132 | Add other paths:
133 | 
134 | .. code-block::
135 | 
136 |    pyinstaller --noconfirm --onefile --console \
137 |        --paths "C:/Users/Administrator/AppData/Local/Programs/Python/Python39" \
138 |        --add-data "C:/Users/Administrator/AppData/Local/Programs/Python/Python39/Lib/site-packages/curl_cffi.libs/libcurl-cbb416caa1dd01638554eab3f38d682d.dll;." \
139 |        --collect-data "curl_cffi" \
140 |        "C:/Users/Administrator/Desktop/test_script.py"
141 | 
142 | 
143 | See also: 
144 | 
145 | - `#5 <https://github.com/lexiforest/curl_cffi/issues/5>`_
146 | - `#48 <https://github.com/lexiforest/curl_cffi/issues/48>`_
147 | 
148 | 
149 | How to change the order of headers?
150 | ------
151 | 
152 | By default, setting ``impersonate`` parameter will bring the corresponding headers. If
153 | you want to change the order or use your own headers, you need to turn off that and bring
154 | your own headers.
155 | 
156 | .. code-block::
157 | 
158 |    requests.get(url, impersonate="chrome", default_headers=False, headers=...)
159 | 
160 | 
161 | How to deal with encoding/decoding errors?
162 | ------------------------------------------
163 | 
164 | Use ``chardet`` or ``cchardet``
165 | 
166 | .. code-block::
167 | 
168 |     >>> import curl_cffi
169 |     >>> r = curl_cffi.get("https://example.com/messy_codec.html")
170 |     >>> import chardet
171 |     >>> chardet.detect(r.content)
172 |     {'encoding': 'GB2312', 'confidence': 0.99, 'language': 'Chinese'}
173 | 
174 | Or use regex or lxml to parse the meta header:
175 | 
176 | .. code-block::
177 | 
178 |     <meta http-equiv="Content-Type" content="text/html; charset=gbk" />
179 | 


--------------------------------------------------------------------------------
/curl_cffi/requests/exceptions.py:
--------------------------------------------------------------------------------
  1 | # Apache 2.0 License
  2 | # Vendored from https://github.com/psf/requests/blob/main/src/requests/exceptions.py
  3 | # With our own addtions
  4 | 
  5 | import json
  6 | from typing import Literal, Union
  7 | 
  8 | from ..const import CurlECode
  9 | from ..curl import CurlError
 10 | 
 11 | 
 12 | # Note IOError is an alias of OSError in Python 3.x
 13 | class RequestException(CurlError, OSError):
 14 |     """Base exception for curl_cffi.requests package"""
 15 | 
 16 |     def __init__(
 17 |         self,
 18 |         msg,
 19 |         code: Union[CurlECode, Literal[0]] = 0,
 20 |         response=None,
 21 |         *args,
 22 |         **kwargs,
 23 |     ):
 24 |         super().__init__(msg, code, *args, **kwargs)
 25 |         self.response = response
 26 | 
 27 | 
 28 | class CookieConflict(RequestException):
 29 |     """Same cookie exists for different domains."""
 30 | 
 31 | 
 32 | class SessionClosed(RequestException):
 33 |     """The session has already been closed."""
 34 | 
 35 | 
 36 | class ImpersonateError(RequestException):
 37 |     """The impersonate config was wrong or impersonate failed."""
 38 | 
 39 | 
 40 | # not used
 41 | class InvalidJSONError(RequestException):
 42 |     """A JSON error occurred. not used"""
 43 | 
 44 | 
 45 | # not used
 46 | class JSONDecodeError(InvalidJSONError, json.JSONDecodeError):
 47 |     """Couldn't decode the text into json. not used"""
 48 | 
 49 | 
 50 | class HTTPError(RequestException):
 51 |     """An HTTP error occurred."""
 52 | 
 53 | 
 54 | class IncompleteRead(HTTPError):
 55 |     """Incomplete read of content"""
 56 | 
 57 | 
 58 | class ConnectionError(RequestException):
 59 |     """A Connection error occurred."""
 60 | 
 61 | 
 62 | class DNSError(ConnectionError):
 63 |     """Could not resolve"""
 64 | 
 65 | 
 66 | class ProxyError(RequestException):
 67 |     """A proxy error occurred."""
 68 | 
 69 | 
 70 | class SSLError(ConnectionError):
 71 |     """An SSL error occurred."""
 72 | 
 73 | 
 74 | class CertificateVerifyError(SSLError):
 75 |     """Raised when certificate validated has failed"""
 76 | 
 77 | 
 78 | class Timeout(RequestException):
 79 |     """The request timed out."""
 80 | 
 81 | 
 82 | # not used
 83 | class ConnectTimeout(ConnectionError, Timeout):
 84 |     """The request timed out while trying to connect to the remote server.
 85 | 
 86 |     Requests that produced this error are safe to retry.
 87 | 
 88 |     not used
 89 |     """
 90 | 
 91 | 
 92 | # not used
 93 | class ReadTimeout(Timeout):
 94 |     """The server did not send any data in the allotted amount of time. not used"""
 95 | 
 96 | 
 97 | # not used
 98 | class URLRequired(RequestException):
 99 |     """A valid URL is required to make a request. not used"""
100 | 
101 | 
102 | class TooManyRedirects(RequestException):
103 |     """Too many redirects."""
104 | 
105 | 
106 | # not used
107 | class MissingSchema(RequestException, ValueError):
108 |     """The URL scheme (e.g. http or https) is missing. not used"""
109 | 
110 | 
111 | class InvalidSchema(RequestException, ValueError):
112 |     """The URL scheme provided is either invalid or unsupported. not used"""
113 | 
114 | 
115 | class InvalidURL(RequestException, ValueError):
116 |     """The URL provided was somehow invalid."""
117 | 
118 | 
119 | # not used
120 | class InvalidHeader(RequestException, ValueError):
121 |     """The header value provided was somehow invalid. not used"""
122 | 
123 | 
124 | # not used
125 | class InvalidProxyURL(InvalidURL):
126 |     """The proxy URL provided is invalid. not used"""
127 | 
128 | 
129 | # not used
130 | class ChunkedEncodingError(RequestException):
131 |     """The server declared chunked encoding but sent an invalid chunk. not used"""
132 | 
133 | 
134 | # not used
135 | class ContentDecodingError(RequestException):
136 |     """Failed to decode response content. not used"""
137 | 
138 | 
139 | # not used
140 | class StreamConsumedError(RequestException, TypeError):
141 |     """The content for this response was already consumed. not used"""
142 | 
143 | 
144 | # does not support
145 | class RetryError(RequestException):
146 |     """Custom retries logic failed. not used"""
147 | 
148 | 
149 | # not used
150 | class UnrewindableBodyError(RequestException):
151 |     """Requests encountered an error when trying to rewind a body. not used"""
152 | 
153 | 
154 | class InterfaceError(RequestException):
155 |     """A specified outgoing interface could not be used."""
156 | 
157 | 
158 | # Warnings
159 | 
160 | 
161 | # TODO: use this warning as a base
162 | class RequestsWarning(Warning):
163 |     """Base warning for Requests. not used"""
164 | 
165 | 
166 | # not used
167 | class FileModeWarning(RequestsWarning, DeprecationWarning):
168 |     """A file was opened in text mode, but Requests determined its binary length.
169 |     not used"""
170 | 
171 | 
172 | # not used
173 | class RequestsDependencyWarning(RequestsWarning):
174 |     """An imported dependency doesn't match the expected version range."""
175 | 
176 | 
177 | CODE2ERROR = {
178 |     0: RequestException,
179 |     CurlECode.UNSUPPORTED_PROTOCOL: InvalidSchema,
180 |     CurlECode.URL_MALFORMAT: InvalidURL,
181 |     CurlECode.COULDNT_RESOLVE_PROXY: ProxyError,
182 |     CurlECode.COULDNT_RESOLVE_HOST: DNSError,
183 |     CurlECode.COULDNT_CONNECT: ConnectionError,
184 |     CurlECode.WEIRD_SERVER_REPLY: ConnectionError,
185 |     CurlECode.REMOTE_ACCESS_DENIED: ConnectionError,
186 |     CurlECode.HTTP2: HTTPError,
187 |     CurlECode.HTTP_RETURNED_ERROR: HTTPError,
188 |     CurlECode.WRITE_ERROR: RequestException,
189 |     CurlECode.READ_ERROR: RequestException,
190 |     CurlECode.OUT_OF_MEMORY: RequestException,
191 |     CurlECode.OPERATION_TIMEDOUT: Timeout,
192 |     CurlECode.SSL_CONNECT_ERROR: SSLError,
193 |     CurlECode.INTERFACE_FAILED: InterfaceError,
194 |     CurlECode.TOO_MANY_REDIRECTS: TooManyRedirects,
195 |     CurlECode.UNKNOWN_OPTION: RequestException,
196 |     CurlECode.SETOPT_OPTION_SYNTAX: RequestException,
197 |     CurlECode.GOT_NOTHING: ConnectionError,
198 |     CurlECode.SSL_ENGINE_NOTFOUND: SSLError,
199 |     CurlECode.SSL_ENGINE_SETFAILED: SSLError,
200 |     CurlECode.SEND_ERROR: ConnectionError,
201 |     CurlECode.RECV_ERROR: ConnectionError,
202 |     CurlECode.SSL_CERTPROBLEM: SSLError,
203 |     CurlECode.SSL_CIPHER: SSLError,
204 |     CurlECode.PEER_FAILED_VERIFICATION: CertificateVerifyError,
205 |     CurlECode.BAD_CONTENT_ENCODING: HTTPError,
206 |     CurlECode.SSL_ENGINE_INITFAILED: SSLError,
207 |     CurlECode.SSL_CACERT_BADFILE: SSLError,
208 |     CurlECode.SSL_CRL_BADFILE: SSLError,
209 |     CurlECode.SSL_ISSUER_ERROR: SSLError,
210 |     CurlECode.SSL_PINNEDPUBKEYNOTMATCH: SSLError,
211 |     CurlECode.SSL_INVALIDCERTSTATUS: SSLError,
212 |     CurlECode.HTTP2_STREAM: HTTPError,
213 |     CurlECode.HTTP3: HTTPError,
214 |     CurlECode.QUIC_CONNECT_ERROR: ConnectionError,
215 |     CurlECode.PROXY: ProxyError,
216 |     CurlECode.SSL_CLIENTCERT: SSLError,
217 |     CurlECode.ECH_REQUIRED: SSLError,
218 |     CurlECode.PARTIAL_FILE: IncompleteRead,
219 | }
220 | 
221 | 
222 | # credits: https://github.com/yt-dlp/yt-dlp/blob/master/yt_dlp/networking/_curlcffi.py#L241
223 | # Unlicense
224 | def code2error(code: Union[CurlECode, Literal[0]], msg: str):
225 |     if code == CurlECode.RECV_ERROR and "CONNECT" in msg:
226 |         return ProxyError
227 |     return CODE2ERROR.get(code, RequestException)
228 | 


--------------------------------------------------------------------------------
/docs/impersonate/customize.rst:
--------------------------------------------------------------------------------
  1 | How to use my own fingerprints? e.g. okhttp
  2 | ------
  3 | 
  4 | Use ``ja3=...``, ``akamai=...`` and ``extra_fp=...``.
  5 | 
  6 | You can retrieve the JA3 and Akamai strings using tools like WireShark or from TLS fingerprinting sites.
  7 | 
  8 | .. code-block:: python
  9 | 
 10 |    # OKHTTP impersonatation examples
 11 |    # credits: https://github.com/bogdanfinn/tls-client/blob/master/profiles/contributed_custom_profiles.go
 12 | 
 13 |    url = "https://tls.browserleaks.com/json"
 14 | 
 15 |    okhttp4_android10_ja3 = ",".join(
 16 |        [
 17 |            "771",
 18 |            "4865-4866-4867-49195-49196-52393-49199-49200-52392-49171-49172-156-157-47-53",
 19 |            "0-23-65281-10-11-35-16-5-13-51-45-43-21",
 20 |            "29-23-24",
 21 |            "0",
 22 |        ]
 23 |    )
 24 | 
 25 |    okhttp4_android10_akamai = "4:16777216|16711681|0|m,p,a,s"
 26 | 
 27 |    extra_fp = {
 28 |        "tls_signature_algorithms": [
 29 |            "ecdsa_secp256r1_sha256",
 30 |            "rsa_pss_rsae_sha256",
 31 |            "rsa_pkcs1_sha256",
 32 |            "ecdsa_secp384r1_sha384",
 33 |            "rsa_pss_rsae_sha384",
 34 |            "rsa_pkcs1_sha384",
 35 |            "rsa_pss_rsae_sha512",
 36 |            "rsa_pkcs1_sha512",
 37 |            "rsa_pkcs1_sha1",
 38 |        ]
 39 |        # other options:
 40 |        # tls_min_version: int = CurlSslVersion.TLSv1_2
 41 |        # tls_grease: bool = False
 42 |        # tls_permute_extensions: bool = False
 43 |        # tls_cert_compression: Literal["zlib", "brotli"] = "brotli"
 44 |        # tls_signature_algorithms: Optional[List[str]] = None
 45 |        # http2_stream_weight: int = 256
 46 |        # http2_stream_exclusive: int = 1
 47 | 
 48 |        # See requests/impersonate.py and tests/unittest/test_impersonate.py for more examples
 49 |    }
 50 | 
 51 | 
 52 |    r = curl_cffi.get(
 53 |        url, ja3=okhttp4_android10_ja3, akamai=okhttp4_android10_akamai, extra_fp=extra_fp
 54 |    )
 55 |    print(r.json())
 56 | 
 57 | 
 58 | JA3 and Akamai String Format
 59 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 60 | 
 61 | A JA3 string is a simple, comma-separated representation of the key fields in a TLS ClientHello. It consists of five parts:
 62 | 
 63 | - SSL/TLS Version, The numeric version the client requests (e.g. 771 for TLS 1.2).
 64 | - Cipher Suites, A hyphen-separated list of all cipher suite IDs offered by the client (e.g. 4865-4866-4867-49195-49196).
 65 | - Extension IDs, A hyphen-separated list of all TLS extension numbers the client includes (e.g. 0-11-10-35-16-5).
 66 | - Supported Groups (aka “Elliptic Curves”), A hyphen-separated list of curve IDs the client supports for ECDHE (e.g. 29-23-24).
 67 | - EC Point Formats, A hyphen-separated list of the point‐format IDs (almost always just 0 for “uncompressed”) (e.g. 0).
 68 | 
 69 | They’re concatenated in that exact order, with commas between fields. For example:
 70 | 
 71 | .. code-block::
 72 | 
 73 |     771,4865-4866-4867-49195-49196,0-11-10-35-16-5,29-23-24,0
 74 | 
 75 | Note that Chrome permutes the extension order on each request, so there is a new format called JA3N, which uses sorted extension_id list.
 76 | 
 77 | The Akamai HTTP/2 fingerprint string encodes four client‐controlled protocol parameters, joined by the pipe character (|):
 78 | 
 79 | - SETTINGS, A semicolon‐separated list of ID:value pairs from the client’s initial SETTINGS frame. Each ID is a standard HTTP/2 setting identifier (e.g. 1 for HEADER_TABLE_SIZE, 4 for INITIAL_WINDOW_SIZE), and value is the client’s chosen value for that setting 
 80 | - WINDOW_UPDATE, A single integer: the value the client sends in its first WINDOW_UPDATE frame (or 0 if none was sent) 
 81 | - PRIORITY, Zero or more priority‐frame tuples, each formatted as ``StreamID:ExclusiveBit:DependentStreamID:Weight``. Multiple tuples are comma-separated. This captures any PRIORITY frames the client issues before sending headers 
 82 | - Pseudo-Header Order, The sequence in which the client sends HTTP/2 pseudo-headers in its request HEADERS frame, encoded as comma-separated single-letter codes:
 83 | 
 84 | 
 85 | .. code-block::
 86 |     m = :method
 87 |     s = :scheme
 88 |     p = :path
 89 |     a = :authority
 90 | 
 91 | Putting it all together, an example fingerprint might look like:
 92 | 
 93 | .. code-block::
 94 | 
 95 |     1:65536;4:131072;5:16384|12517377|3:0:0:201|m,p,a,s
 96 | 
 97 |     where:
 98 | 
 99 |     SETTINGS = 1:65536;4:131072;5:16384
100 |     WINDOW_UPDATE = 12517377
101 |     PRIORITY = 3:0:0:201
102 |     Pseudo-Header Order = m,p,a,s 
103 | 
104 | Although JA3 and Akamai fingerprint string already captures many of the aspects of a Hello Packet, there are still some fields are not covered and can be used to detect you.
105 | This is when the ``extra_fp`` option comes in, each field of this dict is pretty easy to understand. You should first set the ja3 and akamai string, then check if you have the
106 | identical fingerprint like your target. If not, use the ``extra_fp`` to further refine your impersonation.
107 | 
108 | 
109 | Using CURLOPTs
110 | ~~~~~~~~~~~~~~
111 | 
112 | The other way is to use the ``curlopt`` s to specify exactly which options you want to change.
113 | 
114 | To modify them, use ``curl.setopt(CurlOpt, value)``, for example:
115 | 
116 | .. code-block:: python
117 | 
118 |    import curl_cffi
119 |    from curl_cffi import Curl, CurlOpt
120 | 
121 |    c = Curl()
122 |    c.setopt(CurlOpt.HTTP2_PSEUDO_HEADERS_ORDER, "masp")
123 | 
124 |    # or
125 |    curl_cffi.get(url, curl_options={CurlOpt.HTTP2_PSEUDO_HEADERS_ORDER, "masp"})
126 | 
127 | Here are a list of options:
128 | 
129 | For TLS/JA3 fingerprints:
130 | 
131 | * https://curl.se/libcurl/c/CURLOPT_SSL_CIPHER_LIST.html
132 | 
133 | and non-standard TLS options created for this project:
134 | 
135 | * ``CURLOPT_SSL_ENABLE_ALPS``
136 | * ``CURLOPT_SSL_SIG_HASH_ALGS``
137 | * ``CURLOPT_SSL_CERT_COMPRESSION``
138 | * ``CURLOPT_SSL_ENABLE_TICKET``
139 | * ``CURLOPT_SSL_PERMUTE_EXTENSIONS``
140 | 
141 | For Akamai http2 fingerprints, you can fully customize the 3 parts:
142 | 
143 | * ``CURLOPT_HTTP2_PSEUDO_HEADERS_ORDER``, sets http2 pseudo header order, for example: ``masp`` (non-standard HTTP/2 options created for this project).
144 | * ``CURLOPT_HTTP2_SETTINGS`` sets the settings frame values, for example ``1:65536;3:1000;4:6291456;6:262144`` (non-standard HTTP/2 options created for this project).
145 | * ``CURLOPT_HTTP2_WINDOW_UPDATE`` sets initial window update value for http2, for example ``15663105`` (non-standard HTTP/2 options created for this project).
146 | 
147 | For a complete list of options and explanation, see the `curl-impersoante README`_.
148 | 
149 | .. _curl-impersonate README: https://github.com/lexiforest/curl-impersonate?tab=readme-ov-file#libcurl-impersonate
150 | 
151 | 
152 | How to toggle firefox-specific extensions?
153 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
154 | 
155 | There are a few special extensions from firefox that you should add extra options by ``extra_fp``:
156 | 
157 | Extension 34: delegated credentials
158 | 
159 | .. code-block:: python
160 | 
161 |    extra_fp = {
162 |        "tls_delegated_credential": "ecdsa_secp256r1_sha256:ecdsa_secp384r1_sha384:ecdsa_secp521r1_sha512:ecdsa_sha1"
163 |    }
164 | 
165 |    # Note that the ja3 string also includes extensiion: 34
166 |    ja3 = "771,4865-4867-4866-49195-49199-52393-52392-49196-49200-49162-49161-49171-49172-156-157-47-53,0-23-65281-10-11-35-16-5-34-18-51-43-13-45-28-27-65037,4588-29-23-24-25-256-257,0"
167 | 
168 |    r = curl_cffi.get(url, ja3=ja3, extra_fp=extra_fp)
169 | 
170 | Extension 28: record size limit
171 | 
172 | .. code-block:: python
173 | 
174 |    extra_fp = {
175 |        "tls_record_size_limit": 4001
176 |    }
177 | 
178 |    # Note that the ja3 string also includes extensiion: 28
179 |    ja3 = "771,4865-4867-4866-49195-49199-52393-52392-49196-49200-49162-49161-49171-49172-156-157-47-53,0-23-65281-10-11-35-16-5-34-18-51-43-13-45-28-27-65037,4588-29-23-24-25-256-257,0"
180 | 
181 |    r = curl_cffi.get(url, ja3=ja3, extra_fp=extra_fp)
182 | 
183 | 
184 | 


--------------------------------------------------------------------------------
/tests/unittest/test_impersonate.py:
--------------------------------------------------------------------------------
  1 | import pytest
  2 | 
  3 | from curl_cffi import requests
  4 | from curl_cffi.const import CurlHttpVersion, CurlSslVersion
  5 | 
  6 | 
  7 | def test_impersonate_with_version(server):
  8 |     # the test server does not understand http/2
  9 |     r = requests.get(
 10 |         str(server.url), impersonate="chrome120", http_version=CurlHttpVersion.V1_1
 11 |     )
 12 |     assert r.status_code == 200
 13 |     r = requests.get(
 14 |         str(server.url), impersonate="safari17_0", http_version=CurlHttpVersion.V1_1
 15 |     )
 16 |     assert r.status_code == 200
 17 | 
 18 | 
 19 | def test_impersonate_without_version(server):
 20 |     r = requests.get(
 21 |         str(server.url), impersonate="chrome", http_version=CurlHttpVersion.V1_1
 22 |     )
 23 |     assert r.status_code == 200
 24 |     r = requests.get(
 25 |         str(server.url), impersonate="safari_ios", http_version=CurlHttpVersion.V1_1
 26 |     )
 27 |     assert r.status_code == 200
 28 | 
 29 | 
 30 | def test_impersonate_non_exist(server):
 31 |     with pytest.raises(requests.RequestsError, match="Impersonating"):
 32 |         requests.get(str(server.url), impersonate="edge2131")
 33 |     with pytest.raises(requests.RequestsError, match="Impersonating"):
 34 |         requests.get(str(server.url), impersonate="chrome2952")
 35 | 
 36 | 
 37 | # TODO: implement local ja3/akamai verification server with th1.
 38 | 
 39 | 
 40 | @pytest.mark.skip(reason="warning is used")
 41 | def test_costomized_no_impersonate_coexist(server):
 42 |     with pytest.raises(requests.RequestsError):
 43 |         requests.get(str(server.url), impersonate="chrome", ja3=",,,,")
 44 |     with pytest.raises(requests.RequestsError):
 45 |         requests.get(str(server.url), impersonate="chrome", akamai="|||")
 46 | 
 47 | 
 48 | @pytest.mark.skip(reason="website is down")
 49 | def test_customized_ja3_chrome126():
 50 |     url = "https://tls.browserleaks.com/json"
 51 |     ja3 = (
 52 |         "771,4865-4866-4867-49195-49199-49196-49200-52393-52392-49171-49172-156-157-47-53,"
 53 |         "0-65037-27-51-13-43-5-18-17513-65281-23-10-45-35-11-16,25497-29-23-24,0"
 54 |     )
 55 |     r = requests.get(url, ja3=ja3).json()
 56 |     assert r["ja3_text"] == ja3
 57 | 
 58 | 
 59 | @pytest.mark.skip(reason="not working")
 60 | def test_customized_ja3_tls_version():
 61 |     url = "https://tls.browserleaks.com/json"
 62 |     ja3 = (
 63 |         "770,4865-4866-4867-49195-49199-49196-49200-52393-52392-49171-49172-156-157-47-53,"
 64 |         "0-65037-27-51-13-43-5-18-17513-65281-23-10-45-35-11-16,25497-29-23-24,0"
 65 |     )
 66 |     r = requests.get(url, ja3=ja3).json()
 67 |     tls_version, _, _, _, _ = r["ja3_text"].split(",")
 68 |     assert tls_version == "770"
 69 | 
 70 | 
 71 | @pytest.mark.skip(reason="website is down")
 72 | def test_customized_ja3_ciphers():
 73 |     url = "https://tls.browserleaks.com/json"
 74 |     ja3 = (
 75 |         "771,4865-4866-4867-49195-49199-49196-49200-52393-52392-49171,"
 76 |         "0-65037-27-51-13-43-5-18-17513-65281-23-10-45-35-11-16,25497-29-23-24,0"
 77 |     )
 78 |     r = requests.get(url, ja3=ja3).json()
 79 |     _, ciphers, _, _, _ = r["ja3_text"].split(",")
 80 |     assert ciphers == "4865-4866-4867-49195-49199-49196-49200-52393-52392-49171"
 81 | 
 82 | 
 83 | # TODO: change to parameterized test
 84 | @pytest.mark.skip(reason="website is down")
 85 | def test_customized_ja3_extensions():
 86 |     url = "https://tls.browserleaks.com/json"
 87 |     ja3 = (
 88 |         "771,4865-4866-4867-49195-49199-49196-49200-52393-52392-49171-49172-156-157-47-53,"
 89 |         "65037-65281-0-11-23-5-18-27-16-17513-10-35-43-45-13-51,25497-29-23-24,0"
 90 |     )
 91 |     r = requests.get(url, ja3=ja3).json()
 92 |     _, _, extensions, _, _ = r["ja3_text"].split(",")
 93 |     assert extensions == "65037-65281-0-11-23-5-18-27-16-17513-10-35-43-45-13-51"
 94 | 
 95 |     ja3 = (
 96 |         "771,4865-4866-4867-49195-49199-49196-49200-52393-52392-49171-49172-156-157-47-53,"
 97 |         "65281-0-11-23-5-18-27-16-17513-10-35-43-45-13-51,25497-29-23-24,0"
 98 |     )
 99 |     r = requests.get(url, ja3=ja3).json()
100 |     _, _, extensions, _, _ = r["ja3_text"].split(",")
101 |     assert extensions == "65281-0-11-23-5-18-27-16-17513-10-35-43-45-13-51"
102 | 
103 |     ja3 = (
104 |         "771,4865-4866-4867-49195-49199-49196-49200-52393-52392-49171-49172-156-157-47-53,"
105 |         "65281-0-11-23-27-16-17513-10-35-43-45-13-51,25497-29-23-24,0"
106 |     )
107 |     r = requests.get(url, ja3=ja3).json()
108 |     _, _, extensions, _, _ = r["ja3_text"].split(",")
109 |     assert extensions == "65281-0-11-23-27-16-17513-10-35-43-45-13-51"
110 | 
111 |     # removed enable session_ticket()
112 |     ja3 = (
113 |         "771,4865-4866-4867-49195-49199-49196-49200-52393-52392-49171-49172-156-157-47-53,"
114 |         "65281-0-11-23-5-18-27-16-17513-10-43-45-13-51,25497-29-23-24,0"
115 |     )
116 |     r = requests.get(url, ja3=ja3).json()
117 |     _, _, extensions, _, _ = r["ja3_text"].split(",")
118 |     assert extensions == "65281-0-11-23-5-18-27-16-17513-10-43-45-13-51"
119 | 
120 |     # new alps code point
121 |     ja3 = (
122 |         "771,4865-4866-4867-49195-49199-49196-49200-52393-52392-49171-49172-156-157-47-53,"
123 |         "0-5-10-11-13-16-18-23-27-35-43-45-51-17613-65037-65281,4588-29-23-24,0"
124 |     )
125 |     r = requests.get(url, ja3=ja3).json()
126 |     _, _, extensions, _, _ = r["ja3_text"].split(",")
127 |     assert extensions == "0-5-10-11-13-16-18-23-27-35-43-45-51-17613-65037-65281"
128 | 
129 | 
130 | @pytest.mark.skip(reason="website is down")
131 | def test_customized_ja3_curves():
132 |     url = "https://tls.browserleaks.com/json"
133 |     ja3 = (
134 |         "771,4865-4866-4867-49195-49199-49196-49200-52393-52392-49171-49172-156-157-47-53,"
135 |         "0-65037-27-51-13-43-5-18-17513-65281-23-10-45-35-11-16,25497-24-23-29,0"
136 |     )
137 |     r = requests.get(url, ja3=ja3).json()
138 |     _, _, _, curves, _ = r["ja3_text"].split(",")
139 |     assert curves == "25497-24-23-29"
140 | 
141 | 
142 | @pytest.mark.skip(reason="website is down")
143 | def test_customized_akamai_chrome126():
144 |     url = "https://tls.browserleaks.com/json"
145 |     akamai = "1:65536;2:0;4:6291456;6:262144|15663105|0|m,a,s,p"
146 |     r = requests.get(url, akamai=akamai).json()
147 |     assert r["akamai_text"] == akamai
148 | 
149 | 
150 | @pytest.mark.skip(reason="website is down")
151 | def test_customized_akamai_safari():
152 |     url = "https://tls.browserleaks.com/json"
153 |     akamai = "2:0;4:4194304;3:100|10485760|0|m,s,p,a"
154 |     r = requests.get(url, akamai=akamai).json()
155 |     assert r["akamai_text"] == akamai
156 | 
157 |     # test_tls_peet_ws_settings
158 |     r = requests.get(url, akamai=akamai.replace(";", ",")).json()
159 |     assert r["akamai_text"] == akamai
160 | 
161 | 
162 | @pytest.mark.skip(reason="Unstable API")
163 | def test_customized_extra_fp_sig_hash_algs():
164 |     url = "https://tls.peet.ws/api/all"
165 |     safari_algs = [
166 |         "ecdsa_secp256r1_sha256",
167 |         "rsa_pss_rsae_sha256",
168 |         "rsa_pkcs1_sha256",
169 |         "ecdsa_secp384r1_sha384",
170 |         "ecdsa_sha1",
171 |         "rsa_pss_rsae_sha384",
172 |         "rsa_pss_rsae_sha384",
173 |         "rsa_pkcs1_sha384",
174 |         "rsa_pss_rsae_sha512",
175 |         "rsa_pkcs1_sha512",
176 |         "rsa_pkcs1_sha1",
177 |     ]
178 |     fp = requests.ExtraFingerprints(tls_signature_algorithms=safari_algs)
179 |     r = requests.get(url, extra_fp=fp).json()
180 |     result_algs = []
181 |     for ex in r["tls"]["extensions"]:
182 |         if ex["name"] == "signature_algorithms (13)":
183 |             result_algs = ex["signature_algorithms"]
184 |     assert safari_algs == result_algs
185 | 
186 | 
187 | @pytest.mark.skip(reason="Unstable API")
188 | def test_customized_extra_fp_tls_min_version():
189 |     url = "https://tls.peet.ws/api/all"
190 |     safari_min_version = CurlSslVersion.TLSv1_0
191 |     fp = requests.ExtraFingerprints(tls_min_version=safari_min_version)
192 |     r = requests.get(url, extra_fp=fp).json()
193 |     for ex in r["tls"]["extensions"]:
194 |         if ex["name"] == "supported_versions (43)":
195 |             # TLS 1.0 1.1, 1.2, 1.3
196 |             assert len(ex["versions"]) >= 4
197 | 
198 | 
199 | @pytest.mark.skip(reason="Unstable API")
200 | def test_customized_extra_fp_grease():
201 |     url = "https://tls.peet.ws/api/all"
202 |     fp = requests.ExtraFingerprints(tls_grease=True)
203 |     r = requests.get(url, extra_fp=fp).json()
204 |     assert "TLS_GREASE" in r["tls"]["ciphers"][0]
205 | 
206 | 
207 | @pytest.mark.skip(reason="website is down")
208 | def test_customized_extra_fp_permute():
209 |     url = "https://tls.browserleaks.com/json"
210 |     ja3 = (
211 |         "771,4865-4866-4867-49195-49199-49196-49200-52393-52392-49171-49172-156-157-47-53,"
212 |         "65037-65281-0-11-23-5-18-27-16-17513-10-35-43-45-13-51,25497-29-23-24,0"
213 |     )
214 | 
215 |     r = requests.get(url, ja3=ja3).json()
216 |     _, _, extensions, _, _ = r["ja3_text"].split(",")
217 |     assert extensions == "65037-65281-0-11-23-5-18-27-16-17513-10-35-43-45-13-51"
218 | 
219 |     r = requests.get(
220 |         url, ja3=ja3, extra_fp=requests.ExtraFingerprints(tls_permute_extensions=True)
221 |     ).json()
222 |     _, _, extensions, _, _ = r["ja3_text"].split(",")
223 |     assert extensions != "65037-65281-0-11-23-5-18-27-16-17513-10-35-43-45-13-51"
224 | 
225 | 
226 | @pytest.mark.skip(reason="Unstable API")
227 | def test_customized_extra_fp_cert_compression():
228 |     url = "https://tls.peet.ws/api/all"
229 |     fp = requests.ExtraFingerprints(tls_cert_compression="zlib")
230 |     r = requests.get(url, extra_fp=fp).json()
231 |     result_algs = []
232 |     for ex in r["tls"]["extensions"]:
233 |         if ex["name"] == "compress_certificate (27)":
234 |             result_algs = ex["algorithms"]
235 |     assert result_algs[0] == "zlib (1)"
236 | 
237 | 
238 | @pytest.mark.skip(reason="Unstable API")
239 | def test_customized_extra_fp_stream_weight():
240 |     url = "https://tls.peet.ws/api/all"
241 |     fp = requests.ExtraFingerprints(http2_stream_weight=64)
242 |     r = requests.get(url, extra_fp=fp).json()
243 |     assert r["http2"]["sent_frames"][2]["priority"]["weight"] == 64
244 | 
245 | 
246 | @pytest.mark.skip(reason="Unstable API")
247 | def test_customized_extra_fp_stream_exclusive():
248 |     url = "https://tls.peet.ws/api/all"
249 |     fp = requests.ExtraFingerprints(http2_stream_exclusive=0)
250 |     r = requests.get(url, extra_fp=fp).json()
251 |     assert r["http2"]["sent_frames"][2]["priority"]["exclusive"] == 0
252 | 


--------------------------------------------------------------------------------
/tests/unittest/test_curl.py:
--------------------------------------------------------------------------------
  1 | import base64
  2 | import json
  3 | from io import BytesIO
  4 | from typing import cast
  5 | 
  6 | import pytest
  7 | 
  8 | import curl_cffi
  9 | from curl_cffi import Curl, CurlError, CurlInfo, CurlOpt
 10 | 
 11 | #######################################################################################
 12 | # testing setopt
 13 | #######################################################################################
 14 | 
 15 | 
 16 | def test_get(server):
 17 |     c = Curl()
 18 |     c.setopt(CurlOpt.URL, str(server.url).encode())
 19 |     c.perform()
 20 | 
 21 | 
 22 | def test_post(server):
 23 |     c = Curl()
 24 |     url = str(server.url.copy_with(path="/echo_body"))
 25 |     c.setopt(CurlOpt.URL, url.encode())
 26 |     c.setopt(CurlOpt.POST, 1)
 27 |     c.setopt(CurlOpt.POSTFIELDS, b"foo=bar")
 28 |     buffer = BytesIO()
 29 |     c.setopt(CurlOpt.WRITEDATA, buffer)
 30 |     c.perform()
 31 |     assert buffer.getvalue() == b"foo=bar"
 32 | 
 33 | 
 34 | def test_put(server):
 35 |     c = Curl()
 36 |     c.setopt(CurlOpt.URL, str(server.url).encode())
 37 |     c.setopt(CurlOpt.CUSTOMREQUEST, b"PUT")
 38 |     c.perform()
 39 | 
 40 | 
 41 | def test_delete(server):
 42 |     c = Curl()
 43 |     c.setopt(CurlOpt.URL, str(server.url).encode())
 44 |     c.setopt(CurlOpt.CUSTOMREQUEST, b"DELETE")
 45 |     c.perform()
 46 | 
 47 | 
 48 | def test_post_data_with_size(server):
 49 |     c = Curl()
 50 |     url = str(server.url.copy_with(path="/echo_body"))
 51 |     c.setopt(CurlOpt.URL, url.encode())
 52 |     c.setopt(CurlOpt.CUSTOMREQUEST, b"POST")
 53 |     c.setopt(CurlOpt.POSTFIELDS, b"\0" * 7)
 54 |     c.setopt(CurlOpt.POSTFIELDSIZE, 7)
 55 |     buffer = BytesIO()
 56 |     c.setopt(CurlOpt.WRITEDATA, buffer)
 57 |     c.perform()
 58 |     assert buffer.getvalue() == b"\0" * 7
 59 | 
 60 | 
 61 | def test_headers(server):
 62 |     c = Curl()
 63 |     url = str(server.url.copy_with(path="/echo_headers"))
 64 |     c.setopt(CurlOpt.URL, url.encode())
 65 |     c.setopt(CurlOpt.HTTPHEADER, [b"Foo: bar"])
 66 |     buffer = BytesIO()
 67 |     c.setopt(CurlOpt.WRITEDATA, buffer)
 68 |     c.perform()
 69 |     headers = json.loads(buffer.getvalue().decode())
 70 |     assert headers["Foo"][0] == "bar"
 71 | 
 72 |     # https://github.com/lexiforest/curl_cffi/issues/16
 73 |     c.setopt(CurlOpt.HTTPHEADER, [b"Foo: baz"])
 74 |     buffer = BytesIO()
 75 |     c.setopt(CurlOpt.WRITEDATA, buffer)
 76 |     c.perform()
 77 |     headers = json.loads(buffer.getvalue().decode())
 78 |     assert headers["Foo"][0] == "baz"
 79 | 
 80 | 
 81 | def test_proxy_headers(server):
 82 |     # XXX: only tests that proxy header is not present for target server, should add
 83 |     # tests that verifies proxy headers are sent to proxy server.
 84 |     c = Curl()
 85 |     url = str(server.url.copy_with(path="/echo_headers"))
 86 |     c.setopt(CurlOpt.URL, url.encode())
 87 |     c.setopt(CurlOpt.PROXYHEADER, [b"Foo: bar"])
 88 |     buffer = BytesIO()
 89 |     c.setopt(CurlOpt.WRITEDATA, buffer)
 90 |     c.perform()
 91 |     headers = json.loads(buffer.getvalue().decode())
 92 |     assert "Foo" not in headers
 93 | 
 94 |     # https://github.com/lexiforest/curl_cffi/issues/16
 95 |     c.setopt(CurlOpt.PROXYHEADER, [b"Foo: baz"])
 96 |     buffer = BytesIO()
 97 |     c.setopt(CurlOpt.WRITEDATA, buffer)
 98 |     c.perform()
 99 |     headers = json.loads(buffer.getvalue().decode())
100 |     assert "Foo" not in headers
101 | 
102 | 
103 | def test_write_function_memory_leak(server):
104 |     c = Curl()
105 |     for _ in range(10):
106 |         url = str(server.url.copy_with(path="/echo_headers"))
107 |         c.setopt(CurlOpt.URL, url.encode())
108 |         c.setopt(CurlOpt.HTTPHEADER, [b"Foo: bar"])
109 |         buffer = BytesIO()
110 |         c.setopt(CurlOpt.WRITEDATA, buffer)
111 |         c.perform()
112 |     assert c._write_handle is None
113 | 
114 | 
115 | def test_write_function(server):
116 |     c = Curl()
117 |     url = str(server.url.copy_with(path="/echo_body"))
118 |     c.setopt(CurlOpt.URL, url.encode())
119 |     c.setopt(CurlOpt.POST, 1)
120 |     c.setopt(CurlOpt.POSTFIELDS, b"foo=bar")
121 | 
122 |     buffer = BytesIO()
123 | 
124 |     def write(data: bytes):
125 |         buffer.write(data)
126 |         return len(data)
127 | 
128 |     c.setopt(CurlOpt.WRITEFUNCTION, write)
129 |     c.perform()
130 |     assert buffer.getvalue() == b"foo=bar"
131 | 
132 | 
133 | def test_cookies(server):
134 |     c = Curl()
135 |     url = str(server.url.copy_with(path="/echo_cookies"))
136 |     c.setopt(CurlOpt.URL, url.encode())
137 |     c.setopt(CurlOpt.COOKIE, b"foo=bar")
138 |     buffer = BytesIO()
139 |     c.setopt(CurlOpt.WRITEDATA, buffer)
140 |     c.perform()
141 |     cookies = json.loads(buffer.getvalue().decode())
142 |     # print(cookies)
143 |     assert cookies["foo"] == "bar"
144 | 
145 | 
146 | def test_auth(server):
147 |     c = Curl()
148 |     url = str(server.url.copy_with(path="/echo_headers"))
149 |     c.setopt(CurlOpt.URL, url.encode())
150 |     c.setopt(CurlOpt.USERNAME, b"foo")
151 |     c.setopt(CurlOpt.PASSWORD, b"bar")
152 |     buffer = BytesIO()
153 |     c.setopt(CurlOpt.WRITEDATA, buffer)
154 |     c.perform()
155 |     headers = json.loads(buffer.getvalue().decode())
156 |     assert (
157 |         headers["Authorization"][0] == f"Basic {base64.b64encode(b'foo:bar').decode()}"
158 |     )
159 | 
160 | 
161 | def test_timeout(server):
162 |     c = Curl()
163 |     url = str(server.url.copy_with(path="/slow_response"))
164 |     c.setopt(CurlOpt.URL, url.encode())
165 |     c.setopt(CurlOpt.TIMEOUT_MS, 100)
166 |     with pytest.raises(CurlError, match=r"curl: \(28\)"):
167 |         c.perform()
168 | 
169 | 
170 | def test_repeated_headers_after_error(server):
171 |     c = Curl()
172 |     url = str(server.url.copy_with(path="/slow_response"))
173 |     c.setopt(CurlOpt.URL, url.encode())
174 |     c.setopt(CurlOpt.TIMEOUT_MS, 100)
175 |     c.setopt(CurlOpt.HTTPHEADER, [b"Foo: bar"])
176 |     with pytest.raises(CurlError, match=r"curl: \(28\)"):
177 |         c.perform()
178 | 
179 |     # another request
180 |     url = str(server.url.copy_with(path="/echo_headers"))
181 |     c.setopt(CurlOpt.URL, url.encode())
182 |     c.setopt(CurlOpt.HTTPHEADER, [b"Foo: bar"])
183 |     buffer = BytesIO()
184 |     c.setopt(CurlOpt.WRITEDATA, buffer)
185 |     c.perform()
186 |     headers = json.loads(buffer.getvalue().decode())
187 |     assert len(headers["Foo"]) == 1
188 |     # print(headers)
189 | 
190 | 
191 | def test_follow_redirect(server):
192 |     c = Curl()
193 |     url = str(server.url.copy_with(path="/redirect_301"))
194 |     c.setopt(CurlOpt.URL, url.encode())
195 |     c.setopt(CurlOpt.FOLLOWLOCATION, 1)
196 |     c.perform()
197 |     assert c.getinfo(CurlInfo.RESPONSE_CODE) == 200
198 | 
199 | 
200 | def test_not_follow_redirect(server):
201 |     c = Curl()
202 |     url = str(server.url.copy_with(path="/redirect_301"))
203 |     c.setopt(CurlOpt.URL, url.encode())
204 |     c.perform()
205 |     assert c.getinfo(CurlInfo.RESPONSE_CODE) == 301
206 | 
207 | 
208 | def test_http_proxy_changed_path(server):
209 |     c = Curl()
210 |     proxy_url = str(server.url).rstrip("/")
211 |     print("proxy url", proxy_url)
212 |     c.setopt(CurlOpt.URL, b"http://example.org")
213 |     c.setopt(CurlOpt.PROXY, proxy_url.encode())
214 |     buffer = BytesIO()
215 |     c.setopt(CurlOpt.WRITEDATA, buffer)
216 |     c.perform()
217 |     rsp = json.loads(buffer.getvalue().decode())
218 |     assert rsp["Hello"] == "http_proxy!"
219 | 
220 | 
221 | def test_https_proxy_using_connect(server):
222 |     c = Curl()
223 |     proxy_url = str(server.url)
224 |     c.setopt(CurlOpt.URL, b"https://example.org")
225 |     c.setopt(CurlOpt.PROXY, proxy_url.encode())
226 |     c.setopt(CurlOpt.HTTPPROXYTUNNEL, 1)
227 |     buffer = BytesIO()
228 |     c.setopt(CurlOpt.WRITEDATA, buffer)
229 |     with pytest.raises(CurlError, match=r"curl: \(35\)"):
230 |         c.perform()
231 | 
232 | 
233 | def test_verify(https_server):
234 |     c = Curl()
235 |     url = str(https_server.url)
236 |     c.setopt(CurlOpt.URL, url.encode())
237 |     with pytest.raises(CurlError, match="SSL certificate problem"):
238 |         c.perform()
239 | 
240 | 
241 | def test_verify_false(https_server):
242 |     c = Curl()
243 |     url = str(https_server.url)
244 |     c.setopt(CurlOpt.URL, url.encode())
245 |     c.setopt(CurlOpt.SSL_VERIFYPEER, 0)
246 |     c.setopt(CurlOpt.SSL_VERIFYHOST, 0)
247 |     c.perform()
248 | 
249 | 
250 | def test_referer(server):
251 |     c = Curl()
252 |     url = str(server.url.copy_with(path="/echo_headers"))
253 |     c.setopt(CurlOpt.URL, url.encode())
254 |     c.setopt(CurlOpt.REFERER, b"http://example.org")
255 |     buffer = BytesIO()
256 |     c.setopt(CurlOpt.WRITEDATA, buffer)
257 |     c.perform()
258 |     headers = json.loads(buffer.getvalue().decode())
259 |     assert headers["Referer"][0] == "http://example.org"
260 | 
261 | 
262 | #######################################################################################
263 | # testing getinfo
264 | #######################################################################################
265 | 
266 | 
267 | def test_effective_url(server):
268 |     c = Curl()
269 |     url = str(server.url.copy_with(path="/redirect_301"))
270 |     c.setopt(CurlOpt.URL, url.encode())
271 |     c.setopt(CurlOpt.FOLLOWLOCATION, 1)
272 |     c.perform()
273 |     assert c.getinfo(CurlInfo.EFFECTIVE_URL) == str(server.url).encode()
274 | 
275 | 
276 | def test_status_code(server):
277 |     c = Curl()
278 |     url = str(server.url)
279 |     c.setopt(CurlOpt.URL, url.encode())
280 |     c.perform()
281 |     assert c.getinfo(CurlInfo.RESPONSE_CODE) == 200
282 | 
283 | 
284 | def test_response_headers(server):
285 |     c = Curl()
286 |     url = str(server.url.copy_with(path="/set_headers"))
287 |     c.setopt(CurlOpt.URL, url.encode())
288 |     buffer = BytesIO()
289 |     c.setopt(CurlOpt.HEADERDATA, buffer)
290 |     c.perform()
291 |     headers = buffer.getvalue().decode()
292 |     for line in headers.splitlines():
293 |         if line.startswith("x-test"):
294 |             assert line.startswith("x-test: test")
295 | 
296 | 
297 | def test_response_cookies(server):
298 |     c = Curl()
299 |     url = str(server.url.copy_with(path="/set_cookies"))
300 |     c.setopt(CurlOpt.URL, url.encode())
301 |     buffer = BytesIO()
302 |     c.setopt(CurlOpt.HEADERDATA, buffer)
303 |     c.perform()
304 |     headers = buffer.getvalue()
305 |     cookie = c.parse_cookie_headers(headers.splitlines())
306 |     for name, morsel in cookie.items():
307 |         if name == "foo":
308 |             assert morsel.value == "bar"
309 | 
310 | 
311 | def test_elapsed(server):
312 |     c = Curl()
313 |     url = str(server.url)
314 |     c.setopt(CurlOpt.URL, url.encode())
315 |     c.perform()
316 |     assert cast(int, c.getinfo(CurlInfo.TOTAL_TIME)) > 0
317 | 
318 | 
319 | def test_reason(server):
320 |     c = Curl()
321 |     url = str(server.url)
322 |     c.setopt(CurlOpt.URL, url.encode())
323 |     buffer = BytesIO()
324 |     c.setopt(CurlOpt.HEADERDATA, buffer)
325 |     c.perform()
326 |     headers = buffer.getvalue()
327 |     headers = headers.splitlines()
328 |     assert c.get_reason_phrase(headers[0]) == b"OK"
329 | 
330 | 
331 | def test_resolve(server):
332 |     c = Curl()
333 |     url = "http://example.com:8000"
334 |     c.setopt(CurlOpt.RESOLVE, ["example.com:8000:127.0.0.1"])
335 |     c.setopt(CurlOpt.URL, url)
336 |     c.perform()
337 | 
338 | 
339 | def test_duphandle(server):
340 |     c = Curl()
341 |     c.setopt(CurlOpt.URL, str(server.url.copy_with(path="/redirect_loop")).encode())
342 |     c.setopt(CurlOpt.FOLLOWLOCATION, 1)
343 |     c.setopt(CurlOpt.MAXREDIRS, 2)
344 |     c = c.duphandle()
345 |     with pytest.raises(CurlError):
346 |         c.perform()
347 | 
348 | 
349 | def test_is_pro():
350 |     assert curl_cffi.is_pro() is False
351 | 


--------------------------------------------------------------------------------
/curl_cffi/requests/models.py:
--------------------------------------------------------------------------------
  1 | from contextlib import suppress
  2 | import queue
  3 | import re
  4 | import warnings
  5 | from concurrent.futures import Future
  6 | from typing import Any, Optional, Union
  7 | from collections.abc import Awaitable, Callable
  8 | from datetime import timedelta
  9 | 
 10 | from ..curl import Curl
 11 | from ..utils import CurlCffiWarning
 12 | from .cookies import Cookies
 13 | from .exceptions import HTTPError, RequestException
 14 | from .headers import Headers
 15 | 
 16 | # Use orjson if present
 17 | try:
 18 |     from orjson import loads
 19 | except ImportError:
 20 |     from json import loads
 21 | 
 22 | with suppress(ImportError):
 23 |     from markdownify import markdownify as md
 24 |     import readability as rd
 25 | 
 26 | CHARSET_RE = re.compile(r"charset=([\w-]+)")
 27 | STREAM_END = object()
 28 | 
 29 | 
 30 | def clear_queue(q: queue.Queue):
 31 |     with q.mutex:
 32 |         q.queue.clear()
 33 |         q.all_tasks_done.notify_all()
 34 |         q.unfinished_tasks = 0
 35 | 
 36 | 
 37 | class Request:
 38 |     """Representing a sent request."""
 39 | 
 40 |     def __init__(self, url: str, headers: Headers, method: str):
 41 |         self.url = url
 42 |         self.headers = headers
 43 |         self.method = method
 44 | 
 45 | 
 46 | class Response:
 47 |     """Contains information the server sends.
 48 | 
 49 |     Attributes:
 50 |         url: url used in the request.
 51 |         content: response body in bytes.
 52 |         text: response body in str.
 53 |         status_code: http status code.
 54 |         reason: http response reason, such as OK, Not Found.
 55 |         ok: is status_code in [200, 400)?
 56 |         headers: response headers.
 57 |         cookies: response cookies.
 58 |         elapsed: timedelta of the request duration.
 59 |         encoding: http body encoding.
 60 |         charset: alias for encoding.
 61 |         primary_ip: primary ip of the server.
 62 |         primary_port: primary port of the server.
 63 |         local_ip: local ip used in this connection.
 64 |         local_port: local port used in this connection.
 65 |         charset_encoding: encoding specified by the Content-Type header.
 66 |         default_encoding: encoding for decoding response content if charset is not found
 67 |             in headers. Defaults to "utf-8". Can be set to a callable for automatic
 68 |             detection.
 69 |         redirect_count: how many redirects happened.
 70 |         redirect_url: the final redirected url.
 71 |         http_version: http version used.
 72 |         history: history redirections, only headers are available.
 73 |         download_size: total downloaded bytes (body).
 74 |         upload_size: total uploaded bytes (body).
 75 |         header_size: total header size.
 76 |         request_size: request size.
 77 |         response_size: download_size + header_size
 78 |     """
 79 | 
 80 |     def __init__(self, curl: Optional[Curl] = None, request: Optional[Request] = None):
 81 |         self.curl = curl
 82 |         self.request = request
 83 |         self.url = ""
 84 |         self.content = b""
 85 |         self.status_code = 200
 86 |         self.reason = "OK"
 87 |         self.ok = True
 88 |         self.headers = Headers()
 89 |         self.cookies = Cookies()
 90 |         self.elapsed: timedelta = timedelta()
 91 |         self.default_encoding: Union[str, Callable[[bytes], str]] = "utf-8"
 92 |         self.redirect_count = 0
 93 |         self.redirect_url = ""
 94 |         self.http_version = 0
 95 |         self.primary_ip: str = ""
 96 |         self.primary_port: int = 0
 97 |         self.local_ip: str = ""
 98 |         self.local_port: int = 0
 99 |         self.history: list[dict[str, Any]] = []
100 |         self.infos: dict[str, Any] = {}
101 |         self.queue: Optional[queue.Queue] = None
102 |         self.stream_task: Optional[Future] = None
103 |         self.astream_task: Optional[Awaitable] = None
104 |         self.quit_now = None
105 |         self.download_size: int = 0
106 |         self.upload_size: int = 0
107 |         self.header_size: int = 0
108 |         self.request_size: int = 0
109 |         self.response_size: int = 0
110 | 
111 |     @property
112 |     def charset(self) -> str:
113 |         """Alias for encoding."""
114 |         return self.encoding
115 | 
116 |     @property
117 |     def encoding(self) -> str:
118 |         """
119 |         Determines the encoding to decode byte content into text.
120 | 
121 |         The method follows a specific priority to decide the encoding:
122 |         1. If ``.encoding`` has been explicitly set, it is used.
123 |         2. The encoding specified by the ``charset`` parameter in the ``Content-Type``
124 |             header.
125 |         3. The encoding specified by the ``default_encoding`` attribute. This can either
126 |             be a string (e.g., "utf-8") or a callable for charset autodetection.
127 |         """
128 |         if not hasattr(self, "_encoding"):
129 |             encoding = self.charset_encoding
130 |             if encoding is None:
131 |                 if isinstance(self.default_encoding, str):
132 |                     encoding = self.default_encoding
133 |                 elif callable(self.default_encoding):
134 |                     encoding = self.default_encoding(self.content)
135 |             self._encoding = encoding or "utf-8"
136 |         return self._encoding
137 | 
138 |     @encoding.setter
139 |     def encoding(self, value: str) -> None:
140 |         if hasattr(self, "_text"):
141 |             raise ValueError("Cannot set encoding after text has been accessed")
142 |         self._encoding = value
143 | 
144 |     @property
145 |     def charset_encoding(self) -> Optional[str]:
146 |         """Return the encoding, as specified by the Content-Type header."""
147 |         content_type = self.headers.get("Content-Type")
148 |         if content_type:
149 |             charset_match = CHARSET_RE.search(content_type)
150 |             return charset_match.group(1) if charset_match else None
151 |         return None
152 | 
153 |     @property
154 |     def text(self) -> str:
155 |         if not hasattr(self, "_text"):
156 |             if not self.content:
157 |                 self._text = ""
158 |             else:
159 |                 self._text = self._decode(self.content)
160 |         return self._text
161 | 
162 |     def markdown(self) -> str:
163 |         doc = rd.Document(self.content)
164 |         title = doc.title()
165 |         summary = doc.summary(html_partial=True)
166 |         body_as_md = md(f"<h1>{title}</h1><main>{summary}</main>")
167 |         return body_as_md
168 | 
169 |     def _decode(self, content: bytes) -> str:
170 |         try:
171 |             return content.decode(self.encoding, errors="replace")
172 |         except (UnicodeDecodeError, LookupError):
173 |             return content.decode("utf-8-sig")
174 | 
175 |     def raise_for_status(self):
176 |         """Raise an error if status code is not in [200, 400)"""
177 |         if not self.ok:
178 |             raise HTTPError(f"HTTP Error {self.status_code}: {self.reason}", 0, self)
179 | 
180 |     def iter_lines(self, chunk_size=None, decode_unicode=False, delimiter=None):
181 |         """
182 |         iterate streaming content line by line, separated by ``\\n``.
183 | 
184 |         Copied from: https://requests.readthedocs.io/en/latest/_modules/requests/models/
185 |         which is under the License: Apache 2.0
186 |         """
187 |         pending = None
188 | 
189 |         for chunk in self.iter_content(
190 |             chunk_size=chunk_size, decode_unicode=decode_unicode
191 |         ):
192 |             if pending is not None:
193 |                 chunk = pending + chunk
194 |             lines = chunk.split(delimiter) if delimiter else chunk.splitlines()
195 |             pending = (
196 |                 lines.pop()
197 |                 if lines and lines[-1] and chunk and lines[-1][-1] == chunk[-1]
198 |                 else None
199 |             )
200 | 
201 |             yield from lines
202 | 
203 |         if pending is not None:
204 |             yield pending
205 | 
206 |     def iter_content(self, chunk_size=None, decode_unicode=False):
207 |         """
208 |         iterate streaming content chunk by chunk in bytes.
209 |         """
210 |         if chunk_size:
211 |             warnings.warn(
212 |                 "chunk_size is ignored, there is no way to tell curl that.",
213 |                 CurlCffiWarning,
214 |                 stacklevel=2,
215 |             )
216 |         if decode_unicode:
217 |             raise NotImplementedError()
218 | 
219 |         assert self.queue and self.curl, "stream mode is not enabled."
220 | 
221 |         while True:
222 |             chunk = self.queue.get()
223 | 
224 |             # re-raise the exception if something wrong happened.
225 |             if isinstance(chunk, RequestException):
226 |                 self.curl.reset()
227 |                 raise chunk
228 | 
229 |             # end of stream.
230 |             if chunk is STREAM_END:
231 |                 break
232 | 
233 |             yield chunk
234 | 
235 |     def json(self, **kw):
236 |         """return a parsed json object of the content."""
237 |         return loads(self.content, **kw)
238 | 
239 |     def close(self):
240 |         """Close the streaming connection, only valid in stream mode."""
241 | 
242 |         if self.quit_now:
243 |             self.quit_now.set()
244 |         if self.stream_task:
245 |             self.stream_task.result()
246 | 
247 |     async def aiter_lines(self, chunk_size=None, decode_unicode=False, delimiter=None):
248 |         """
249 |         iterate streaming content line by line, separated by ``\\n``.
250 | 
251 |         Copied from: https://requests.readthedocs.io/en/latest/_modules/requests/models/
252 |         which is under the License: Apache 2.0
253 |         """
254 |         pending = None
255 | 
256 |         async for chunk in self.aiter_content(
257 |             chunk_size=chunk_size, decode_unicode=decode_unicode
258 |         ):
259 |             if pending is not None:
260 |                 chunk = pending + chunk
261 |             lines = chunk.split(delimiter) if delimiter else chunk.splitlines()
262 |             pending = (
263 |                 lines.pop()
264 |                 if lines and lines[-1] and chunk and lines[-1][-1] == chunk[-1]
265 |                 else None
266 |             )
267 | 
268 |             for line in lines:
269 |                 yield line
270 | 
271 |         if pending is not None:
272 |             yield pending
273 | 
274 |     async def aiter_content(self, chunk_size=None, decode_unicode=False):
275 |         """
276 |         iterate streaming content chunk by chunk in bytes.
277 |         """
278 |         if chunk_size:
279 |             warnings.warn(
280 |                 "chunk_size is ignored, there is no way to tell curl that.",
281 |                 CurlCffiWarning,
282 |                 stacklevel=2,
283 |             )
284 |         if decode_unicode:
285 |             raise NotImplementedError()
286 | 
287 |         assert self.queue and self.curl, "stream mode is not enabled."
288 | 
289 |         while True:
290 |             chunk = await self.queue.get()
291 | 
292 |             # re-raise the exception if something wrong happened.
293 |             if isinstance(chunk, RequestException):
294 |                 await self.aclose()
295 |                 raise chunk
296 | 
297 |             # end of stream.
298 |             if chunk is STREAM_END:
299 |                 await self.aclose()
300 |                 return
301 | 
302 |             yield chunk
303 | 
304 |     async def atext(self) -> str:
305 |         """
306 |         Return a decoded string.
307 |         """
308 |         return self._decode(await self.acontent())
309 | 
310 |     async def acontent(self) -> bytes:
311 |         """wait and read the streaming content in one bytes object."""
312 |         chunks = []
313 |         async for chunk in self.aiter_content():
314 |             chunks.append(chunk)
315 |         return b"".join(chunks)
316 | 
317 |     async def aclose(self):
318 |         """Close the streaming connection, only valid in stream mode."""
319 | 
320 |         if self.astream_task:
321 |             await self.astream_task
322 | 
323 |     # It prints the status code of the response instead of the object's memory location.
324 |     def __repr__(self) -> str:
325 |         return f"<Response [{self.status_code}]>"
326 | 


--------------------------------------------------------------------------------