├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── core ├── cookies.py ├── parser.py └── request.py ├── main.py ├── requirements.txt └── routes └── request.py /.gitignore: -------------------------------------------------------------------------------- 1 | **/__pycache__ 2 | logs.txt 3 | env/ -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8 2 | 3 | ADD . ./ 4 | 5 | RUN pip install -r requirements.txt 6 | RUN playwright install firefox 7 | RUN playwright install-deps 8 | 9 | EXPOSE ${PORT} 10 | 11 | CMD ["uvicorn", "main:app", "--host=0.0.0.0", "--port=8000"] -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | the MIT License 2 | 3 | Copyright (c) 2016 rhysd 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 9 | of the Software, and to permit persons to whom the Software is furnished to do so, 10 | subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 16 | INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 17 | PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 19 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR 20 | THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | **Doesn't work right now** 2 | 3 | # Cloudfire 4 | 5 | Cloudfire is a proxy to bypass cloudflare's javascript challenge (I'm under attack mode) using playwright. 6 | 7 | ## How it works 8 | 9 | Cloudfire uses playwright to solve cloudflare challenges and stores the cookies internally (redis can also be used). It then uses those cookies for further requests using aiohttp. Whenever the response returns a 503/403 status, it uses playwright to again solve the challenge and the cycle continues. 10 | 11 | ## Installation 12 | 13 | ```sh 14 | git clone https://github.com/lonely-code-cube/cloudfire 15 | cd cloudfire 16 | pip install -r requirements.txt 17 | playwright install firefox 18 | ``` 19 | 20 | ## Starting the server 21 | 22 | ``` 23 | uvicorn main:app 24 | ``` 25 | 26 | By default the server will run at port 8000. 27 | 28 | ## Usage 29 | 30 | ### Endpoints 31 | ```/get``` 32 | 33 | Method: ```POST``` 34 | 35 | Post body: 36 | 37 | ``` 38 | url: The url to make a get request 39 | ``` 40 | -------------------------------------------------------------------------------- /core/cookies.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | from urllib.parse import urlparse 3 | 4 | from redis.asyncio import Redis 5 | from playwright.async_api import Cookie 6 | 7 | 8 | class CookieManager: 9 | def __init__(self, redis: Redis = None) -> None: 10 | self.redis = redis 11 | self.cookie_map = {} 12 | 13 | async def add_cookies(self, cookies: List[Cookie]): 14 | if not self.redis: 15 | for cookie in cookies: 16 | if cookie["domain"].startswith('.'): 17 | domain = cookie["domain"][1:] 18 | else: 19 | domain = cookie["domain"] 20 | try: 21 | self.cookie_map[domain][cookie["name"]] = cookie["value"] 22 | except KeyError: 23 | self.cookie_map[domain] = {} 24 | self.cookie_map[domain][cookie["name"]] = cookie["value"] 25 | 26 | async def get_cookies(self, domain: str): 27 | base = urlparse(domain).netloc 28 | if not self.redis: 29 | return self.cookie_map.get(base) 30 | 31 | async def get_all_cookies(self): 32 | if not self.redis: 33 | return self.cookie_map 34 | -------------------------------------------------------------------------------- /core/parser.py: -------------------------------------------------------------------------------- 1 | from bs4 import BeautifulSoup 2 | import json, dataclasses 3 | 4 | @dataclasses.dataclass 5 | class Content: 6 | jsonable: bool 7 | content: str 8 | 9 | def try_json(content: str): 10 | soup = BeautifulSoup(content, 'html.parser') 11 | try: 12 | json.loads(soup.text) 13 | return Content(jsonable=True, content=soup.text) 14 | except json.JSONDecodeError: 15 | return Content(jsonable=False, content=soup.prettify()) -------------------------------------------------------------------------------- /core/request.py: -------------------------------------------------------------------------------- 1 | from playwright.async_api import BrowserContext, Cookie 2 | from aiohttp import ClientSession 3 | 4 | 5 | from typing import Optional, List 6 | 7 | from core.cookies import CookieManager 8 | 9 | 10 | class Response: 11 | def __init__( 12 | self, content: str, status: int, cookies: Optional[List[Cookie]] = None 13 | ) -> None: 14 | self.status = status 15 | self.cookies = cookies 16 | self.content = content 17 | 18 | 19 | class RequestManager: 20 | def __init__( 21 | self, 22 | session: ClientSession, 23 | browser: BrowserContext = None, 24 | cookie_manager: CookieManager = None, 25 | user_agent: str = None, 26 | ) -> None: 27 | self.session = session 28 | self.browser = browser 29 | self.cookie_manager = cookie_manager 30 | self.user_agent = user_agent 31 | 32 | async def _get_browser(self, url: str) -> Response: 33 | page = await self.browser.new_page() 34 | await page.goto(url) 35 | async with page.expect_response(url) as res_info: 36 | await page.wait_for_selector("h2#challenge-running", state="detached") 37 | await page.wait_for_load_state("domcontentloaded") 38 | status = (await res_info.value).status 39 | cookies = await page.context.cookies(url) 40 | html = await page.content() 41 | await page.close() 42 | await self.cookie_manager.add_cookies(cookies) 43 | return Response(cookies=cookies, content=html, status=status) 44 | 45 | async def _get_aiohttp( 46 | self, 47 | url: str, 48 | ) -> Response: 49 | headers = {"User-Agent": self.user_agent} 50 | if await self.cookie_manager.get_cookies(url): 51 | res = await self.session.get( 52 | url, cookies=await self.cookie_manager.get_cookies(url), headers=headers 53 | ) 54 | if res.status == 503 or res.status == 403: 55 | if not self.browser: 56 | raise Exception("Browser not provided. Aborting") 57 | return await self._get_browser(url=url) 58 | return Response(content=await res.text(), status=res.status) 59 | else: 60 | res = await self.session.get(url, headers=headers) 61 | if res.status == 503 or res.status == 403: 62 | if not self.browser: 63 | raise Exception("Browser not provided. Aborting") 64 | return await self._get_browser(url=url) 65 | return Response(content=await res.text(), status=res.status) 66 | 67 | async def get(self, url: str) -> Response: 68 | 69 | return await self._get_aiohttp(url) 70 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import logging, datetime 2 | 3 | from fastapi import FastAPI, Request 4 | from playwright.async_api import async_playwright 5 | from aiohttp import ClientSession 6 | 7 | from core import cookies 8 | from core.request import RequestManager 9 | from routes import request 10 | 11 | app = FastAPI() 12 | logger = logging.getLogger("internal") 13 | handler = logging.FileHandler("logs.txt") 14 | logger.addHandler(handler) 15 | logger.setLevel(logging.INFO) 16 | 17 | 18 | @app.on_event("startup") 19 | async def config(): 20 | logger.info("\n------New session starts here-------\n") 21 | logger.info(f"$DATE: {datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%S%z')}\n") 22 | app.state.pwsession = await async_playwright().start() 23 | app.state.user_agent = ( 24 | "Mozilla/5.0 (X11; Linux x86_64; rv:102.0) Gecko/20100101 Firefox/102.0" 25 | ) 26 | app.state.browser = await ( 27 | await app.state.pwsession.firefox.launch(headless=True) 28 | ).new_context(user_agent=app.state.user_agent) 29 | app.state.httpsession = ClientSession() 30 | app.state.cookie_manager = cookies.CookieManager() 31 | app.state.request = RequestManager( 32 | session=app.state.httpsession, 33 | browser=app.state.browser, 34 | cookie_manager=app.state.cookie_manager, 35 | user_agent=app.state.user_agent, 36 | ) 37 | 38 | 39 | @app.middleware("http") 40 | async def req_config(request: Request, call_next): 41 | request.state.browser = app.state.browser 42 | request.state.httpsession = app.state.httpsession 43 | request.state.cookie_manager = app.state.cookie_manager 44 | request.state.user_agent = app.state.user_agent 45 | request.state.request = app.state.request 46 | response = await call_next(request) 47 | return response 48 | 49 | 50 | @app.on_event("shutdown") 51 | async def shutdown(): 52 | logger.info("\n-------Session ends here--------\n") 53 | await app.state.browser.close() 54 | await app.state.pwsession.stop() 55 | await app.state.httpsession.close() 56 | 57 | 58 | app.include_router(request.router, tags=["request"]) 59 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | aiohttp 2 | fastapi 3 | uvicorn 4 | playwright 5 | bs4 6 | redis>=4.2.0rc1 -------------------------------------------------------------------------------- /routes/request.py: -------------------------------------------------------------------------------- 1 | import logging, datetime 2 | 3 | from fastapi import APIRouter 4 | from fastapi.requests import Request 5 | from pydantic import BaseModel, HttpUrl 6 | 7 | from core import parser 8 | 9 | router = APIRouter() 10 | logger = logging.getLogger("internal") 11 | 12 | 13 | class GetReq(BaseModel): 14 | url: HttpUrl 15 | 16 | 17 | @router.post("/get") 18 | async def get(req: Request, payload: GetReq): 19 | logger.info( 20 | f"$GET: \"{payload.url}\" at {datetime.datetime.now().strftime('%I:%M%p')}" 21 | ) 22 | res = await req.state.request.get(payload.url) 23 | data = parser.try_json(res.content) 24 | if data.jsonable: 25 | res.content = data.content 26 | return res 27 | --------------------------------------------------------------------------------