├── .gitignore ├── LICENSE ├── README.md ├── browserforge ├── bayesian_network.py ├── download.py ├── fingerprints │ ├── __init__.py │ └── generator.py ├── headers │ ├── __init__.py │ ├── generator.py │ └── utils.py ├── injectors │ ├── __init__.py │ ├── data │ │ ├── __init__.py │ │ └── utils.js.xz │ ├── playwright │ │ ├── __init__.py │ │ └── injector.py │ ├── pyppeteer │ │ ├── __init__.py │ │ └── injector.py │ ├── undetected_playwright │ │ ├── __init__.py │ │ └── injector.py │ └── utils.py └── py.typed └── pyproject.toml /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Binaries 10 | *.dll 11 | *.dylib 12 | 13 | # Model files 14 | *.json 15 | 16 | # Distribution / packaging 17 | .Python 18 | build/ 19 | develop-eggs/ 20 | dist/ 21 | downloads/ 22 | eggs/ 23 | .eggs/ 24 | lib/ 25 | lib64/ 26 | parts/ 27 | sdist/ 28 | var/ 29 | wheels/ 30 | share/python-wheels/ 31 | *.egg-info/ 32 | .installed.cfg 33 | *.egg 34 | MANIFEST 35 | 36 | # PyInstaller 37 | # Usually these files are written by a python script from a template 38 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 39 | *.manifest 40 | *.spec 41 | 42 | # Installer logs 43 | pip-log.txt 44 | pip-delete-this-directory.txt 45 | 46 | # Unit test / coverage reports 47 | htmlcov/ 48 | .tox/ 49 | .nox/ 50 | .coverage 51 | .coverage.* 52 | .cache 53 | nosetests.xml 54 | coverage.xml 55 | *.cover 56 | *.py,cover 57 | .hypothesis/ 58 | .pytest_cache/ 59 | cover/ 60 | 61 | # Translations 62 | *.mo 63 | *.pot 64 | 65 | # Django stuff: 66 | *.log 67 | local_settings.py 68 | db.sqlite3 69 | db.sqlite3-journal 70 | 71 | # Flask stuff: 72 | instance/ 73 | .webassets-cache 74 | 75 | # Scrapy stuff: 76 | .scrapy 77 | 78 | # Sphinx documentation 79 | docs/_build/ 80 | 81 | # PyBuilder 82 | .pybuilder/ 83 | target/ 84 | 85 | # Jupyter Notebook 86 | .ipynb_checkpoints 87 | 88 | # IPython 89 | profile_default/ 90 | ipython_config.py 91 | 92 | # pyenv 93 | # For a library or package, you might want to ignore these files since the code is 94 | # intended to run in multiple environments; otherwise, check them in: 95 | # .python-version 96 | 97 | # pipenv 98 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 99 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 100 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 101 | # install all needed dependencies. 102 | #Pipfile.lock 103 | 104 | # poetry 105 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 106 | # This is especially recommended for binary packages to ensure reproducibility, and is more 107 | # commonly ignored for libraries. 108 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 109 | #poetry.lock 110 | 111 | # pdm 112 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 113 | #pdm.lock 114 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 115 | # in version control. 116 | # https://pdm.fming.dev/#use-with-ide 117 | .pdm.toml 118 | 119 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 120 | __pypackages__/ 121 | 122 | # Celery stuff 123 | celerybeat-schedule 124 | celerybeat.pid 125 | 126 | # SageMath parsed files 127 | *.sage.py 128 | 129 | # Environments 130 | .env 131 | .venv 132 | env/ 133 | venv/ 134 | ENV/ 135 | env.bak/ 136 | venv.bak/ 137 | 138 | # Spyder project settings 139 | .spyderproject 140 | .spyproject 141 | 142 | # Rope project settings 143 | .ropeproject 144 | 145 | # mkdocs documentation 146 | /site 147 | 148 | # mypy 149 | .mypy_cache/ 150 | .dmypy.json 151 | dmypy.json 152 | 153 | # Pyre type checker 154 | .pyre/ 155 | 156 | # pytype static type analyzer 157 | .pytype/ 158 | 159 | # Cython debug symbols 160 | cython_debug/ 161 | 162 | # PyCharm 163 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 164 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 165 | # and can be added to the global gitignore or merged into this file. For a more nuclear 166 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 167 | #.idea/ 168 | 169 | # VsCode 170 | .vscode 171 | .trunk 172 | 173 | # daownloaded files 174 | data/ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | BrowserForge 3 |

4 | 5 |

6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | PyPI 14 | 15 | 16 | PyPI 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 |

28 | 29 |

30 | 🎭 Intelligent browser header & fingerprint generator 31 |

32 | 33 | --- 34 | 35 | ## What is it? 36 | 37 | BrowserForge is a browser header and fingerprint generator that mimics the frequency of different browsers, operating systems, and devices found in the wild. 38 | 39 | It is a reimplementation of [Apify's fingerprint-suite](https://github.com/apify/fingerprint-suite) in Python. 40 | 41 | ## Features 42 | 43 | - Uses a Bayesian generative network to mimic actual web traffic 44 | - Extremely fast runtime (0.1-0.2 miliseconds) 45 | - Easy and simple for humans to use 46 | - Extensive customization options for browsers, operating systems, devices, locales, and HTTP version 47 | - Written with type safety 48 | 49 | ## Installation 50 | 51 | ``` 52 | pip install browserforge[all] 53 | ``` 54 | ## Usage 55 | 56 | ## Generating Headers 57 | 58 | ### Simple usage 59 | 60 | ```py 61 | >>> from browserforge.headers import HeaderGenerator 62 | >>> headers = HeaderGenerator() 63 | >>> headers.generate() 64 | {'sec-ch-ua': '"Chromium";v="122", "Not(A:Brand";v="24", "Google Chrome";v="122"', 'Sec-Ch-Ua-Mobile': '?0', 'Sec-Ch-Ua-Platform': '"Windows"', 'Upgrade-Insecure-Requests': '1', 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7', 'Sec-Fetch-Site': '?1', 'Sec-Fetch-Mode': 'same-site', 'Sec-Fetch-User': 'document', 'Sec-Fetch-Dest': 'navigate', 'Accept-Encoding': 'gzip, deflate, br, zstd', 'Accept-Language': 'en-US;q=1.0'} 65 | ``` 66 | 67 | ### Using with requests 68 | 69 | Headers can be added to a session in [requests](https://github.com/psf/requests) (or similar libraries) by assigning them to the `headers` attribute: 70 | 71 | ```py 72 | import requests 73 | session = requests.Session() 74 | # Set the session headers 75 | session.headers = headers.generate() 76 | ``` 77 | 78 |
79 | Parameters for HeaderGenerator 80 | 81 | ``` 82 | Parameters: 83 | browser (Union[ListOrString, Iterable[Browser]], optional): Browser(s) or Browser object(s). 84 | os (ListOrString, optional): Operating system(s) to generate headers for. 85 | device (ListOrString, optional): Device(s) to generate the headers for. 86 | locale (ListOrString, optional): List of at most 10 languages for the Accept-Language header. Default is 'en-US'. 87 | http_version (Literal[1, 2], optional): Http version to be used to generate headers. Defaults to 2. 88 | strict (bool, optional): Throws an error if it cannot generate headers based on the input. Defaults to False. 89 | ``` 90 | 91 |
92 | 93 |
94 | Parameters for HeaderGenerator.generate 95 | 96 | ``` 97 | Generates headers using the default options and their possible overrides. 98 | 99 | Parameters: 100 | browser (Optional[Iterable[Union[str, Browser]]], optional): Browser(s) to generate the headers for. 101 | os (Optional[ListOrString], optional): Operating system(s) to generate the headers for. 102 | device (Optional[ListOrString], optional): Device(s) to generate the headers for. 103 | locale (Optional[ListOrString], optional): Language(s) to include in the Accept-Language header. 104 | http_version (Optional[Literal[1, 2]], optional): HTTP version to be used to generate headers. 105 | user_agent (Optional[ListOrString], optional): User-Agent(s) to use. 106 | request_dependent_headers (Optional[Dict[str, str]], optional): Known values of request-dependent headers. 107 | strict (Optional[bool], optional): If true, throws an error if it cannot generate headers based on the input. 108 | ``` 109 | 110 |
111 | 112 | ### Constraining headers 113 | 114 | #### Single constraint 115 | 116 | Set constraints for browsers by passing the optional strings below: 117 | 118 | ```py 119 | headers = HeaderGenerator( 120 | browser='chrome', 121 | os='windows', 122 | device='desktop', 123 | locale='en-US', 124 | http_version=2 125 | ) 126 | ``` 127 | 128 | #### Multiple constraints 129 | 130 | Set multiple constraints to select from. Options are selected based on their actual frequency in the wild: 131 | 132 | ```py 133 | headers = HeaderGenerator( 134 | browser=('chrome', 'firefox', 'safari', 'edge'), 135 | os=('windows', 'macos', 'linux', 'android', 'ios'), 136 | device=('desktop', 'mobile'), 137 | locale=('en-US', 'en', 'de'), 138 | http_version=2 139 | ) 140 | ``` 141 | 142 | #### Browser specifications 143 | 144 | Set specificiations for browsers, including version ranges and HTTP version: 145 | 146 | ```py 147 | from browserforge.headers import Browser 148 | 149 | browsers = [ 150 | Browser(name='chrome', min_version=100, max_version=110), 151 | Browser(name='firefox', max_version=80, http_version=1), 152 | Browser(name='edge', min_version=95), 153 | ] 154 | headers = HeaderGenerator(browser=browsers) 155 | ``` 156 | 157 | Note that all constraints passed into the `HeaderGenerator` constructor can be overridden by passing them into the `generate` method. 158 | 159 | #### Generate headers given User-Agent 160 | 161 | Headers can be generated given an existing user agent: 162 | 163 | ```py 164 | >>> headers.generate(user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36') 165 | ``` 166 | 167 | Select from multiple User-Agents based on their frequency in the wild: 168 | 169 | ```py 170 | >>> headers.generate(user_agent=( 171 | 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36', 172 | 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/115.0' 173 | )) 174 | ``` 175 | 176 |
177 | 178 | ## Generating Fingerprints 179 | 180 | ### Simple usage 181 | 182 | Initialize FingerprintGenerator: 183 | 184 | ```py 185 | from browserforge.fingerprints import FingerprintGenerator 186 | fingerprints = FingerprintGenerator() 187 | fingerprints.generate() 188 | ``` 189 | 190 |
191 | Parameters for FingerprintGenerator 192 | 193 | ``` 194 | Parameters: 195 | screen (Screen, optional): Screen constraints for the generated fingerprint. 196 | strict (bool, optional): Whether to raise an exception if the constraints are too strict. Default is False. 197 | mock_webrtc (bool, optional): Whether to mock WebRTC when injecting the fingerprint. Default is False. 198 | slim (bool, optional): Disables performance-heavy evasions when injecting the fingerprint. Default is False. 199 | **header_kwargs: Header generation options for HeaderGenerator 200 | ``` 201 | 202 |
203 | 204 |
205 | Parameters for FingerprintGenerator.generate 206 | 207 | ``` 208 | Generates a fingerprint and a matching set of ordered headers using a combination of the default options specified in the constructor and their possible overrides provided here. 209 | 210 | Parameters: 211 | screen (Screen, optional): Screen constraints for the generated fingerprint. 212 | strict (bool, optional): Whether to raise an exception if the constraints are too strict. 213 | mock_webrtc (bool, optional): Whether to mock WebRTC when injecting the fingerprint. Default is False. 214 | slim (bool, optional): Disables performance-heavy evasions when injecting the fingerprint. Default is False. 215 | **header_kwargs: Additional header generation options for HeaderGenerator.generate 216 | ``` 217 | 218 |
219 | 220 |
221 | Example response 222 | 223 | ``` 224 | Fingerprint(screen=ScreenFingerprint(availHeight=784, 225 | availWidth=1440, 226 | availTop=25, 227 | availLeft=0, 228 | colorDepth=30, 229 | height=900, 230 | pixelDepth=30, 231 | width=1440, 232 | devicePixelRatio=2, 233 | pageXOffset=0, 234 | pageYOffset=0, 235 | innerHeight=0, 236 | outerHeight=718, 237 | outerWidth=1440, 238 | innerWidth=0, 239 | screenX=0, 240 | clientWidth=0, 241 | clientHeight=19, 242 | hasHDR=True), 243 | navigator=NavigatorFingerprint(userAgent='Mozilla/5.0 (Macintosh; ' 244 | 'Intel Mac OS X 10_15_7) ' 245 | 'AppleWebKit/537.36 ' 246 | '(KHTML, like Gecko) ' 247 | 'Chrome/121.0.0.0 ' 248 | 'Safari/537.36', 249 | userAgentData={'architecture': 'arm', 250 | 'bitness': '64', 251 | 'brands': [{'brand': 'Not ' 252 | 'A(Brand', 253 | 'version': '99'}, 254 | {'brand': 'Google ' 255 | 'Chrome', 256 | 'version': '121'}, 257 | {'brand': 'Chromium', 258 | 'version': '121'}], 259 | 'fullVersionList': [{'brand': 'Not ' 260 | 'A(Brand', 261 | 'version': '99.0.0.0'}, 262 | {'brand': 'Google ' 263 | 'Chrome', 264 | 'version': '121.0.6167.160'}, 265 | {'brand': 'Chromium', 266 | 'version': '121.0.6167.160'}], 267 | 'mobile': False, 268 | 'model': '', 269 | 'platform': 'macOS', 270 | 'platformVersion': '13.6.1', 271 | 'uaFullVersion': '121.0.6167.160'}, 272 | doNotTrack=None, 273 | appCodeName='Mozilla', 274 | appName='Netscape', 275 | appVersion='5.0 (Macintosh; Intel ' 276 | 'Mac OS X 10_15_7) ' 277 | 'AppleWebKit/537.36 ' 278 | '(KHTML, like Gecko) ' 279 | 'Chrome/121.0.0.0 ' 280 | 'Safari/537.36', 281 | oscpu=None, 282 | webdriver=False, 283 | language='en-US', 284 | languages=['en-US'], 285 | platform='MacIntel', 286 | deviceMemory=8, 287 | hardwareConcurrency=10, 288 | product='Gecko', 289 | productSub='20030107', 290 | vendor='Google Inc.', 291 | vendorSub=None, 292 | maxTouchPoints=0, 293 | extraProperties={'globalPrivacyControl': None, 294 | 'installedApps': [], 295 | 'isBluetoothSupported': False, 296 | 'pdfViewerEnabled': True, 297 | 'vendorFlavors': ['chrome']}), 298 | headers={'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7', 299 | 'Accept-Encoding': 'gzip, deflate, br', 300 | 'Accept-Language': 'en-US;q=1.0', 301 | 'Sec-Fetch-Dest': 'navigate', 302 | 'Sec-Fetch-Mode': 'same-site', 303 | 'Sec-Fetch-Site': '?1', 304 | 'Sec-Fetch-User': 'document', 305 | 'Upgrade-Insecure-Requests': '1', 306 | 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X ' 307 | '10_15_7) AppleWebKit/537.36 (KHTML, like ' 308 | 'Gecko) Chrome/121.0.0.0 Safari/537.36', 309 | 'sec-ch-ua': '"Not A(Brand";v="99", "Google ' 310 | 'Chrome";v="121", "Chromium";v="121"', 311 | 'sec-ch-ua-mobile': '?0', 312 | 'sec-ch-ua-platform': '"macOS"'}, 313 | videoCodecs={'h264': 'probably', 'ogg': '', 'webm': 'probably'}, 314 | audioCodecs={'aac': 'probably', 315 | 'm4a': 'maybe', 316 | 'mp3': 'probably', 317 | 'ogg': 'probably', 318 | 'wav': 'probably'}, 319 | pluginsData={'mimeTypes': ['Portable Document ' 320 | 'Format~~application/pdf~~pdf', 321 | 'Portable Document ' 322 | 'Format~~text/pdf~~pdf'], 323 | 'plugins': [{'description': 'Portable Document Format', 324 | 'filename': 'internal-pdf-viewer', 325 | 'mimeTypes': [{'description': 'Portable ' 326 | 'Document ' 327 | 'Format', 328 | 'enabledPlugin': 'PDF ' 329 | 'Viewer', 330 | 'suffixes': 'pdf', 331 | 'type': 'application/pdf'}, 332 | {'description': 'Portable ' 333 | 'Document ' 334 | 'Format', 335 | 'enabledPlugin': 'PDF ' 336 | 'Viewer', 337 | 'suffixes': 'pdf', 338 | 'type': 'text/pdf'}], 339 | 'name': 'PDF Viewer'}, 340 | {'description': 'Portable Document Format', 341 | 'filename': 'internal-pdf-viewer', 342 | 'mimeTypes': [{'description': 'Portable ' 343 | 'Document ' 344 | 'Format', 345 | 'enabledPlugin': 'Chrome ' 346 | 'PDF ' 347 | 'Viewer', 348 | 'suffixes': 'pdf', 349 | 'type': 'application/pdf'}, 350 | {'description': 'Portable ' 351 | 'Document ' 352 | 'Format', 353 | 'enabledPlugin': 'Chrome ' 354 | 'PDF ' 355 | 'Viewer', 356 | 'suffixes': 'pdf', 357 | 'type': 'text/pdf'}], 358 | 'name': 'Chrome PDF Viewer'}, 359 | {'description': 'Portable Document Format', 360 | 'filename': 'internal-pdf-viewer', 361 | 'mimeTypes': [{'description': 'Portable ' 362 | 'Document ' 363 | 'Format', 364 | 'enabledPlugin': 'Chromium ' 365 | 'PDF ' 366 | 'Viewer', 367 | 'suffixes': 'pdf', 368 | 'type': 'application/pdf'}, 369 | {'description': 'Portable ' 370 | 'Document ' 371 | 'Format', 372 | 'enabledPlugin': 'Chromium ' 373 | 'PDF ' 374 | 'Viewer', 375 | 'suffixes': 'pdf', 376 | 'type': 'text/pdf'}], 377 | 'name': 'Chromium PDF Viewer'}, 378 | {'description': 'Portable Document Format', 379 | 'filename': 'internal-pdf-viewer', 380 | 'mimeTypes': [{'description': 'Portable ' 381 | 'Document ' 382 | 'Format', 383 | 'enabledPlugin': 'Microsoft ' 384 | 'Edge ' 385 | 'PDF ' 386 | 'Viewer', 387 | 'suffixes': 'pdf', 388 | 'type': 'application/pdf'}, 389 | 'Document ' 390 | 'Format', 391 | 'enabledPlugin': 'Microsoft ' 392 | 'Edge ' 393 | 'PDF ' 394 | 'Viewer', 395 | 'suffixes': 'pdf', 396 | 'type': 'text/pdf'}], 397 | 'name': 'Microsoft Edge PDF Viewer'}, 398 | {'description': 'Portable Document Format', 399 | 'filename': 'internal-pdf-viewer', 400 | 'mimeTypes': [{'description': 'Portable ' 401 | 'Document ' 402 | 'Format', 403 | 'enabledPlugin': 'WebKit ' 404 | 'built-in ' 405 | 'PDF', 406 | 'suffixes': 'pdf', 407 | 'type': 'application/pdf'}, 408 | {'description': 'Portable ' 409 | 'Document ' 410 | 'Format', 411 | 'enabledPlugin': 'WebKit ' 412 | 'built-in ' 413 | 'PDF', 414 | 'suffixes': 'pdf', 415 | 'type': 'text/pdf'}], 416 | 'name': 'WebKit built-in PDF'}]}, 417 | battery={'charging': False, 418 | 'chargingTime': None, 419 | 'dischargingTime': 29940, 420 | 'level': 0.98}, 421 | videoCard=VideoCard(renderer='ANGLE (Apple, ANGLE Metal Renderer: ' 422 | 'Apple M2 Pro, Unspecified Version)', 423 | vendor='Google Inc. (Apple)'), 424 | multimediaDevices={'micros': [{'deviceId': '', 425 | 'groupId': '', 426 | 'kind': 'audioinput', 427 | 'label': ''}], 428 | 'speakers': [{'deviceId': '', 429 | 'groupId': '', 430 | 'kind': 'audiooutput', 431 | 'label': ''}], 432 | 'webcams': [{'deviceId': '', 433 | 'groupId': '', 434 | 'kind': 'videoinput', 435 | 'label': ''}]}, 436 | fonts=['Arial Unicode MS', 'Gill Sans', 'Helvetica Neue', 'Menlo'] 437 | mockWebRTC: False, 438 | slim: False) 439 | ``` 440 | 441 |
442 | 443 | ### Constraining fingerprints 444 | 445 | #### Screen width/height 446 | 447 | Constrain the minimum/maximum screen width and height: 448 | 449 | ```py 450 | from browserforge.fingerprints import Screen 451 | 452 | screen = Screen( 453 | min_width=100 454 | max_width=1280 455 | min_height=400 456 | max_height=720 457 | ) 458 | 459 | fingerprints = FingerprintGenerator(screen=screen) 460 | ``` 461 | 462 | Note: Not all bounds need to be defined. 463 | 464 | #### Browser specifications 465 | 466 | `FingerprintGenerator` and `FingerprintGenerator.generate` inherit the same parameters from `HeaderGenerator`. 467 | 468 | Because of this, user agents, browser specifications, device types, and operating system constrains can also be passed into `FingerprintGenerator.generate`. 469 | 470 | Here is a usage example: 471 | 472 | ```py 473 | fingerprint.generate(browser='chrome', os='windows') 474 | ``` 475 | 476 |
477 | 478 | ## Injecting Fingerprints 479 | 480 | > [!WARNING] 481 | > Fingerprint injection in BrowserForge is deprecated. Please check out [Camoufox] instead. 482 | 483 | BrowserForge is fully compatible with your existing Playwright and Pyppeteer code. You only have to change your context/page initialization. 484 | 485 | ### Playwright 486 | 487 | #### Async API: 488 | 489 | ```py 490 | # Import the AsyncNewContext injector 491 | from browserforge.injectors.playwright import AsyncNewContext 492 | 493 | async def main(): 494 | async with async_playwright() as playwright: 495 | browser = await playwright.chromium.launch() 496 | # Create a new async context with the injected fingerprint 497 | context = await AsyncNewContext(browser, fingerprint=fingerprint) 498 | page = await context.new_page() 499 | ... 500 | ``` 501 | 502 | Replace `await browser.new_context` with `await AsyncNewContext` in your existing Playwright code. 503 | 504 |
505 | Parameters for AsyncNewContext 506 | 507 | ``` 508 | Injects an async_api Playwright context with a Fingerprint. 509 | 510 | Parameters: 511 | browser (Browser): The browser to create the context in 512 | fingerprint (Optional[Fingerprint]): The fingerprint to inject. If None, one will be generated 513 | fingerprint_options (Optional[Dict]): Options for the Fingerprint generator if `fingerprint` is not passed 514 | **new_context_options: Other options for the new context 515 | ``` 516 | 517 |
518 | 519 | #### Sync API: 520 | 521 | ```py 522 | # Import the NewContext injector 523 | from browserforge.injectors.playwright import NewContext 524 | 525 | def main(): 526 | with sync_playwright() as playwright: 527 | browser = playwright.chromium.launch() 528 | # Create a new context with the injected fingerprint 529 | context = NewContext(browser, fingerprint=fingerprint) 530 | page = context.new_page() 531 | ... 532 | ``` 533 | 534 | Replace `browser.new_context` with `NewContext` in your existing Playwright code. 535 | 536 |
537 | Parameters for NewContext 538 | 539 | ``` 540 | Injects a sync_api Playwright context with a Fingerprint. 541 | 542 | Parameters: 543 | browser (Browser): The browser to create the context in 544 | fingerprint (Optional[Fingerprint]): The fingerprint to inject. If None, one will be generated 545 | fingerprint_options (Optional[Dict]): Options for the Fingerprint generator if `fingerprint` is not passed 546 | **new_context_options: Other options for the new context 547 | ``` 548 | 549 |
550 | 551 | #### Undetected-Playwright 552 | 553 | [Undetected-Playwright](https://github.com/kaliiiiiiiiii/undetected-playwright-python) is also supported in the `browserforge.injectors.undetected_playwright` package. The usage is the same as the Playwright injector. 554 | 555 | ### Pyppeteer 556 | 557 | ```py 558 | # Import the NewPage injector 559 | from browserforge.injectors.pyppeteer import NewPage 560 | from pyppeteer import launch 561 | 562 | async def test(): 563 | browser = await launch() 564 | # Create a new page with the injected fingerprint 565 | page = await NewPage(browser, fingerprint=fingerprint) 566 | ... 567 | ``` 568 | 569 | Replace `browser.newPage` with `NewPage` in your existing Pyppeteer code. 570 | 571 |
572 | Parameters for NewPage 573 | 574 | ``` 575 | Injects a Pyppeteer browser object with a Fingerprint. 576 | 577 | Parameters: 578 | browser (Browser): The browser to create the context in 579 | fingerprint (Optional[Fingerprint]): The fingerprint to inject. If None, one will be generated 580 | fingerprint_options (Optional[Dict]): Options for the Fingerprint generator if `fingerprint` is not passed 581 | ``` 582 | 583 |
584 | 585 |
586 | 587 | ## Uninstall 588 | 589 | ``` 590 | pip uninstall browserforge 591 | ``` 592 | 593 | --- 594 | -------------------------------------------------------------------------------- /browserforge/bayesian_network.py: -------------------------------------------------------------------------------- 1 | import random 2 | import zipfile 3 | from pathlib import Path 4 | from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple, TypeVar, Union 5 | 6 | try: 7 | import orjson as json 8 | except ImportError: 9 | import json 10 | 11 | T = TypeVar('T') 12 | Map = Union[list, tuple] 13 | 14 | 15 | class BayesianNode: 16 | """ 17 | Implementation of a single node in a bayesian network allowing sampling from its conditional distribution 18 | """ 19 | 20 | def __init__(self, node_definition: Dict[str, Any]): 21 | self.node_definition = node_definition 22 | 23 | def get_probabilities_given_known_values( 24 | self, parent_values: Dict[str, Any] 25 | ) -> Dict[Any, float]: 26 | """ 27 | Extracts unconditional probabilities of node values given the values of the parent nodes 28 | """ 29 | probabilities = self.node_definition['conditionalProbabilities'] 30 | for parent_name in self.parent_names: 31 | parent_value = parent_values.get(parent_name) 32 | if parent_value in probabilities.get('deeper', {}): 33 | probabilities = probabilities['deeper'][parent_value] 34 | else: 35 | probabilities = probabilities.get('skip', {}) 36 | return probabilities 37 | 38 | def sample_random_value_from_possibilities( 39 | self, possible_values: List[str], probabilities: Dict[str, float] 40 | ) -> Any: 41 | """ 42 | Randomly samples from the given values using the given probabilities 43 | """ 44 | # Python natively supports weighted random sampling in random.choices, 45 | # but this method is much faster 46 | anchor = random.random() 47 | cumulative_probability = 0.0 48 | for possible_value in possible_values: 49 | cumulative_probability += probabilities[possible_value] 50 | if cumulative_probability > anchor: 51 | return possible_value 52 | # Default to first item 53 | return possible_values[0] 54 | 55 | def sample(self, parent_values: Dict[str, Any]) -> Any: 56 | """ 57 | Randomly samples from the conditional distribution of this node given values of parents 58 | """ 59 | probabilities = self.get_probabilities_given_known_values(parent_values) 60 | return self.sample_random_value_from_possibilities( 61 | list(probabilities.keys()), probabilities 62 | ) 63 | 64 | def sample_according_to_restrictions( 65 | self, 66 | parent_values: Dict[str, Any], 67 | value_possibilities: Iterable[str], 68 | banned_values: List[str], 69 | ) -> Optional[str]: 70 | """ 71 | Randomly samples from the conditional distribution of this node given restrictions on the possible values and the values of the parents. 72 | """ 73 | probabilities = self.get_probabilities_given_known_values(parent_values) 74 | valid_values = [ 75 | value 76 | for value in value_possibilities 77 | if value not in banned_values and value in probabilities 78 | ] 79 | if valid_values: 80 | return self.sample_random_value_from_possibilities(valid_values, probabilities) 81 | else: 82 | return None # Equivalent to `false` in TypeScript 83 | 84 | @property 85 | def name(self) -> str: 86 | return self.node_definition['name'] 87 | 88 | @property 89 | def parent_names(self) -> List[str]: 90 | return self.node_definition.get('parentNames', []) 91 | 92 | @property 93 | def possible_values(self) -> List[str]: 94 | return self.node_definition.get('possibleValues', []) 95 | 96 | 97 | class BayesianNetwork: 98 | """ 99 | Implementation of a bayesian network capable of randomly sampling from its distribution 100 | """ 101 | 102 | def __init__(self, path: Path) -> None: 103 | network_definition = extract_json(path) 104 | self.nodes_in_sampling_order = [ 105 | BayesianNode(node_def) for node_def in network_definition['nodes'] 106 | ] 107 | self.nodes_by_name = {node.name: node for node in self.nodes_in_sampling_order} 108 | 109 | def generate_sample(self, input_values: Optional[Dict[str, Any]] = None) -> Dict[str, Any]: 110 | """ 111 | Randomly samples from the distribution represented by the bayesian network. 112 | """ 113 | if input_values is None: 114 | input_values = {} 115 | sample = input_values.copy() 116 | for node in self.nodes_in_sampling_order: 117 | if node.name not in sample: 118 | sample[node.name] = node.sample(sample) 119 | return sample 120 | 121 | def generate_consistent_sample_when_possible( 122 | self, value_possibilities: Dict[str, Iterable[str]] 123 | ) -> Optional[Dict[str, Any]]: 124 | """ 125 | Randomly samples values from the distribution represented by the bayesian network, 126 | making sure the sample is consistent with the provided restrictions on value possibilities. 127 | Returns None if no such sample can be generated. 128 | """ 129 | return self.recursively_generate_consistent_sample_when_possible({}, value_possibilities, 0) 130 | 131 | def recursively_generate_consistent_sample_when_possible( 132 | self, 133 | sample_so_far: Dict[str, Any], 134 | value_possibilities: Dict[str, Iterable[str]], 135 | depth: int, 136 | ) -> Optional[Dict[str, Any]]: 137 | """ 138 | Recursively generates a random sample consistent with the given restrictions on possible values. 139 | """ 140 | if depth == len(self.nodes_in_sampling_order): 141 | return sample_so_far 142 | node = self.nodes_in_sampling_order[depth] 143 | banned_values: List[str] = [] 144 | sample_value = None 145 | while True: 146 | sample_value = node.sample_according_to_restrictions( 147 | sample_so_far, 148 | value_possibilities.get(node.name, node.possible_values), 149 | banned_values, 150 | ) 151 | if sample_value is None: 152 | break 153 | sample_so_far[node.name] = sample_value 154 | next_sample = self.recursively_generate_consistent_sample_when_possible( 155 | sample_so_far, value_possibilities, depth + 1 156 | ) 157 | if next_sample is not None: 158 | return next_sample 159 | banned_values.append(sample_value) 160 | del sample_so_far[node.name] 161 | return None 162 | 163 | 164 | def array_intersection(a: Sequence[T], b: Sequence[T]) -> List[T]: 165 | """ 166 | Performs a set "intersection" on the given (flat) arrays 167 | """ 168 | set_b = set(b) 169 | return [x for x in a if x in set_b] 170 | 171 | 172 | def array_zip(a: List[Tuple[T, ...]], b: List[Tuple[T, ...]]) -> List[Tuple[T, ...]]: 173 | """ 174 | Combines two arrays into a single array using the set union 175 | Args: 176 | a: First array to be combined. 177 | b: Second array to be combined. 178 | Returns: 179 | Zipped (multi-dimensional) array. 180 | """ 181 | return [tuple(set(x).union(y)) for x, y in zip(a, b)] 182 | 183 | 184 | def undeeper(obj: Dict[str, Any]) -> Dict[str, Any]: 185 | """ 186 | Removes the "deeper/skip" structures from the conditional probability table 187 | """ 188 | if not isinstance(obj, dict): 189 | return obj 190 | result: Dict[str, Any] = {} 191 | for key, value in obj.items(): 192 | if key == 'skip': 193 | continue 194 | if key == 'deeper': 195 | result.update(undeeper(value)) 196 | else: 197 | result[key] = undeeper(value) 198 | return result 199 | 200 | 201 | def filter_by_last_level_keys(tree: Dict[str, Any], valid_keys: Map) -> List[Tuple[str, ...]]: 202 | r""" 203 | Performs DFS on the Tree and returns values of the nodes on the paths that end with the given keys 204 | (stored by levels - first level is the root) 205 | ``` 206 | 1 207 | / \ 208 | 2 3 209 | / \ / \ 210 | 4 5 6 7 211 | ``` 212 | filter_by_last_level_keys(tree, ['4', '7']) => [[1], [2,3]] 213 | """ 214 | out: List[Tuple[str, ...]] = [] 215 | 216 | def recurse(t: Dict[str, Any], vk: Union[Tuple[str, ...], List[str]], acc: List[str]) -> None: 217 | for key in t.keys(): 218 | if not isinstance(t[key], dict) or t[key] is None: 219 | if key in vk: 220 | nonlocal out 221 | out = ( 222 | [(x,) for x in acc] 223 | if len(out) == 0 224 | else array_zip(out, [(x,) for x in acc]) 225 | ) 226 | continue 227 | else: 228 | recurse(t[key], vk, acc + [key]) 229 | 230 | recurse(tree, valid_keys, []) 231 | return out 232 | 233 | 234 | def get_possible_values( 235 | network: 'BayesianNetwork', possible_values: Dict[str, Union[Tuple[str, ...], List[str]]] 236 | ) -> Dict[str, Sequence[str]]: 237 | """ 238 | Given a `generative-bayesian-network` instance and a set of user constraints, returns an extended 239 | set of constraints **induced** by the original constraints and network structure 240 | """ 241 | 242 | sets = [] 243 | # For every pre-specified node, compute the "closure" for values of the other nodes 244 | for key, value in possible_values.items(): 245 | if not isinstance(value, (list, tuple)): 246 | continue 247 | if len(value) == 0: 248 | raise ValueError( 249 | "The current constraints are too restrictive. No possible values can be found for the given constraints." 250 | ) 251 | node = network.nodes_by_name[key] 252 | tree = undeeper(node.node_definition['conditionalProbabilities']) 253 | zipped_values = filter_by_last_level_keys(tree, value) 254 | sets.append({**dict(zip(node.parent_names, zipped_values)), key: value}) 255 | 256 | # Compute the intersection of all the possible values for each node 257 | result: Dict[str, Sequence[str]] = {} 258 | for set_dict in sets: 259 | for key in set_dict.keys(): 260 | if key in result: 261 | intersected_values = array_intersection(set_dict[key], result[key]) 262 | if not intersected_values: 263 | raise ValueError( 264 | "The current constraints are too restrictive. No possible values can be found for the given constraints." 265 | ) 266 | result[key] = intersected_values 267 | else: 268 | result[key] = set_dict[key] 269 | 270 | return result 271 | 272 | 273 | def extract_json(path: Path) -> dict: 274 | """ 275 | Unzips a zip file if the path points to a zip file, otherwise directly loads a JSON file. 276 | 277 | Parameters: 278 | path: The path to the zip file or JSON file. 279 | 280 | Returns: 281 | A dictionary representing the JSON content. 282 | """ 283 | if path.suffix != '.zip': 284 | # Directly load the JSON file 285 | with open(path, 'rb') as file: 286 | return json.loads(file.read()) 287 | # Unzip the file and load the JSON content 288 | with zipfile.ZipFile(path, 'r') as zf: 289 | # Find the first JSON file in zip 290 | try: 291 | filename = next(file for file in zf.namelist() if file.endswith('.json')) 292 | except StopIteration: 293 | return {} # Broken 294 | with zf.open(filename) as f: 295 | # Assuming only one JSON file is needed 296 | return json.loads(f.read()) 297 | -------------------------------------------------------------------------------- /browserforge/download.py: -------------------------------------------------------------------------------- 1 | 2 | import click 3 | 4 | """ 5 | Downloads the required model definitions - deprecated 6 | """ 7 | 8 | 9 | """ 10 | Public download functions 11 | """ 12 | 13 | 14 | def Download(headers=False, fingerprints=False) -> None: 15 | """ 16 | Deprecated. Downloading model definition files is no longer needed. 17 | 18 | Files are included as explicit python package dependency. 19 | """ 20 | click.secho('Deprecated. Downloading model definition files is no longer needed.', fg='bright_yellow') 21 | 22 | 23 | def DownloadIfNotExists(**flags: bool) -> None: 24 | """ 25 | Deprecated. Downloading model definition files is no longer needed. 26 | 27 | Files are included as explicit python package dependency. 28 | """ 29 | pass 30 | 31 | 32 | def IsDownloaded(**flags: bool) -> bool: 33 | """ 34 | Deprecated. Downloading model definition files is no longer needed. 35 | 36 | Files are included as explicit python package dependency. 37 | """ 38 | return True 39 | 40 | 41 | def Remove() -> None: 42 | """ 43 | Deprecated. Downloading model definition files is no longer needed. 44 | 45 | Files are included as explicit python package dependency. 46 | """ 47 | pass 48 | -------------------------------------------------------------------------------- /browserforge/fingerprints/__init__.py: -------------------------------------------------------------------------------- 1 | from browserforge.download import DownloadIfNotExists 2 | 3 | DownloadIfNotExists(fingerprints=True, headers=True) 4 | 5 | from browserforge.headers import Browser 6 | 7 | from .generator import ( 8 | Fingerprint, 9 | FingerprintGenerator, 10 | NavigatorFingerprint, 11 | Screen, 12 | ScreenFingerprint, 13 | VideoCard, 14 | ) 15 | 16 | __all__ = [ 17 | "Browser", 18 | "Fingerprint", 19 | "FingerprintGenerator", 20 | "NavigatorFingerprint", 21 | "Screen", 22 | "ScreenFingerprint", 23 | "VideoCard", 24 | ] 25 | -------------------------------------------------------------------------------- /browserforge/fingerprints/generator.py: -------------------------------------------------------------------------------- 1 | from dataclasses import asdict, dataclass 2 | from pathlib import Path 3 | from typing import Dict, List, Optional 4 | 5 | from apify_fingerprint_datapoints import get_fingerprint_network 6 | 7 | from browserforge.bayesian_network import BayesianNetwork, get_possible_values 8 | from browserforge.headers import HeaderGenerator 9 | from browserforge.headers.utils import get_user_agent 10 | 11 | try: 12 | import orjson as json 13 | 14 | USE_ORJSON = True 15 | except ImportError: 16 | import json 17 | 18 | USE_ORJSON = False 19 | 20 | DATA_DIR: Path = Path(__file__).parent / 'data' 21 | 22 | 23 | @dataclass 24 | class ScreenFingerprint: 25 | availHeight: int 26 | availWidth: int 27 | availTop: int 28 | availLeft: int 29 | colorDepth: int 30 | height: int 31 | pixelDepth: int 32 | width: int 33 | devicePixelRatio: float 34 | pageXOffset: int 35 | pageYOffset: int 36 | innerHeight: int 37 | outerHeight: int 38 | outerWidth: int 39 | innerWidth: int 40 | screenX: int 41 | clientWidth: int 42 | clientHeight: int 43 | hasHDR: bool 44 | 45 | 46 | @dataclass 47 | class NavigatorFingerprint: 48 | userAgent: str 49 | userAgentData: Dict[str, str] 50 | doNotTrack: Optional[str] 51 | appCodeName: str 52 | appName: str 53 | appVersion: str 54 | oscpu: str 55 | webdriver: str 56 | language: str 57 | languages: List[str] 58 | platform: str 59 | deviceMemory: Optional[int] 60 | hardwareConcurrency: int 61 | product: str 62 | productSub: str 63 | vendor: str 64 | vendorSub: str 65 | maxTouchPoints: int 66 | extraProperties: Dict[str, str] 67 | 68 | 69 | @dataclass 70 | class VideoCard: 71 | renderer: str 72 | vendor: str 73 | 74 | 75 | @dataclass 76 | class Fingerprint: 77 | """Output data of the fingerprint generator""" 78 | 79 | screen: ScreenFingerprint 80 | navigator: NavigatorFingerprint 81 | headers: Dict[str, str] 82 | videoCodecs: Dict[str, str] 83 | audioCodecs: Dict[str, str] 84 | pluginsData: Dict[str, str] 85 | battery: Optional[Dict[str, str]] 86 | videoCard: Optional[VideoCard] 87 | multimediaDevices: List[str] 88 | fonts: List[str] 89 | mockWebRTC: Optional[bool] 90 | slim: Optional[bool] 91 | 92 | def dumps(self) -> str: 93 | """ 94 | Dumps the dataclass as a JSON string. 95 | """ 96 | if USE_ORJSON: 97 | return json.dumps(self).decode() 98 | # Built-in `json` does not take dataclass objects 99 | # Instead, convert to a dict first 100 | return json.dumps(asdict(self)) 101 | 102 | 103 | @dataclass 104 | class Screen: 105 | """Constrains the screen dimensions of the generated fingerprint""" 106 | 107 | min_width: Optional[int] = None 108 | max_width: Optional[int] = None 109 | min_height: Optional[int] = None 110 | max_height: Optional[int] = None 111 | 112 | def __post_init__(self): 113 | if ( 114 | None not in (self.min_width, self.max_width) 115 | and self.min_width > self.max_width 116 | or None not in (self.min_height, self.max_height) 117 | and self.min_height > self.max_height 118 | ): 119 | raise ValueError( 120 | "Invalid screen constraints: min values cannot be greater than max values" 121 | ) 122 | 123 | def is_set(self) -> bool: 124 | """ 125 | Returns true if any constraints were set 126 | """ 127 | return any(value is not None for value in self.__dict__.values()) 128 | 129 | 130 | class FingerprintGenerator: 131 | """Generates realistic browser fingerprints""" 132 | 133 | fingerprint_generator_network = BayesianNetwork(get_fingerprint_network()) 134 | 135 | def __init__( 136 | self, 137 | screen: Optional[Screen] = None, 138 | strict: bool = False, 139 | mock_webrtc: bool = False, 140 | slim: bool = False, 141 | **header_kwargs, 142 | ): 143 | """ 144 | Initializes the FingerprintGenerator with the given options. 145 | 146 | Parameters: 147 | screen (Screen, optional): Screen constraints for the generated fingerprint. 148 | strict (bool, optional): Whether to raise an exception if the constraints are too strict. Default is False. 149 | mock_webrtc (bool, optional): Whether to mock WebRTC when injecting the fingerprint. Default is False. 150 | slim (bool, optional): Disables performance-heavy evasions when injecting the fingerprint. Default is False. 151 | **header_kwargs: Header generation options for HeaderGenerator 152 | """ 153 | self.header_generator: HeaderGenerator = HeaderGenerator(**header_kwargs) 154 | 155 | # Set default options 156 | self.screen: Optional[Screen] = screen 157 | self.strict: bool = strict 158 | self.mock_webrtc: bool = mock_webrtc 159 | self.slim: bool = slim 160 | 161 | def generate( 162 | self, 163 | *, 164 | screen: Optional[Screen] = None, 165 | strict: Optional[bool] = None, 166 | mock_webrtc: Optional[bool] = None, 167 | slim: Optional[bool] = None, 168 | **header_kwargs, 169 | ) -> Fingerprint: 170 | """ 171 | Generates a fingerprint and a matching set of ordered headers using a combination of the default options 172 | specified in the constructor and their possible overrides provided here. 173 | 174 | Parameters: 175 | screen (Screen, optional): Screen constraints for the generated fingerprint. 176 | strict (bool, optional): Whether to raise an exception if the constraints are too strict. 177 | mock_webrtc (bool, optional): Whether to mock WebRTC when injecting the fingerprint. Default is False. 178 | slim (bool, optional): Disables performance-heavy evasions when injecting the fingerprint. Default is False. 179 | **header_kwargs: Additional header generation options for HeaderGenerator.generate 180 | """ 181 | filtered_values: Dict[str, str] = {} 182 | if header_kwargs is None: 183 | header_kwargs = {} 184 | 185 | # merge new options with old 186 | screen = _first(screen, self.screen) 187 | strict = _first(strict, self.strict) 188 | 189 | partial_csp = self.partial_csp( 190 | strict=strict, screen=screen, filtered_values=filtered_values 191 | ) 192 | 193 | # Generate headers consistent with the inputs to get input-compatible user-agent 194 | # and accept-language headers needed later 195 | if partial_csp: 196 | header_kwargs['user_agent'] = partial_csp['userAgent'] 197 | headers = self.header_generator.generate(**header_kwargs) 198 | # Extract generated User-Agent 199 | user_agent = get_user_agent(headers) 200 | if user_agent is None: 201 | raise ValueError("Failed to find User-Agent in generated response") 202 | 203 | # Generate fingerprint consistent with the generated user agent 204 | while True: 205 | fingerprint: Optional[Dict] = ( 206 | self.fingerprint_generator_network.generate_consistent_sample_when_possible( 207 | {**filtered_values, 'userAgent': (user_agent,)} 208 | ) 209 | ) 210 | if fingerprint is not None: 211 | break 212 | # Raise 213 | if strict: 214 | raise ValueError( 215 | 'Cannot generate headers. User-Agent may be invalid, or screen constraints are too restrictive.' 216 | ) 217 | # If no fingerprint was generated, relax the filtered values. 218 | # This seems to be an issue with some Mac and Linux systems 219 | filtered_values = {} 220 | 221 | # Delete any missing attributes and unpack any object/array-like attributes 222 | # that have been packed together to make the underlying network simpler 223 | for attribute in list(fingerprint.keys()): 224 | if fingerprint[attribute] == '*MISSING_VALUE*': 225 | fingerprint[attribute] = None 226 | if isinstance(fingerprint[attribute], str) and fingerprint[attribute].startswith( 227 | '*STRINGIFIED*' 228 | ): 229 | fingerprint[attribute] = json.loads(fingerprint[attribute][len('*STRINGIFIED*') :]) 230 | 231 | # Manually add the set of accepted languages required by the input 232 | accept_language_header_value = headers.get('Accept-Language', '') 233 | accepted_languages = [ 234 | locale.split(';', 1)[0] for locale in accept_language_header_value.split(',') 235 | ] 236 | fingerprint['languages'] = accepted_languages 237 | 238 | return self._transform_fingerprint( 239 | fingerprint, 240 | headers, 241 | _first(mock_webrtc, self.mock_webrtc), 242 | _first(slim, self.slim), 243 | ) 244 | 245 | def partial_csp( 246 | self, strict: Optional[bool], screen: Optional[Screen], filtered_values: Dict 247 | ) -> Optional[Dict]: 248 | """ 249 | Generates partial content security policy (CSP) based on the provided options and filtered values. 250 | 251 | Parameters: 252 | strict (Optional[bool): Whether to raise an exception if the constraints are too strict. 253 | screen (Optional[Screen]): Screen for generating the partial CSP. 254 | filtered_values (Dict): Filtered values used for generating the partial CSP. 255 | 256 | Returns: 257 | Dict: Partial CSP values. 258 | """ 259 | # if extensive constraints need to be used 260 | if not (screen and screen.is_set()): 261 | return None 262 | 263 | filtered_values['screen'] = [ 264 | screen_string 265 | for screen_string in self.fingerprint_generator_network.nodes_by_name[ 266 | 'screen' 267 | ].possible_values 268 | if self._is_screen_within_constraints(screen_string, screen) 269 | ] 270 | 271 | try: 272 | return get_possible_values(self.fingerprint_generator_network, filtered_values) 273 | except Exception as e: 274 | if strict: 275 | raise e 276 | del filtered_values['screen'] 277 | return None 278 | 279 | @staticmethod 280 | def _is_screen_within_constraints(screen_string: str, screen_options: Screen) -> bool: 281 | """ 282 | Checks if the given screen dimensions are within the specified constraints. 283 | 284 | Parameters: 285 | screen_string (str): Stringified screen dimensions. 286 | screen_options (Screen): Screen constraint options. 287 | 288 | Returns: 289 | bool: True if the screen dimensions are within the constraints, False otherwise. 290 | """ 291 | try: 292 | screen = json.loads(screen_string[len('*STRINGIFIED*') :]) 293 | return ( 294 | # Ensure that the screen width/height are greater than the minimum constraints 295 | # Default missing values to -1 to ensure they are excluded 296 | screen.get('width', -1) >= (screen_options.min_width or 0) 297 | and screen.get('height', -1) >= (screen_options.min_height or 0) 298 | # Ensure that the screen width/height are less than the maximum constraints 299 | and screen.get('width', 0) <= (screen_options.max_width or 1e5) 300 | and screen.get('height', 0) <= (screen_options.max_height or 1e5) 301 | ) 302 | except (ValueError, TypeError): 303 | return False 304 | 305 | @staticmethod 306 | def _transform_fingerprint( 307 | fingerprint: Dict, headers: Dict, mock_webrtc: bool, slim: bool 308 | ) -> Fingerprint: 309 | """ 310 | Transforms fingerprint into a final dataclass instance. 311 | 312 | Parameters: 313 | fingerprint (Dict): Fingerprint to be transformed. 314 | headers (Dict): Generated headers. 315 | mock_webrtc (bool): Whether to mock WebRTC when injecting the fingerprint. 316 | slim (bool): Disables performance-heavy evasions when injecting the fingerprint. 317 | 318 | Returns: 319 | Fingerprint: Transformed fingerprint as a Fingerprint dataclass instance. 320 | """ 321 | 322 | navigator_kwargs = { 323 | k: fingerprint[k] 324 | for k in ( 325 | 'userAgent', 326 | 'userAgentData', 327 | 'doNotTrack', 328 | 'appCodeName', 329 | 'appName', 330 | 'appVersion', 331 | 'oscpu', 332 | 'webdriver', 333 | 'platform', 334 | 'deviceMemory', 335 | 'product', 336 | 'productSub', 337 | 'vendor', 338 | 'vendorSub', 339 | 'extraProperties', 340 | 'hardwareConcurrency', 341 | 'languages', 342 | ) 343 | } 344 | 345 | # Always take the first element for 'language' 346 | navigator_kwargs['language'] = navigator_kwargs['languages'][0] 347 | navigator_kwargs['maxTouchPoints'] = fingerprint.get('maxTouchPoints', 0) 348 | 349 | return Fingerprint( 350 | screen=ScreenFingerprint(**fingerprint['screen']), 351 | navigator=NavigatorFingerprint(**navigator_kwargs), 352 | headers=headers, 353 | videoCodecs=fingerprint['videoCodecs'], 354 | audioCodecs=fingerprint['audioCodecs'], 355 | pluginsData=fingerprint['pluginsData'], 356 | battery=fingerprint['battery'], 357 | videoCard=( 358 | VideoCard(**fingerprint['videoCard']) if fingerprint.get('videoCard') else None 359 | ), 360 | multimediaDevices=fingerprint['multimediaDevices'], 361 | fonts=fingerprint['fonts'], 362 | mockWebRTC=mock_webrtc, 363 | slim=slim, 364 | ) 365 | 366 | 367 | def _first(*values): 368 | """ 369 | Simple function that returns the first non-None value passed 370 | """ 371 | return next((v for v in values if v is not None), None) 372 | -------------------------------------------------------------------------------- /browserforge/headers/__init__.py: -------------------------------------------------------------------------------- 1 | from browserforge.download import DownloadIfNotExists 2 | 3 | DownloadIfNotExists(headers=True) 4 | 5 | from .generator import Browser, HeaderGenerator 6 | 7 | __all__ = [ 8 | "Browser", 9 | "HeaderGenerator", 10 | ] 11 | -------------------------------------------------------------------------------- /browserforge/headers/generator.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import Any, Dict, Iterable, List, Literal, Optional, Tuple, Union 3 | 4 | from browserforge.bayesian_network import BayesianNetwork, get_possible_values 5 | from apify_fingerprint_datapoints import (get_header_network, get_headers_order, get_browser_helper_file, 6 | get_input_network) 7 | 8 | from .utils import get_browser, get_user_agent, pascalize_headers, tuplify 9 | 10 | try: 11 | import orjson as json 12 | except ImportError: 13 | import json 14 | 15 | try: 16 | from typing import TypeAlias # novm 17 | except ImportError: 18 | from typing_extensions import TypeAlias # <3.10 19 | 20 | 21 | """Constants""" 22 | SUPPORTED_BROWSERS = ('chrome', 'firefox', 'safari', 'edge') 23 | SUPPORTED_OPERATING_SYSTEMS = ('windows', 'macos', 'linux', 'android', 'ios') 24 | SUPPORTED_DEVICES = ('desktop', 'mobile') 25 | SUPPORTED_HTTP_VERSIONS = ('1', '2') 26 | MISSING_VALUE_DATASET_TOKEN = '*MISSING_VALUE*' 27 | HTTP1_SEC_FETCH_ATTRIBUTES = { 28 | 'Sec-Fetch-Mode': 'same-site', 29 | 'Sec-Fetch-Dest': 'navigate', 30 | 'Sec-Fetch-Site': '?1', 31 | 'Sec-Fetch-User': 'document', 32 | } 33 | HTTP2_SEC_FETCH_ATTRIBUTES = { 34 | 'sec-fetch-mode': 'same-site', 35 | 'sec-fetch-dest': 'navigate', 36 | 'sec-fetch-site': '?1', 37 | 'sec-fetch-user': 'document', 38 | } 39 | ListOrString: TypeAlias = Union[Tuple[str, ...], List[str], str] 40 | 41 | 42 | @dataclass 43 | class Browser: 44 | """Represents a browser specification with name, min/max version, and HTTP version""" 45 | 46 | name: str 47 | min_version: Optional[int] = None 48 | max_version: Optional[int] = None 49 | http_version: Union[str, int] = '2' 50 | 51 | def __post_init__(self): 52 | # Convert http_version to 53 | if isinstance(self.http_version, int): 54 | self.http_version = str(self.http_version) 55 | # Confirm min_version < max_version 56 | if ( 57 | isinstance(self.min_version, int) 58 | and isinstance(self.max_version, int) 59 | and self.min_version > self.max_version 60 | ): 61 | raise ValueError( 62 | f'Browser min version constraint ({self.min_version}) cannot exceed max version ({self.max_version})' 63 | ) 64 | 65 | 66 | @dataclass 67 | class HttpBrowserObject: 68 | """Represents an HTTP browser object with name, version, complete string, and HTTP version""" 69 | 70 | name: Optional[str] 71 | version: Tuple[int, ...] 72 | complete_string: str 73 | http_version: str 74 | 75 | @property 76 | def is_http2(self): 77 | return self.http_version == '2' 78 | 79 | 80 | class HeaderGenerator: 81 | """Generates HTTP headers based on a set of constraints""" 82 | 83 | relaxation_order: Tuple[str, ...] = ('locales', 'devices', 'operatingSystems', 'browsers') 84 | 85 | # Initialize networks 86 | input_generator_network = BayesianNetwork(get_input_network()) 87 | header_generator_network = BayesianNetwork(get_header_network()) 88 | 89 | def __init__( 90 | self, 91 | browser: Union[ListOrString, Iterable[Browser]] = SUPPORTED_BROWSERS, 92 | os: ListOrString = SUPPORTED_OPERATING_SYSTEMS, 93 | device: ListOrString = SUPPORTED_DEVICES, 94 | locale: ListOrString = 'en-US', 95 | http_version: Literal[1, 2] = 2, 96 | strict: bool = False, 97 | ): 98 | """ 99 | Initializes the HeaderGenerator with the given options. 100 | 101 | Parameters: 102 | browser (Union[ListOrString, Iterable[Browser]], optional): Browser(s) or Browser object(s). 103 | os (ListOrString, optional): Operating system(s) to generate headers for. 104 | device (ListOrString, optional): Device(s) to generate the headers for. 105 | locale (ListOrString, optional): List of at most 10 languages for the Accept-Language header. Default is 'en-US'. 106 | http_version (Literal[1, 2], optional): Http version to be used to generate headers. Defaults to 2. 107 | strict (bool, optional): Throws an error if it cannot generate headers based on the input. Defaults to False. 108 | """ 109 | http_ver: str = str(http_version) 110 | 111 | self.options = { 112 | 'browsers': self._prepare_browsers_config(tuplify(browser), http_ver), 113 | 'os': tuplify(os), 114 | 'devices': tuplify(device), 115 | 'locales': tuplify(locale), 116 | 'http_version': http_ver, 117 | 'strict': strict, 118 | } 119 | # Loader orders 120 | self.unique_browsers = self._load_unique_browsers() 121 | self.headers_order = self._load_headers_order() 122 | 123 | def generate( 124 | self, 125 | *, 126 | browser: Optional[Iterable[Union[str, Browser]]] = None, 127 | os: Optional[ListOrString] = None, 128 | device: Optional[ListOrString] = None, 129 | locale: Optional[ListOrString] = None, 130 | http_version: Optional[Literal[1, 2]] = None, 131 | user_agent: Optional[ListOrString] = None, 132 | strict: Optional[bool] = None, 133 | request_dependent_headers: Optional[Dict[str, str]] = None, 134 | ): 135 | """ 136 | Generates headers using the default options and their possible overrides. 137 | 138 | Parameters: 139 | browser (Optional[Iterable[Union[str, Browser]]], optional): Browser(s) to generate the headers for. 140 | os (Optional[ListOrString], optional): Operating system(s) to generate the headers for. 141 | device (Optional[ListOrString], optional): Device(s) to generate the headers for. 142 | locale (Optional[ListOrString], optional): Language(s) to include in the Accept-Language header. 143 | http_version (Optional[Literal[1, 2]], optional): HTTP version to be used to generate headers. 144 | user_agent (Optional[ListOrString], optional): User-Agent(s) to use. 145 | request_dependent_headers (Optional[Dict[str, str]], optional): Known values of request-dependent headers. 146 | strict (Optional[bool], optional): If true, throws an error if it cannot generate headers based on the input. 147 | """ 148 | 149 | options = { 150 | 'browsers': tuplify(browser), 151 | 'os': tuplify(os), 152 | 'devices': tuplify(device), 153 | 'locales': tuplify(locale), 154 | 'http_version': str(http_version) if http_version else None, 155 | 'strict': strict, 156 | 'user_agent': tuplify(user_agent), 157 | 'request_dependent_headers': request_dependent_headers, 158 | } 159 | generated: Dict[str, str] = self._get_headers( 160 | **{k: v for k, v in options.items() if v is not None} 161 | ) 162 | if (options['http_version'] or self.options['http_version']) == '2': 163 | return pascalize_headers(generated) 164 | return generated 165 | 166 | def _get_headers( 167 | self, 168 | request_dependent_headers: Optional[Dict[str, str]] = None, 169 | user_agent: Optional[Iterable[str]] = None, 170 | **options: Any, 171 | ) -> Dict[str, str]: 172 | """ 173 | Generates HTTP headers based on the given constraints. 174 | 175 | Parameters: 176 | request_dependent_headers (Dict[str, str], optional): Dictionary of request-dependent headers. 177 | user_agent (Iterable[str], optional): User-Agent value(s). 178 | **options (Any): Additional options for header generation. 179 | 180 | Returns: 181 | Dict[str, str]: Dictionary of generated HTTP headers. 182 | """ 183 | if request_dependent_headers is None: 184 | request_dependent_headers = {} 185 | 186 | # Process new options 187 | if 'browsers' in options or ( 188 | # if a unique http_version was passed 189 | 'http_version' in options 190 | and options['http_version'] != self.options['http_version'] 191 | ): 192 | self._update_http_version(options) 193 | 194 | header_options = {**self.options, **options} 195 | possible_attribute_values = self._get_possible_attribute_values(header_options) 196 | 197 | if user_agent: 198 | # evaluate iterable 199 | if not isinstance(user_agent, (tuple, list)): 200 | user_agent = tuple(user_agent) 201 | http1_values, http2_values = ( 202 | get_possible_values(self.header_generator_network, {'User-Agent': user_agent}), 203 | get_possible_values(self.header_generator_network, {'user-agent': user_agent}), 204 | ) 205 | else: 206 | http1_values, http2_values = {}, {} 207 | 208 | constraints = self._prepare_constraints( 209 | possible_attribute_values, http1_values, http2_values 210 | ) 211 | 212 | input_sample = self.input_generator_network.generate_consistent_sample_when_possible( 213 | constraints 214 | ) 215 | if not input_sample: 216 | if header_options['http_version'] == '1': 217 | headers2 = self._get_headers( 218 | request_dependent_headers, user_agent, **options, http_version='2' 219 | ) 220 | return self.order_headers(pascalize_headers(headers2)) 221 | 222 | relaxation_index = next( 223 | (i for i, key in enumerate(self.relaxation_order) if key in options), -1 224 | ) 225 | if header_options['strict'] or relaxation_index == -1: 226 | raise ValueError( 227 | 'No headers based on this input can be generated. Please relax or change some of the requirements you specified.' 228 | ) 229 | 230 | relaxed_options = {**options} 231 | del relaxed_options[self.relaxation_order[relaxation_index]] 232 | return self._get_headers(request_dependent_headers, user_agent, **relaxed_options) 233 | 234 | generated_sample = self.header_generator_network.generate_sample(input_sample) 235 | generated_http_and_browser = self._prepare_http_browser_object( 236 | generated_sample['*BROWSER_HTTP'] 237 | ) 238 | 239 | # Add Accept-Language header 240 | accept_language_field_name = ( 241 | 'accept-language' if generated_http_and_browser.is_http2 else 'Accept-Language' 242 | ) 243 | generated_sample[accept_language_field_name] = self._get_accept_language_header( 244 | header_options['locales'] 245 | ) 246 | 247 | # Add Sec headers 248 | if self._should_add_sec_fetch(generated_http_and_browser): 249 | if generated_http_and_browser.is_http2: 250 | generated_sample.update(HTTP2_SEC_FETCH_ATTRIBUTES) 251 | else: 252 | generated_sample.update(HTTP1_SEC_FETCH_ATTRIBUTES) 253 | 254 | # Ommit connection, close, and missing value headers 255 | generated_sample = { 256 | k: v 257 | for k, v in generated_sample.items() 258 | if not ( 259 | k.lower() == 'connection' 260 | and v == 'close' 261 | or k.startswith('*') 262 | or v == MISSING_VALUE_DATASET_TOKEN 263 | ) 264 | } 265 | 266 | # Reorder headers 267 | return self.order_headers({**generated_sample, **request_dependent_headers}) 268 | 269 | def _update_http_version( 270 | self, 271 | options: Dict[str, Any], 272 | ): 273 | """ 274 | Prepares options when a `browsers` or `http_version` kwarg is passed to .generate. 275 | 276 | Parameters: 277 | options (Dict[str, Any]): Other arguments. 278 | """ 279 | if 'http_version' in options: 280 | http_version = options['http_version'] 281 | else: 282 | http_version = self.options['http_version'] 283 | 284 | if 'browsers' in options: 285 | options['browsers'] = self._prepare_browsers_config(options['browsers'], http_version) 286 | else: 287 | # Create a copy of the class browsers with an updated http_version 288 | options['browsers'] = [ 289 | ( 290 | Browser( 291 | name=brwsr.name, 292 | min_version=brwsr.min_version, 293 | max_version=brwsr.max_version, 294 | http_version=http_version, 295 | ) 296 | if isinstance(brwsr, Browser) 297 | else Browser(name=brwsr, http_version=http_version) 298 | ) 299 | for brwsr in self.options['browsers'] 300 | ] 301 | 302 | def _prepare_browsers_config( 303 | self, browsers: Iterable[Union[str, Browser]], http_version: str 304 | ) -> List[Browser]: 305 | """ 306 | Prepares the browser configuration based on the given browsers and HTTP version. 307 | 308 | Parameters: 309 | browsers (Iterable[Union[str, Browser]]): Supported browsers or Browser objects. 310 | http_version (str): HTTP version ('1' or '2'). 311 | 312 | Returns: 313 | List[Browser]: List of Browser objects. 314 | """ 315 | return [ 316 | ( 317 | Browser(name=browser, http_version=http_version) 318 | if isinstance(browser, str) 319 | else browser 320 | ) 321 | for browser in browsers 322 | ] 323 | 324 | def _get_browser_http_options(self, browsers: Iterable[Browser]) -> List[str]: 325 | """ 326 | Retrieves the browser HTTP options based on the given browser specifications. 327 | 328 | Parameters: 329 | browsers (Iterable[Browser]): Iterable of Browser objects. 330 | 331 | Returns: 332 | List[str]: List of browser HTTP options. 333 | """ 334 | return [ 335 | browser_option.complete_string 336 | for browser in browsers 337 | for browser_option in self.unique_browsers 338 | if browser.name == browser_option.name 339 | and (not browser.min_version or browser.min_version <= browser_option.version[0]) 340 | and (not browser.max_version or browser.max_version >= browser_option.version[0]) 341 | and (not browser.http_version or browser.http_version == browser_option.http_version) 342 | ] 343 | 344 | def order_headers(self, headers: Dict[str, str]) -> Dict[str, str]: 345 | """ 346 | Orders the headers based on the browser-specific header order. 347 | 348 | Parameters: 349 | headers (Dict[str, str]): Dictionary of headers. 350 | 351 | Returns: 352 | Dict[str, str]: Ordered dictionary of headers. 353 | """ 354 | # get the browser name 355 | user_agent = get_user_agent(headers) 356 | if user_agent is None: 357 | raise ValueError("Failed to find User-Agent in generated response") 358 | browser_name = get_browser(user_agent) 359 | if browser_name is None: 360 | raise ValueError("Failed to find browser in User-Agent") 361 | 362 | header_order = self.headers_order.get(browser_name) 363 | # Order headers according to the specific browser's header order 364 | return ( 365 | {key: headers[key] for key in header_order if key in headers} 366 | if header_order 367 | else headers 368 | ) 369 | 370 | def _get_possible_attribute_values( 371 | self, header_options: Dict[str, Any] 372 | ) -> Dict[str, List[str]]: 373 | """ 374 | Retrieves the possible attribute values based on the given header options. 375 | 376 | Parameters: 377 | header_options (Dict[str, Any]): Dictionary of header options. 378 | 379 | Returns: 380 | Dict[str, List[str]]: Dictionary of possible attribute values. 381 | """ 382 | browsers = self._prepare_browsers_config( 383 | header_options.get('browsers', ()), 384 | header_options.get('http_version', '2'), 385 | ) 386 | browser_http_options = self._get_browser_http_options(browsers) 387 | 388 | possible_attribute_values = { 389 | '*BROWSER_HTTP': browser_http_options, 390 | '*OPERATING_SYSTEM': header_options.get('os', SUPPORTED_OPERATING_SYSTEMS), 391 | } 392 | if 'devices' in header_options: 393 | possible_attribute_values['*DEVICE'] = header_options['devices'] 394 | 395 | return possible_attribute_values 396 | 397 | def _should_add_sec_fetch(self, browser: HttpBrowserObject) -> bool: 398 | """ 399 | Determines whether Sec-Fetch headers should be added based on the user agent. 400 | 401 | Parameters: 402 | browser (HttpBrowserObject): Browser object. 403 | 404 | Returns: 405 | bool: True if Sec-Fetch headers should be added, False otherwise. 406 | """ 407 | if browser.name == 'chrome' and browser.version[0] >= 76: 408 | return True 409 | if browser.name == 'firefox' and browser.version[0] >= 90: 410 | return True 411 | if browser.name == 'edge' and browser.version[0] >= 79: 412 | return True 413 | return False 414 | 415 | def _get_accept_language_header(self, locales: ListOrString) -> str: 416 | """ 417 | Generates the Accept-Language header based on the given locales. 418 | 419 | Parameters: 420 | locales (ListOrString): Locale(s). 421 | 422 | Returns: 423 | str: Accept-Language header string. 424 | """ 425 | return ', '.join( 426 | f"{locale};q={1.0 - index * 0.1:.1f}" for index, locale in enumerate(locales) 427 | ) 428 | 429 | def _load_headers_order(self) -> Dict[str, List[str]]: 430 | """ 431 | Loads the headers order from the headers-order.json file. 432 | 433 | Returns: 434 | Dict[str, List[str]]: Dictionary of headers order for each browser. 435 | """ 436 | return json.loads(get_headers_order().read_bytes()) 437 | 438 | def _load_unique_browsers(self) -> List[HttpBrowserObject]: 439 | """ 440 | Loads the unique browsers from the browser-helper-file.json file. 441 | 442 | Returns: 443 | List[HttpBrowserObject]: List of HttpBrowserObject instances. 444 | """ 445 | unique_browser_strings = json.loads(get_browser_helper_file().read_bytes()) 446 | return [ 447 | self._prepare_http_browser_object(browser_str) 448 | for browser_str in unique_browser_strings 449 | if browser_str != MISSING_VALUE_DATASET_TOKEN 450 | ] 451 | 452 | def _prepare_constraints( 453 | self, 454 | possible_attribute_values: Dict[str, List[str]], 455 | http1_values: Dict[str, Any], 456 | http2_values: Dict[str, Any], 457 | ) -> Dict[str, Iterable[str]]: 458 | """ 459 | Prepares the constraints for generating consistent samples. 460 | 461 | Parameters: 462 | possible_attribute_values (Dict[str, List[str]]): Dictionary of possible attribute values. 463 | http1_values (Dict[str, Any]): Dictionary of HTTP/1 values. 464 | http2_values (Dict[str, Any]): Dictionary of HTTP/2 values. 465 | 466 | Returns: 467 | Dict[str, Iterable[str]]: Dictionary of constraints for each attribute. 468 | """ 469 | return { 470 | key: tuple( 471 | filter( 472 | lambda x: ( 473 | self.filter_browser_http(x, http1_values, http2_values) 474 | if key == '*BROWSER_HTTP' 475 | else self.filter_other_values(x, http1_values, http2_values, key) 476 | ), 477 | values, 478 | ) 479 | ) 480 | for key, values in possible_attribute_values.items() 481 | } 482 | 483 | @staticmethod 484 | def filter_browser_http( 485 | value: str, http1_values: Dict[str, Any], http2_values: Dict[str, Any] 486 | ) -> bool: 487 | """ 488 | Filters the browser HTTP value based on the HTTP/1 and HTTP/2 values. 489 | 490 | Parameters: 491 | value (str): Browser HTTP value. 492 | http1_values (Dict[str, Any]): Dictionary of HTTP/1 values. 493 | http2_values (Dict[str, Any]): Dictionary of HTTP/2 values. 494 | 495 | Returns: 496 | bool: True if the value should be included, False otherwise. 497 | """ 498 | browser_name, http_version = value.split('|') 499 | return ( 500 | (not http1_values or browser_name in http1_values.get('*BROWSER', ())) 501 | if http_version == '1' 502 | else (not http2_values or browser_name in http2_values.get('*BROWSER', ())) 503 | ) 504 | 505 | @staticmethod 506 | def filter_other_values( 507 | value: str, http1_values: Dict[str, Any], http2_values: Dict[str, Any], key: str 508 | ) -> bool: 509 | """ 510 | Filters the other attribute values based on the HTTP/1 and HTTP/2 values. 511 | 512 | Parameters: 513 | value (str): Attribute value. 514 | http1_values (Dict[str, Any]): Dictionary of HTTP/1 values. 515 | http2_values (Dict[str, Any]): Dictionary of HTTP/2 values. 516 | key (str): Attribute key. 517 | 518 | Returns: 519 | bool: True if the value should be included, False otherwise. 520 | """ 521 | if http1_values or http2_values: 522 | return value in http1_values.get(key, ()) or value in http2_values.get(key, ()) 523 | return True 524 | 525 | def _prepare_http_browser_object(self, http_browser_string: str) -> HttpBrowserObject: 526 | """ 527 | Extracts structured information about a browser and HTTP version from a string. 528 | 529 | Parameters: 530 | http_browser_string (str): HTTP browser string. 531 | 532 | Returns: 533 | HttpBrowserObject: HttpBrowserObject instance. 534 | """ 535 | browser_string, http_version = http_browser_string.split('|') 536 | if browser_string == MISSING_VALUE_DATASET_TOKEN: 537 | return HttpBrowserObject( 538 | name=None, version=(), complete_string=MISSING_VALUE_DATASET_TOKEN, http_version='' 539 | ) 540 | 541 | browser_name, version_string = browser_string.split('/') 542 | version_parts = version_string.split('.') 543 | version = tuple(int(part) for part in version_parts) 544 | return HttpBrowserObject( 545 | name=browser_name, 546 | version=version, 547 | complete_string=http_browser_string, 548 | http_version=http_version, 549 | ) 550 | -------------------------------------------------------------------------------- /browserforge/headers/utils.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, Iterable, Optional 2 | 3 | 4 | def get_user_agent(headers: Dict[str, str]) -> Optional[str]: 5 | """ 6 | Retrieves the User-Agent from the headers dictionary. 7 | """ 8 | return headers.get('User-Agent') or headers.get('user-agent') 9 | 10 | 11 | def get_browser(user_agent: str) -> Optional[str]: 12 | """ 13 | Determines the browser name from the User-Agent string. 14 | """ 15 | if any(alias in user_agent for alias in ('Firefox', 'FxiOS')): 16 | return 'firefox' 17 | elif any(alias in user_agent for alias in ('Chrome', 'CriOS')): 18 | return 'chrome' 19 | elif 'Safari' in user_agent: 20 | return 'safari' 21 | elif any(alias in user_agent for alias in ('Edge', 'EdgA', 'Edg', 'EdgiOS')): 22 | return 'edge' 23 | return None 24 | 25 | 26 | PASCALIZE_UPPER = {'dnt', 'rtt', 'ect'} 27 | 28 | 29 | def pascalize(name: str) -> str: 30 | # ignore 31 | if name.startswith(':') or name.startswith('sec-ch-ua'): 32 | return name 33 | # uppercase 34 | if name in PASCALIZE_UPPER: 35 | return name.upper() 36 | return name.title() 37 | 38 | 39 | def pascalize_headers(headers: Dict[str, str]) -> Dict[str, str]: 40 | return {pascalize(key): value for key, value in headers.items()} 41 | 42 | 43 | def tuplify(obj: Any): 44 | if (isinstance(obj, Iterable) and not isinstance(obj, str)) or obj is None: 45 | return obj 46 | return (obj,) 47 | -------------------------------------------------------------------------------- /browserforge/injectors/__init__.py: -------------------------------------------------------------------------------- 1 | from . import utils 2 | -------------------------------------------------------------------------------- /browserforge/injectors/data/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | utils.js.xz contains a compressed version of the utils.js file 3 | from Apify's fingerprint-injector: 4 | 5 | https://github.com/apify/fingerprint-suite/blob/master/packages/fingerprint-injector/src/utils.js 6 | 7 | Its purpose is to inject a Fingerprint object into a browser page. 8 | 9 | Copyright 2018 Apify Technologies s.r.o. 10 | """ 11 | -------------------------------------------------------------------------------- /browserforge/injectors/data/utils.js.xz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijro/browserforge/99dd114332c17e895469107847e7193e2832504a/browserforge/injectors/data/utils.js.xz -------------------------------------------------------------------------------- /browserforge/injectors/playwright/__init__.py: -------------------------------------------------------------------------------- 1 | from browserforge.injectors.utils import CheckIfInstalled 2 | 3 | CheckIfInstalled('playwright') 4 | 5 | from .injector import AsyncNewContext, NewContext 6 | -------------------------------------------------------------------------------- /browserforge/injectors/playwright/injector.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Optional 2 | 3 | from browserforge.fingerprints import Fingerprint 4 | from browserforge.injectors.utils import InjectFunction, _fingerprint, only_injectable_headers 5 | 6 | from playwright.async_api import Browser as AsyncBrowser 7 | from playwright.async_api import BrowserContext as AsyncBrowserContext 8 | from playwright.sync_api import Browser, BrowserContext 9 | 10 | 11 | async def AsyncNewContext( 12 | browser: AsyncBrowser, 13 | fingerprint: Optional[Fingerprint] = None, 14 | fingerprint_options: Optional[Dict] = None, 15 | **context_options, 16 | ) -> AsyncBrowserContext: 17 | """ 18 | Injects an async_api Playwright context with a Fingerprint. 19 | 20 | Parameters: 21 | browser (Browser): The browser to create the context in 22 | fingerprint (Optional[Fingerprint]): The fingerprint to inject. If None, one will be generated 23 | fingerprint_options (Optional[Dict]): Options for the Fingerprint generator if `fingerprint` is not passed 24 | **context_options: Other options for the new context 25 | """ 26 | fingerprint = _fingerprint(fingerprint, fingerprint_options) 27 | function = InjectFunction(fingerprint) 28 | # Build new context 29 | context = await browser.new_context(**_context_options(fingerprint, context_options)) 30 | # Set headers 31 | await context.set_extra_http_headers( 32 | only_injectable_headers(fingerprint.headers, browser.browser_type.name) 33 | ) 34 | 35 | # Since there are no async lambdas, define a new async function for emulating dark scheme 36 | async def on_page(page): 37 | await page.emulate_media(color_scheme='dark') 38 | 39 | # Dark mode 40 | context.on("page", on_page) 41 | 42 | # Inject function 43 | await context.add_init_script(function) 44 | 45 | return context 46 | 47 | 48 | def NewContext( 49 | browser: Browser, 50 | fingerprint: Optional[Fingerprint] = None, 51 | fingerprint_options: Optional[Dict] = None, 52 | **context_options, 53 | ) -> BrowserContext: 54 | """ 55 | Injects a sync_api Playwright context with a Fingerprint. 56 | 57 | Parameters: 58 | browser (Browser): The browser to create the context in 59 | fingerprint (Optional[Fingerprint]): The fingerprint to inject. If None, one will be generated 60 | fingerprint_options (Optional[Dict]): Options for the Fingerprint generator if `fingerprint` is not passed 61 | **context_options: Other options for the new context 62 | """ 63 | fingerprint = _fingerprint(fingerprint, fingerprint_options) 64 | function = InjectFunction(fingerprint) 65 | # Build new context 66 | context = browser.new_context(**_context_options(fingerprint, context_options)) 67 | # Set headers 68 | context.set_extra_http_headers( 69 | only_injectable_headers(fingerprint.headers, browser.browser_type.name) 70 | ) 71 | # Dark mode 72 | context.on("page", lambda page: page.emulate_media(color_scheme='dark')) 73 | 74 | # Inject function 75 | context.add_init_script(function) 76 | 77 | return context 78 | 79 | 80 | def _context_options( 81 | fingerprint: Fingerprint, 82 | options: Dict, 83 | ): 84 | """ 85 | Builds options for new context 86 | """ 87 | return { 88 | 'user_agent': fingerprint.navigator.userAgent, 89 | 'color_scheme': 'dark', 90 | 'viewport': { 91 | 'width': fingerprint.screen.width, 92 | 'height': fingerprint.screen.height, 93 | **options.pop('viewport', {}), 94 | }, 95 | 'extra_http_headers': { 96 | 'accept-language': fingerprint.headers['Accept-Language'], 97 | **options.pop('extra_http_headers', {}), 98 | }, 99 | 'device_scale_factor': fingerprint.screen.devicePixelRatio, 100 | **options, 101 | } 102 | -------------------------------------------------------------------------------- /browserforge/injectors/pyppeteer/__init__.py: -------------------------------------------------------------------------------- 1 | from browserforge.injectors.utils import CheckIfInstalled 2 | 3 | CheckIfInstalled('pyppeteer') 4 | 5 | from .injector import NewPage 6 | -------------------------------------------------------------------------------- /browserforge/injectors/pyppeteer/injector.py: -------------------------------------------------------------------------------- 1 | import re 2 | from typing import Dict, Optional 3 | 4 | from pyppeteer.browser import Browser 5 | from pyppeteer.page import Page 6 | 7 | from browserforge.fingerprints import Fingerprint 8 | from browserforge.injectors.utils import InjectFunction, _fingerprint, only_injectable_headers 9 | 10 | 11 | async def NewPage( 12 | browser: Browser, 13 | fingerprint: Optional[Fingerprint] = None, 14 | fingerprint_options: Optional[Dict] = None, 15 | ) -> Page: 16 | """ 17 | Injects a Pyppeteer browser object with a Fingerprint. 18 | 19 | Parameters: 20 | browser (Browser): The browser to create the context in 21 | fingerprint (Optional[Fingerprint]): The fingerprint to inject. If None, one will be generated 22 | fingerprint_options (Optional[Dict]): Options for the Fingerprint generator if `fingerprint` is not passed 23 | """ 24 | fingerprint = _fingerprint(fingerprint, fingerprint_options) 25 | function = InjectFunction(fingerprint) 26 | # create a new page 27 | page = await browser.newPage() 28 | 29 | await page.setUserAgent(fingerprint.navigator.userAgent) 30 | 31 | # Pyppeteer does not support firefox, so we can ignore checks 32 | cdp_sess = await page.target.createCDPSession() 33 | await cdp_sess.send( 34 | 'Page.setDeviceMetricsOverride', 35 | { 36 | 'screenHeight': fingerprint.screen.height, 37 | 'screenWidth': fingerprint.screen.width, 38 | 'width': fingerprint.screen.width, 39 | 'height': fingerprint.screen.height, 40 | 'mobile': any( 41 | name in fingerprint.navigator.userAgent for name in ('phone', 'android', 'mobile') 42 | ), 43 | 'screenOrientation': ( 44 | {'angle': 0, 'type': 'portraitPrimary'} 45 | if fingerprint.screen.height > fingerprint.screen.width 46 | else {'angle': 90, 'type': 'landscapePrimary'} 47 | ), 48 | 'deviceScaleFactor': fingerprint.screen.devicePixelRatio, 49 | }, 50 | ) 51 | await page.setExtraHTTPHeaders(only_injectable_headers(fingerprint.headers, 'chrome')) 52 | 53 | # Only set to dark mode if the Chrome version >= 76 54 | version = re.search('.*?/(\d+)[\d\.]+?', await browser.version()) 55 | if version and int(version[1]) >= 76: 56 | await page._client.send( 57 | 'Emulation.setEmulatedMedia', 58 | {'features': [{'name': 'prefers-color-scheme', 'value': 'dark'}]}, 59 | ) 60 | 61 | # Inject function 62 | await page.evaluateOnNewDocument(function) 63 | return page 64 | -------------------------------------------------------------------------------- /browserforge/injectors/undetected_playwright/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The undetected_playwright injector is a 1:1 copy of the playwright injector, 3 | using the "undetected_playwright" import name for typing purposes. 4 | """ 5 | 6 | from browserforge.injectors.utils import CheckIfInstalled 7 | 8 | CheckIfInstalled('undetected_playwright') 9 | 10 | from .injector import AsyncNewContext, NewContext 11 | -------------------------------------------------------------------------------- /browserforge/injectors/undetected_playwright/injector.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Optional 2 | 3 | from browserforge.fingerprints import Fingerprint 4 | from browserforge.injectors.utils import InjectFunction, _fingerprint, only_injectable_headers 5 | 6 | from undetected_playwright.async_api import Browser as AsyncBrowser 7 | from undetected_playwright.async_api import BrowserContext as AsyncBrowserContext 8 | from undetected_playwright.sync_api import Browser, BrowserContext 9 | 10 | 11 | async def AsyncNewContext( 12 | browser: AsyncBrowser, 13 | fingerprint: Optional[Fingerprint] = None, 14 | fingerprint_options: Optional[Dict] = None, 15 | **context_options, 16 | ) -> AsyncBrowserContext: 17 | """ 18 | Injects an async_api Undetected-Playwright context with a Fingerprint. 19 | 20 | Parameters: 21 | browser (Browser): The browser to create the context in 22 | fingerprint (Optional[Fingerprint]): The fingerprint to inject. If None, one will be generated 23 | fingerprint_options (Optional[Dict]): Options for the Fingerprint generator if `fingerprint` is not passed 24 | **context_options: Other options for the new context 25 | """ 26 | fingerprint = _fingerprint(fingerprint, fingerprint_options) 27 | function = InjectFunction(fingerprint) 28 | # Build new context 29 | context = await browser.new_context(**_context_options(fingerprint, context_options)) 30 | # Set headers 31 | await context.set_extra_http_headers( 32 | only_injectable_headers(fingerprint.headers, browser.browser_type.name) 33 | ) 34 | 35 | # Since there are no async lambdas, define a new async function for emulating dark scheme 36 | async def on_page(page): 37 | await page.emulate_media(color_scheme='dark') 38 | 39 | # Dark mode 40 | context.on("page", on_page) 41 | 42 | # Inject function 43 | await context.add_init_script(function) 44 | 45 | return context 46 | 47 | 48 | def NewContext( 49 | browser: Browser, 50 | fingerprint: Optional[Fingerprint] = None, 51 | fingerprint_options: Optional[Dict] = None, 52 | **context_options, 53 | ) -> BrowserContext: 54 | """ 55 | Injects a sync_api Undetected-Playwright context with a Fingerprint. 56 | 57 | Parameters: 58 | browser (Browser): The browser to create the context in 59 | fingerprint (Optional[Fingerprint]): The fingerprint to inject. If None, one will be generated 60 | fingerprint_options (Optional[Dict]): Options for the Fingerprint generator if `fingerprint` is not passed 61 | **context_options: Other options for the new context 62 | """ 63 | fingerprint = _fingerprint(fingerprint, fingerprint_options) 64 | function = InjectFunction(fingerprint) 65 | # Build new context 66 | context = browser.new_context(**_context_options(fingerprint, context_options)) 67 | # Set headers 68 | context.set_extra_http_headers( 69 | only_injectable_headers(fingerprint.headers, browser.browser_type.name) 70 | ) 71 | # Dark mode 72 | context.on("page", lambda page: page.emulate_media(color_scheme='dark')) 73 | 74 | # Inject function 75 | context.add_init_script(function) 76 | 77 | return context 78 | 79 | 80 | def _context_options( 81 | fingerprint: Fingerprint, 82 | options: Dict, 83 | ): 84 | """ 85 | Builds options for new context 86 | """ 87 | return { 88 | 'user_agent': fingerprint.navigator.userAgent, 89 | 'color_scheme': 'dark', 90 | 'viewport': { 91 | 'width': fingerprint.screen.width, 92 | 'height': fingerprint.screen.height, 93 | **options.pop('viewport', {}), 94 | }, 95 | 'extra_http_headers': { 96 | 'accept-language': fingerprint.headers['Accept-Language'], 97 | **options.pop('extra_http_headers', {}), 98 | }, 99 | **options, 100 | } 101 | -------------------------------------------------------------------------------- /browserforge/injectors/utils.py: -------------------------------------------------------------------------------- 1 | import lzma 2 | from pathlib import Path 3 | from random import randrange 4 | from typing import Dict, Optional, Set 5 | 6 | from browserforge.fingerprints import Fingerprint, FingerprintGenerator 7 | 8 | UTILS_JS: Path = Path(__file__).parent / 'data/utils.js.xz' 9 | 10 | request_headers: Set[str] = { 11 | 'accept-encoding', 12 | 'accept', 13 | 'cache-control', 14 | 'pragma', 15 | 'sec-fetch-dest', 16 | 'sec-fetch-mode', 17 | 'sec-fetch-site', 18 | 'sec-fetch-user', 19 | 'upgrade-insecure-requests', 20 | } 21 | 22 | 23 | def only_injectable_headers(headers: Dict[str, str], browser_name: str) -> Dict[str, str]: 24 | """ 25 | Some HTTP headers depend on the request (for example Accept (with values application/json, image/png) etc.). 26 | This function filters out those headers and leaves only the browser-wide ones. 27 | """ 28 | filtered_headers = {k: v for k, v in headers.items() if k.lower() not in request_headers} 29 | 30 | # Chromium-based controlled browsers do not support `te` header. 31 | # Remove the `te` header if the browser is not Firefox 32 | if browser_name and 'firefox' not in browser_name.lower(): 33 | if 'te' in filtered_headers: 34 | del filtered_headers['te'] 35 | if 'Te' in filtered_headers: 36 | del filtered_headers['Te'] 37 | 38 | return filtered_headers 39 | 40 | 41 | def InjectFunction(fingerprint: Fingerprint) -> str: 42 | return f""" 43 | (()=>{{ 44 | {utils_js()} 45 | 46 | const fp = {fingerprint.dumps()}; 47 | const {{ 48 | battery, 49 | navigator: {{ 50 | userAgentData, 51 | webdriver, 52 | ...navigatorProps 53 | }}, 54 | screen: allScreenProps, 55 | videoCard, 56 | audioCodecs, 57 | videoCodecs, 58 | mockWebRTC, 59 | }} = fp; 60 | 61 | slim = fp.slim; 62 | 63 | const historyLength = {randrange(1, 6)}; 64 | 65 | const {{ 66 | outerHeight, 67 | outerWidth, 68 | devicePixelRatio, 69 | innerWidth, 70 | innerHeight, 71 | screenX, 72 | pageXOffset, 73 | pageYOffset, 74 | clientWidth, 75 | clientHeight, 76 | hasHDR, 77 | ...newScreen 78 | }} = allScreenProps; 79 | 80 | const windowScreenProps = {{ 81 | innerHeight, 82 | outerHeight, 83 | outerWidth, 84 | innerWidth, 85 | screenX, 86 | pageXOffset, 87 | pageYOffset, 88 | devicePixelRatio, 89 | }}; 90 | 91 | const documentScreenProps = {{ 92 | clientHeight, 93 | clientWidth, 94 | }}; 95 | 96 | runHeadlessFixes(); 97 | if (mockWebRTC) blockWebRTC(); 98 | if (slim) {{ 99 | window['slim'] = true; 100 | }} 101 | overrideIntlAPI(navigatorProps.language); 102 | overrideStatic(); 103 | if (userAgentData) {{ 104 | overrideUserAgentData(userAgentData); 105 | }} 106 | if (window.navigator.webdriver) {{ 107 | navigatorProps.webdriver = false; 108 | }} 109 | overrideInstancePrototype(window.navigator, navigatorProps); 110 | overrideInstancePrototype(window.screen, newScreen); 111 | overrideWindowDimensionsProps(windowScreenProps); 112 | overrideDocumentDimensionsProps(documentScreenProps); 113 | overrideInstancePrototype(window.history, {{ length: historyLength }}); 114 | overrideWebGl(videoCard); 115 | overrideCodecs(audioCodecs, videoCodecs); 116 | overrideBattery(battery); 117 | }})() 118 | """ 119 | 120 | 121 | def utils_js() -> str: 122 | """ 123 | Opens and uncompresses the utils.js file and returns it as a string 124 | """ 125 | with lzma.open(UTILS_JS, 'rt') as f: 126 | return f.read() 127 | 128 | 129 | def _fingerprint( 130 | fingerprint: Optional[Fingerprint] = None, fingerprint_options: Optional[Dict] = None 131 | ) -> Fingerprint: 132 | """ 133 | Generates a fingerprint if one doesnt exist 134 | """ 135 | if fingerprint: 136 | return fingerprint 137 | generator = FingerprintGenerator() 138 | return generator.generate(**(fingerprint_options or {})) 139 | 140 | 141 | def CheckIfInstalled(module_name: str): 142 | """ 143 | Checks if a module is installed 144 | """ 145 | import importlib.util 146 | 147 | return importlib.util.find_spec(module_name) is not None 148 | -------------------------------------------------------------------------------- /browserforge/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijro/browserforge/99dd114332c17e895469107847e7193e2832504a/browserforge/py.typed -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["poetry-core>=1.0.0"] 3 | build-backend = "poetry.core.masonry.api" 4 | 5 | [tool.poetry] 6 | name = "browserforge" 7 | version = "1.2.4" 8 | description = "Intelligent browser header & fingerprint generator" 9 | authors = ["daijro "] 10 | license = "Apache-2.0" 11 | readme = "README.md" 12 | repository = "https://github.com/daijro/browserforge" 13 | keywords = [ 14 | "client", 15 | "headers", 16 | "fingerprint", 17 | "generator", 18 | "browser", 19 | "http", 20 | "scraping", 21 | "requests", 22 | "playwright", 23 | ] 24 | classifiers = [ 25 | "Topic :: Internet :: WWW/HTTP", 26 | "Topic :: Internet :: WWW/HTTP :: Browsers", 27 | "Topic :: Software Development :: Libraries :: Python Modules", 28 | ] 29 | 30 | [tool.poetry.dependencies] 31 | python = "^3.8" 32 | click = "*" 33 | orjson = { version = "*", optional = true } 34 | typing_extensions = {version = "*", python = "<3.10"} 35 | apify_fingerprint_datapoints = "*" 36 | 37 | [tool.poetry.extras] 38 | all = ["orjson"] 39 | --------------------------------------------------------------------------------