├── .gitignore
├── LICENSE
├── README.md
├── browserforge
├── bayesian_network.py
├── download.py
├── fingerprints
│ ├── __init__.py
│ └── generator.py
├── headers
│ ├── __init__.py
│ ├── generator.py
│ └── utils.py
├── injectors
│ ├── __init__.py
│ ├── data
│ │ ├── __init__.py
│ │ └── utils.js.xz
│ ├── playwright
│ │ ├── __init__.py
│ │ └── injector.py
│ ├── pyppeteer
│ │ ├── __init__.py
│ │ └── injector.py
│ ├── undetected_playwright
│ │ ├── __init__.py
│ │ └── injector.py
│ └── utils.py
└── py.typed
└── pyproject.toml
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Binaries
10 | *.dll
11 | *.dylib
12 |
13 | # Model files
14 | *.json
15 |
16 | # Distribution / packaging
17 | .Python
18 | build/
19 | develop-eggs/
20 | dist/
21 | downloads/
22 | eggs/
23 | .eggs/
24 | lib/
25 | lib64/
26 | parts/
27 | sdist/
28 | var/
29 | wheels/
30 | share/python-wheels/
31 | *.egg-info/
32 | .installed.cfg
33 | *.egg
34 | MANIFEST
35 |
36 | # PyInstaller
37 | # Usually these files are written by a python script from a template
38 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
39 | *.manifest
40 | *.spec
41 |
42 | # Installer logs
43 | pip-log.txt
44 | pip-delete-this-directory.txt
45 |
46 | # Unit test / coverage reports
47 | htmlcov/
48 | .tox/
49 | .nox/
50 | .coverage
51 | .coverage.*
52 | .cache
53 | nosetests.xml
54 | coverage.xml
55 | *.cover
56 | *.py,cover
57 | .hypothesis/
58 | .pytest_cache/
59 | cover/
60 |
61 | # Translations
62 | *.mo
63 | *.pot
64 |
65 | # Django stuff:
66 | *.log
67 | local_settings.py
68 | db.sqlite3
69 | db.sqlite3-journal
70 |
71 | # Flask stuff:
72 | instance/
73 | .webassets-cache
74 |
75 | # Scrapy stuff:
76 | .scrapy
77 |
78 | # Sphinx documentation
79 | docs/_build/
80 |
81 | # PyBuilder
82 | .pybuilder/
83 | target/
84 |
85 | # Jupyter Notebook
86 | .ipynb_checkpoints
87 |
88 | # IPython
89 | profile_default/
90 | ipython_config.py
91 |
92 | # pyenv
93 | # For a library or package, you might want to ignore these files since the code is
94 | # intended to run in multiple environments; otherwise, check them in:
95 | # .python-version
96 |
97 | # pipenv
98 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
99 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
100 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
101 | # install all needed dependencies.
102 | #Pipfile.lock
103 |
104 | # poetry
105 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
106 | # This is especially recommended for binary packages to ensure reproducibility, and is more
107 | # commonly ignored for libraries.
108 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
109 | #poetry.lock
110 |
111 | # pdm
112 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
113 | #pdm.lock
114 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
115 | # in version control.
116 | # https://pdm.fming.dev/#use-with-ide
117 | .pdm.toml
118 |
119 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
120 | __pypackages__/
121 |
122 | # Celery stuff
123 | celerybeat-schedule
124 | celerybeat.pid
125 |
126 | # SageMath parsed files
127 | *.sage.py
128 |
129 | # Environments
130 | .env
131 | .venv
132 | env/
133 | venv/
134 | ENV/
135 | env.bak/
136 | venv.bak/
137 |
138 | # Spyder project settings
139 | .spyderproject
140 | .spyproject
141 |
142 | # Rope project settings
143 | .ropeproject
144 |
145 | # mkdocs documentation
146 | /site
147 |
148 | # mypy
149 | .mypy_cache/
150 | .dmypy.json
151 | dmypy.json
152 |
153 | # Pyre type checker
154 | .pyre/
155 |
156 | # pytype static type analyzer
157 | .pytype/
158 |
159 | # Cython debug symbols
160 | cython_debug/
161 |
162 | # PyCharm
163 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
164 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
165 | # and can be added to the global gitignore or merged into this file. For a more nuclear
166 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
167 | #.idea/
168 |
169 | # VsCode
170 | .vscode
171 | .trunk
172 |
173 | # Downloaded files
174 | data/
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 | BrowserForge
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 | 🎭 Intelligent browser header & fingerprint generator
31 |
32 |
33 | ---
34 |
35 | ## What is it?
36 |
37 | BrowserForge is a browser header and fingerprint generator that mimics the frequency of different browsers, operating systems, and devices found in the wild.
38 |
39 | It is a reimplementation of [Apify's fingerprint-suite](https://github.com/apify/fingerprint-suite) in Python.
40 |
41 | ## Features
42 |
43 | - Uses a Bayesian generative network to mimic actual web traffic
44 | - Extremely fast runtime (0.1-0.2 milliseconds)
45 | - Easy and simple for humans to use
46 | - Extensive customization options for browsers, operating systems, devices, locales, and HTTP version
47 | - Written with type safety
48 |
49 | ## Installation
50 |
51 | ```
52 | pip install browserforge[all]
53 | ```
54 | ## Usage
55 |
56 | ## Generating Headers
57 |
58 | ### Simple usage
59 |
60 | ```py
61 | >>> from browserforge.headers import HeaderGenerator
62 | >>> headers = HeaderGenerator()
63 | >>> headers.generate()
64 | {'sec-ch-ua': '"Chromium";v="122", "Not(A:Brand";v="24", "Google Chrome";v="122"', 'Sec-Ch-Ua-Mobile': '?0', 'Sec-Ch-Ua-Platform': '"Windows"', 'Upgrade-Insecure-Requests': '1', 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7', 'Sec-Fetch-Site': '?1', 'Sec-Fetch-Mode': 'same-site', 'Sec-Fetch-User': 'document', 'Sec-Fetch-Dest': 'navigate', 'Accept-Encoding': 'gzip, deflate, br, zstd', 'Accept-Language': 'en-US;q=1.0'}
65 | ```
66 |
67 | ### Using with requests
68 |
69 | Headers can be added to a session in [requests](https://github.com/psf/requests) (or similar libraries) by assigning them to the `headers` attribute:
70 |
71 | ```py
72 | import requests
73 | session = requests.Session()
74 | # Set the session headers
75 | session.headers = headers.generate()
76 | ```
77 |
78 |
79 | Parameters for HeaderGenerator
80 |
81 | ```
82 | Parameters:
83 | browser (Union[ListOrString, Iterable[Browser]], optional): Browser(s) or Browser object(s).
84 | os (ListOrString, optional): Operating system(s) to generate headers for.
85 | device (ListOrString, optional): Device(s) to generate the headers for.
86 | locale (ListOrString, optional): List of at most 10 languages for the Accept-Language header. Default is 'en-US'.
87 | http_version (Literal[1, 2], optional): HTTP version to be used to generate headers. Defaults to 2.
88 | strict (bool, optional): Throws an error if it cannot generate headers based on the input. Defaults to False.
89 | ```
90 |
91 |
92 |
93 |
94 | Parameters for HeaderGenerator.generate
95 |
96 | ```
97 | Generates headers using the default options and their possible overrides.
98 |
99 | Parameters:
100 | browser (Optional[Iterable[Union[str, Browser]]], optional): Browser(s) to generate the headers for.
101 | os (Optional[ListOrString], optional): Operating system(s) to generate the headers for.
102 | device (Optional[ListOrString], optional): Device(s) to generate the headers for.
103 | locale (Optional[ListOrString], optional): Language(s) to include in the Accept-Language header.
104 | http_version (Optional[Literal[1, 2]], optional): HTTP version to be used to generate headers.
105 | user_agent (Optional[ListOrString], optional): User-Agent(s) to use.
106 | request_dependent_headers (Optional[Dict[str, str]], optional): Known values of request-dependent headers.
107 | strict (Optional[bool], optional): If true, throws an error if it cannot generate headers based on the input.
108 | ```
109 |
110 |
111 |
112 | ### Constraining headers
113 |
114 | #### Single constraint
115 |
116 | Set constraints for browsers by passing the optional strings below:
117 |
118 | ```py
119 | headers = HeaderGenerator(
120 | browser='chrome',
121 | os='windows',
122 | device='desktop',
123 | locale='en-US',
124 | http_version=2
125 | )
126 | ```
127 |
128 | #### Multiple constraints
129 |
130 | Set multiple constraints to select from. Options are selected based on their actual frequency in the wild:
131 |
132 | ```py
133 | headers = HeaderGenerator(
134 | browser=('chrome', 'firefox', 'safari', 'edge'),
135 | os=('windows', 'macos', 'linux', 'android', 'ios'),
136 | device=('desktop', 'mobile'),
137 | locale=('en-US', 'en', 'de'),
138 | http_version=2
139 | )
140 | ```
141 |
142 | #### Browser specifications
143 |
144 | Set specifications for browsers, including version ranges and HTTP version:
145 |
146 | ```py
147 | from browserforge.headers import Browser
148 |
149 | browsers = [
150 | Browser(name='chrome', min_version=100, max_version=110),
151 | Browser(name='firefox', max_version=80, http_version=1),
152 | Browser(name='edge', min_version=95),
153 | ]
154 | headers = HeaderGenerator(browser=browsers)
155 | ```
156 |
157 | Note that all constraints passed into the `HeaderGenerator` constructor can be overridden by passing them into the `generate` method.
158 |
159 | #### Generate headers given User-Agent
160 |
161 | Headers can be generated given an existing user agent:
162 |
163 | ```py
164 | >>> headers.generate(user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36')
165 | ```
166 |
167 | Select from multiple User-Agents based on their frequency in the wild:
168 |
169 | ```py
170 | >>> headers.generate(user_agent=(
171 | 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36',
172 | 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/115.0'
173 | ))
174 | ```
175 |
176 |
177 |
178 | ## Generating Fingerprints
179 |
180 | ### Simple usage
181 |
182 | Initialize FingerprintGenerator:
183 |
184 | ```py
185 | from browserforge.fingerprints import FingerprintGenerator
186 | fingerprints = FingerprintGenerator()
187 | fingerprints.generate()
188 | ```
189 |
190 |
191 | Parameters for FingerprintGenerator
192 |
193 | ```
194 | Parameters:
195 | screen (Screen, optional): Screen constraints for the generated fingerprint.
196 | strict (bool, optional): Whether to raise an exception if the constraints are too strict. Default is False.
197 | mock_webrtc (bool, optional): Whether to mock WebRTC when injecting the fingerprint. Default is False.
198 | slim (bool, optional): Disables performance-heavy evasions when injecting the fingerprint. Default is False.
199 | **header_kwargs: Header generation options for HeaderGenerator
200 | ```
201 |
202 |
203 |
204 |
205 | Parameters for FingerprintGenerator.generate
206 |
207 | ```
208 | Generates a fingerprint and a matching set of ordered headers using a combination of the default options specified in the constructor and their possible overrides provided here.
209 |
210 | Parameters:
211 | screen (Screen, optional): Screen constraints for the generated fingerprint.
212 | strict (bool, optional): Whether to raise an exception if the constraints are too strict.
213 | mock_webrtc (bool, optional): Whether to mock WebRTC when injecting the fingerprint. Default is False.
214 | slim (bool, optional): Disables performance-heavy evasions when injecting the fingerprint. Default is False.
215 | **header_kwargs: Additional header generation options for HeaderGenerator.generate
216 | ```
217 |
218 |
219 |
220 |
221 | Example response
222 |
223 | ```
224 | Fingerprint(screen=ScreenFingerprint(availHeight=784,
225 | availWidth=1440,
226 | availTop=25,
227 | availLeft=0,
228 | colorDepth=30,
229 | height=900,
230 | pixelDepth=30,
231 | width=1440,
232 | devicePixelRatio=2,
233 | pageXOffset=0,
234 | pageYOffset=0,
235 | innerHeight=0,
236 | outerHeight=718,
237 | outerWidth=1440,
238 | innerWidth=0,
239 | screenX=0,
240 | clientWidth=0,
241 | clientHeight=19,
242 | hasHDR=True),
243 | navigator=NavigatorFingerprint(userAgent='Mozilla/5.0 (Macintosh; '
244 | 'Intel Mac OS X 10_15_7) '
245 | 'AppleWebKit/537.36 '
246 | '(KHTML, like Gecko) '
247 | 'Chrome/121.0.0.0 '
248 | 'Safari/537.36',
249 | userAgentData={'architecture': 'arm',
250 | 'bitness': '64',
251 | 'brands': [{'brand': 'Not '
252 | 'A(Brand',
253 | 'version': '99'},
254 | {'brand': 'Google '
255 | 'Chrome',
256 | 'version': '121'},
257 | {'brand': 'Chromium',
258 | 'version': '121'}],
259 | 'fullVersionList': [{'brand': 'Not '
260 | 'A(Brand',
261 | 'version': '99.0.0.0'},
262 | {'brand': 'Google '
263 | 'Chrome',
264 | 'version': '121.0.6167.160'},
265 | {'brand': 'Chromium',
266 | 'version': '121.0.6167.160'}],
267 | 'mobile': False,
268 | 'model': '',
269 | 'platform': 'macOS',
270 | 'platformVersion': '13.6.1',
271 | 'uaFullVersion': '121.0.6167.160'},
272 | doNotTrack=None,
273 | appCodeName='Mozilla',
274 | appName='Netscape',
275 | appVersion='5.0 (Macintosh; Intel '
276 | 'Mac OS X 10_15_7) '
277 | 'AppleWebKit/537.36 '
278 | '(KHTML, like Gecko) '
279 | 'Chrome/121.0.0.0 '
280 | 'Safari/537.36',
281 | oscpu=None,
282 | webdriver=False,
283 | language='en-US',
284 | languages=['en-US'],
285 | platform='MacIntel',
286 | deviceMemory=8,
287 | hardwareConcurrency=10,
288 | product='Gecko',
289 | productSub='20030107',
290 | vendor='Google Inc.',
291 | vendorSub=None,
292 | maxTouchPoints=0,
293 | extraProperties={'globalPrivacyControl': None,
294 | 'installedApps': [],
295 | 'isBluetoothSupported': False,
296 | 'pdfViewerEnabled': True,
297 | 'vendorFlavors': ['chrome']}),
298 | headers={'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
299 | 'Accept-Encoding': 'gzip, deflate, br',
300 | 'Accept-Language': 'en-US;q=1.0',
301 | 'Sec-Fetch-Dest': 'navigate',
302 | 'Sec-Fetch-Mode': 'same-site',
303 | 'Sec-Fetch-Site': '?1',
304 | 'Sec-Fetch-User': 'document',
305 | 'Upgrade-Insecure-Requests': '1',
306 | 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X '
307 | '10_15_7) AppleWebKit/537.36 (KHTML, like '
308 | 'Gecko) Chrome/121.0.0.0 Safari/537.36',
309 | 'sec-ch-ua': '"Not A(Brand";v="99", "Google '
310 | 'Chrome";v="121", "Chromium";v="121"',
311 | 'sec-ch-ua-mobile': '?0',
312 | 'sec-ch-ua-platform': '"macOS"'},
313 | videoCodecs={'h264': 'probably', 'ogg': '', 'webm': 'probably'},
314 | audioCodecs={'aac': 'probably',
315 | 'm4a': 'maybe',
316 | 'mp3': 'probably',
317 | 'ogg': 'probably',
318 | 'wav': 'probably'},
319 | pluginsData={'mimeTypes': ['Portable Document '
320 | 'Format~~application/pdf~~pdf',
321 | 'Portable Document '
322 | 'Format~~text/pdf~~pdf'],
323 | 'plugins': [{'description': 'Portable Document Format',
324 | 'filename': 'internal-pdf-viewer',
325 | 'mimeTypes': [{'description': 'Portable '
326 | 'Document '
327 | 'Format',
328 | 'enabledPlugin': 'PDF '
329 | 'Viewer',
330 | 'suffixes': 'pdf',
331 | 'type': 'application/pdf'},
332 | {'description': 'Portable '
333 | 'Document '
334 | 'Format',
335 | 'enabledPlugin': 'PDF '
336 | 'Viewer',
337 | 'suffixes': 'pdf',
338 | 'type': 'text/pdf'}],
339 | 'name': 'PDF Viewer'},
340 | {'description': 'Portable Document Format',
341 | 'filename': 'internal-pdf-viewer',
342 | 'mimeTypes': [{'description': 'Portable '
343 | 'Document '
344 | 'Format',
345 | 'enabledPlugin': 'Chrome '
346 | 'PDF '
347 | 'Viewer',
348 | 'suffixes': 'pdf',
349 | 'type': 'application/pdf'},
350 | {'description': 'Portable '
351 | 'Document '
352 | 'Format',
353 | 'enabledPlugin': 'Chrome '
354 | 'PDF '
355 | 'Viewer',
356 | 'suffixes': 'pdf',
357 | 'type': 'text/pdf'}],
358 | 'name': 'Chrome PDF Viewer'},
359 | {'description': 'Portable Document Format',
360 | 'filename': 'internal-pdf-viewer',
361 | 'mimeTypes': [{'description': 'Portable '
362 | 'Document '
363 | 'Format',
364 | 'enabledPlugin': 'Chromium '
365 | 'PDF '
366 | 'Viewer',
367 | 'suffixes': 'pdf',
368 | 'type': 'application/pdf'},
369 | {'description': 'Portable '
370 | 'Document '
371 | 'Format',
372 | 'enabledPlugin': 'Chromium '
373 | 'PDF '
374 | 'Viewer',
375 | 'suffixes': 'pdf',
376 | 'type': 'text/pdf'}],
377 | 'name': 'Chromium PDF Viewer'},
378 | {'description': 'Portable Document Format',
379 | 'filename': 'internal-pdf-viewer',
380 | 'mimeTypes': [{'description': 'Portable '
381 | 'Document '
382 | 'Format',
383 | 'enabledPlugin': 'Microsoft '
384 | 'Edge '
385 | 'PDF '
386 | 'Viewer',
387 | 'suffixes': 'pdf',
388 | 'type': 'application/pdf'},
389 | {'description': 'Portable Document '
390 | 'Format',
391 | 'enabledPlugin': 'Microsoft '
392 | 'Edge '
393 | 'PDF '
394 | 'Viewer',
395 | 'suffixes': 'pdf',
396 | 'type': 'text/pdf'}],
397 | 'name': 'Microsoft Edge PDF Viewer'},
398 | {'description': 'Portable Document Format',
399 | 'filename': 'internal-pdf-viewer',
400 | 'mimeTypes': [{'description': 'Portable '
401 | 'Document '
402 | 'Format',
403 | 'enabledPlugin': 'WebKit '
404 | 'built-in '
405 | 'PDF',
406 | 'suffixes': 'pdf',
407 | 'type': 'application/pdf'},
408 | {'description': 'Portable '
409 | 'Document '
410 | 'Format',
411 | 'enabledPlugin': 'WebKit '
412 | 'built-in '
413 | 'PDF',
414 | 'suffixes': 'pdf',
415 | 'type': 'text/pdf'}],
416 | 'name': 'WebKit built-in PDF'}]},
417 | battery={'charging': False,
418 | 'chargingTime': None,
419 | 'dischargingTime': 29940,
420 | 'level': 0.98},
421 | videoCard=VideoCard(renderer='ANGLE (Apple, ANGLE Metal Renderer: '
422 | 'Apple M2 Pro, Unspecified Version)',
423 | vendor='Google Inc. (Apple)'),
424 | multimediaDevices={'micros': [{'deviceId': '',
425 | 'groupId': '',
426 | 'kind': 'audioinput',
427 | 'label': ''}],
428 | 'speakers': [{'deviceId': '',
429 | 'groupId': '',
430 | 'kind': 'audiooutput',
431 | 'label': ''}],
432 | 'webcams': [{'deviceId': '',
433 | 'groupId': '',
434 | 'kind': 'videoinput',
435 | 'label': ''}]},
436 | fonts=['Arial Unicode MS', 'Gill Sans', 'Helvetica Neue', 'Menlo'],
437 | mockWebRTC=False,
438 | slim=False)
439 | ```
440 |
441 |
442 |
443 | ### Constraining fingerprints
444 |
445 | #### Screen width/height
446 |
447 | Constrain the minimum/maximum screen width and height:
448 |
449 | ```py
450 | from browserforge.fingerprints import Screen
451 |
452 | screen = Screen(
453 | min_width=100,
454 | max_width=1280,
455 | min_height=400,
456 | max_height=720,
457 | )
458 |
459 | fingerprints = FingerprintGenerator(screen=screen)
460 | ```
461 |
462 | Note: Not all bounds need to be defined.
463 |
464 | #### Browser specifications
465 |
466 | `FingerprintGenerator` and `FingerprintGenerator.generate` inherit the same parameters from `HeaderGenerator`.
467 |
468 | Because of this, user agents, browser specifications, device types, and operating system constraints can also be passed into `FingerprintGenerator.generate`.
469 |
470 | Here is a usage example:
471 |
472 | ```py
473 | fingerprints.generate(browser='chrome', os='windows')
474 | ```
475 |
476 |
477 |
478 | ## Injecting Fingerprints
479 |
480 | > [!WARNING]
481 | > Fingerprint injection in BrowserForge is deprecated. Please check out [Camoufox](https://github.com/daijro/camoufox) instead.
482 |
483 | BrowserForge is fully compatible with your existing Playwright and Pyppeteer code. You only have to change your context/page initialization.
484 |
485 | ### Playwright
486 |
487 | #### Async API:
488 |
489 | ```py
490 | # Import the AsyncNewContext injector
491 | from browserforge.injectors.playwright import AsyncNewContext
492 |
493 | async def main():
494 | async with async_playwright() as playwright:
495 | browser = await playwright.chromium.launch()
496 | # Create a new async context with the injected fingerprint
497 | context = await AsyncNewContext(browser, fingerprint=fingerprint)
498 | page = await context.new_page()
499 | ...
500 | ```
501 |
502 | Replace `await browser.new_context` with `await AsyncNewContext` in your existing Playwright code.
503 |
504 |
505 | Parameters for AsyncNewContext
506 |
507 | ```
508 | Injects an async_api Playwright context with a Fingerprint.
509 |
510 | Parameters:
511 | browser (Browser): The browser to create the context in
512 | fingerprint (Optional[Fingerprint]): The fingerprint to inject. If None, one will be generated
513 | fingerprint_options (Optional[Dict]): Options for the Fingerprint generator if `fingerprint` is not passed
514 | **new_context_options: Other options for the new context
515 | ```
516 |
517 |
518 |
519 | #### Sync API:
520 |
521 | ```py
522 | # Import the NewContext injector
523 | from browserforge.injectors.playwright import NewContext
524 |
525 | def main():
526 | with sync_playwright() as playwright:
527 | browser = playwright.chromium.launch()
528 | # Create a new context with the injected fingerprint
529 | context = NewContext(browser, fingerprint=fingerprint)
530 | page = context.new_page()
531 | ...
532 | ```
533 |
534 | Replace `browser.new_context` with `NewContext` in your existing Playwright code.
535 |
536 |
537 | Parameters for NewContext
538 |
539 | ```
540 | Injects a sync_api Playwright context with a Fingerprint.
541 |
542 | Parameters:
543 | browser (Browser): The browser to create the context in
544 | fingerprint (Optional[Fingerprint]): The fingerprint to inject. If None, one will be generated
545 | fingerprint_options (Optional[Dict]): Options for the Fingerprint generator if `fingerprint` is not passed
546 | **new_context_options: Other options for the new context
547 | ```
548 |
549 |
550 |
551 | #### Undetected-Playwright
552 |
553 | [Undetected-Playwright](https://github.com/kaliiiiiiiiii/undetected-playwright-python) is also supported in the `browserforge.injectors.undetected_playwright` package. The usage is the same as the Playwright injector.
554 |
555 | ### Pyppeteer
556 |
557 | ```py
558 | # Import the NewPage injector
559 | from browserforge.injectors.pyppeteer import NewPage
560 | from pyppeteer import launch
561 |
562 | async def test():
563 | browser = await launch()
564 | # Create a new page with the injected fingerprint
565 | page = await NewPage(browser, fingerprint=fingerprint)
566 | ...
567 | ```
568 |
569 | Replace `browser.newPage` with `NewPage` in your existing Pyppeteer code.
570 |
571 |
572 | Parameters for NewPage
573 |
574 | ```
575 | Injects a Pyppeteer browser object with a Fingerprint.
576 |
577 | Parameters:
578 | browser (Browser): The browser to create the context in
579 | fingerprint (Optional[Fingerprint]): The fingerprint to inject. If None, one will be generated
580 | fingerprint_options (Optional[Dict]): Options for the Fingerprint generator if `fingerprint` is not passed
581 | ```
582 |
583 |
584 |
585 |
586 |
587 | ## Uninstall
588 |
589 | ```
590 | pip uninstall browserforge
591 | ```
592 |
593 | ---
594 |
--------------------------------------------------------------------------------
/browserforge/bayesian_network.py:
--------------------------------------------------------------------------------
1 | import random
2 | import zipfile
3 | from pathlib import Path
4 | from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple, TypeVar, Union
5 |
6 | try:
7 | import orjson as json
8 | except ImportError:
9 | import json
10 |
# Generic element type used in the signatures of the array helpers in this module
T = TypeVar('T')
# Container types accepted for a collection of valid keys
Map = Union[list, tuple]
13 |
14 |
class BayesianNode:
    """
    Implementation of a single node in a bayesian network allowing sampling from its conditional distribution
    """

    def __init__(self, node_definition: Dict[str, Any]):
        """
        Parameters:
            node_definition: Raw node description. The keys read by this class are
                'name', 'conditionalProbabilities', 'parentNames' and 'possibleValues'.
        """
        self.node_definition = node_definition

    def get_probabilities_given_known_values(
        self, parent_values: Dict[str, Any]
    ) -> Dict[Any, float]:
        """
        Extracts the probabilities of this node's values given the values of the parent nodes.

        Walks the nested 'deeper'/'skip' table one level per parent: a known parent value
        descends into 'deeper', anything else falls back to the 'skip' branch
        (or an empty table when there is none).
        """
        probabilities = self.node_definition['conditionalProbabilities']
        for parent_name in self.parent_names:
            parent_value = parent_values.get(parent_name)
            if parent_value in probabilities.get('deeper', {}):
                probabilities = probabilities['deeper'][parent_value]
            else:
                probabilities = probabilities.get('skip', {})
        return probabilities

    def sample_random_value_from_possibilities(
        self, possible_values: List[str], probabilities: Dict[str, float]
    ) -> Any:
        """
        Randomly samples from the given values using the given probabilities.

        The weights are renormalised over `possible_values`, so the draw stays proportional
        even when the candidates cover only part of the probability mass (e.g. after
        restrictions or bans removed some values).

        Parameters:
            possible_values: Candidate values; each must be a key of `probabilities`.
            probabilities: Mapping of value -> probability.

        Returns:
            One of `possible_values`.

        Raises:
            IndexError: If `possible_values` is empty.
        """
        # Python natively supports weighted random sampling in random.choices,
        # but this method is much faster
        total_probability = sum(probabilities[value] for value in possible_values)
        # Scale the anchor to the mass actually covered by the candidates; an anchor drawn
        # from [0, 1) would mostly miss them and fall through to the default below
        anchor = random.random() * total_probability
        cumulative_probability = 0.0
        for possible_value in possible_values:
            cumulative_probability += probabilities[possible_value]
            if cumulative_probability > anchor:
                return possible_value
        # Default to first item (only reached when all weights are zero)
        return possible_values[0]

    def sample(self, parent_values: Dict[str, Any]) -> Any:
        """
        Randomly samples from the conditional distribution of this node given values of parents
        """
        probabilities = self.get_probabilities_given_known_values(parent_values)
        return self.sample_random_value_from_possibilities(
            list(probabilities.keys()), probabilities
        )

    def sample_according_to_restrictions(
        self,
        parent_values: Dict[str, Any],
        value_possibilities: Iterable[str],
        banned_values: List[str],
    ) -> Optional[str]:
        """
        Randomly samples from the conditional distribution of this node given restrictions on the possible values and the values of the parents.

        Returns:
            The sampled value, or None when no allowed value has a probability entry.
        """
        probabilities = self.get_probabilities_given_known_values(parent_values)
        valid_values = [
            value
            for value in value_possibilities
            if value not in banned_values and value in probabilities
        ]
        if valid_values:
            return self.sample_random_value_from_possibilities(valid_values, probabilities)
        else:
            return None  # Equivalent to `false` in TypeScript

    @property
    def name(self) -> str:
        """Name of the node as given in its definition"""
        return self.node_definition['name']

    @property
    def parent_names(self) -> List[str]:
        """Names of the parent nodes (empty when the definition lists none)"""
        return self.node_definition.get('parentNames', [])

    @property
    def possible_values(self) -> List[str]:
        """Values listed for this node in its definition (empty when absent)"""
        return self.node_definition.get('possibleValues', [])
95 |
96 |
class BayesianNetwork:
    """
    Bayesian network that can draw random samples from the distribution it describes
    """

    def __init__(self, path: Path) -> None:
        """
        Loads the network definition found at `path` and builds its nodes,
        keeping them both in sampling order and indexed by name.
        """
        definition = extract_json(path)
        ordered_nodes = [BayesianNode(entry) for entry in definition['nodes']]
        self.nodes_in_sampling_order = ordered_nodes
        self.nodes_by_name = {node.name: node for node in ordered_nodes}

    def generate_sample(self, input_values: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """
        Draws one random sample from the network.

        Values supplied in `input_values` are kept as-is; every other node is sampled
        in order, conditioned on the values collected so far. The input dict is not mutated.
        """
        sample = {} if input_values is None else input_values.copy()
        for node in self.nodes_in_sampling_order:
            if node.name in sample:
                continue
            sample[node.name] = node.sample(sample)
        return sample

    def generate_consistent_sample_when_possible(
        self, value_possibilities: Dict[str, Iterable[str]]
    ) -> Optional[Dict[str, Any]]:
        """
        Draws a random sample that respects the given per-node value restrictions.
        Returns None when no such sample exists.
        """
        return self.recursively_generate_consistent_sample_when_possible({}, value_possibilities, 0)

    def recursively_generate_consistent_sample_when_possible(
        self,
        sample_so_far: Dict[str, Any],
        value_possibilities: Dict[str, Iterable[str]],
        depth: int,
    ) -> Optional[Dict[str, Any]]:
        """
        Backtracking step: picks a value for the node at `depth`, recurses into the
        remaining nodes, and bans the value if the rest of the sample cannot be completed.
        """
        nodes = self.nodes_in_sampling_order
        if depth == len(nodes):
            # Every node has a value
            return sample_so_far

        node = nodes[depth]
        rejected: List[str] = []
        while True:
            candidate = node.sample_according_to_restrictions(
                sample_so_far,
                value_possibilities.get(node.name, node.possible_values),
                rejected,
            )
            if candidate is None:
                # No allowed value left for this node
                return None

            sample_so_far[node.name] = candidate
            completed = self.recursively_generate_consistent_sample_when_possible(
                sample_so_far, value_possibilities, depth + 1
            )
            if completed is not None:
                return completed

            # Dead end: undo the choice and never try it again at this level
            rejected.append(candidate)
            del sample_so_far[node.name]
162 |
163 |
def array_intersection(a: Sequence[T], b: Sequence[T]) -> List[T]:
    """
    Returns the items of `a` that also occur in `b`, keeping the order
    (and any duplicates) of `a`.
    """
    lookup = set(b)
    common: List[T] = []
    for item in a:
        if item in lookup:
            common.append(item)
    return common
170 |
171 |
def array_zip(a: List[Tuple[T, ...]], b: List[Tuple[T, ...]]) -> List[Tuple[T, ...]]:
    """
    Merges two arrays position by position, taking the set union of each pair
    Args:
        a: First array to be combined.
        b: Second array to be combined.
    Returns:
        Zipped (multi-dimensional) array; as long as the shorter input.
    """
    merged: List[Tuple[T, ...]] = []
    for left, right in zip(a, b):
        merged.append(tuple(set(left).union(right)))
    return merged
182 |
183 |
def undeeper(obj: Dict[str, Any]) -> Dict[str, Any]:
    """
    Flattens a conditional probability table: 'deeper' wrappers are merged into
    their parent level and 'skip' branches are dropped. Non-dict values are returned unchanged.
    """
    if not isinstance(obj, dict):
        return obj

    flattened: Dict[str, Any] = {}
    for key, value in obj.items():
        if key == 'deeper':
            # Lift the children of the wrapper into the current level
            flattened.update(undeeper(value))
        elif key != 'skip':
            flattened[key] = undeeper(value)
    return flattened
199 |
200 |
def filter_by_last_level_keys(tree: Dict[str, Any], valid_keys: Map) -> List[Tuple[str, ...]]:
    r"""
    Walks the tree depth-first and collects, level by level, the keys found on every
    path that ends in one of the given leaf keys (first level is the root)
    ```
        1
       / \
      2   3
     / \ / \
    4  5 6  7
    ```
    filter_by_last_level_keys(tree, ['4', '7']) => [[1], [2,3]]
    """
    levels: List[Tuple[str, ...]] = []

    def walk(subtree: Dict[str, Any], path: List[str]) -> None:
        for key, child in subtree.items():
            if isinstance(child, dict):
                walk(child, path + [key])
                continue
            if key not in valid_keys:
                continue
            # Matching leaf: fold the path leading to it into the per-level result
            path_levels = [(name,) for name in path]
            if levels:
                levels[:] = [
                    tuple(set(seen).union(new)) for seen, new in zip(levels, path_levels)
                ]
            else:
                levels[:] = path_levels

    walk(tree, [])
    return levels
232 |
233 |
def get_possible_values(
    network: 'BayesianNetwork', possible_values: Dict[str, Union[Tuple[str, ...], List[str]]]
) -> Dict[str, Sequence[str]]:
    """
    Expands a set of user constraints into the constraints they **induce** on other
    nodes, based on the structure of the given `generative-bayesian-network` instance.

    Entries whose value is not a list or tuple are ignored.

    Raises:
        ValueError: If a constraint is empty, or if the induced constraints for a node
            have nothing in common.
    """
    too_restrictive = "The current constraints are too restrictive. No possible values can be found for the given constraints."

    # For every pre-specified node, work out which parent values can lead to it
    induced = []
    for key, value in possible_values.items():
        if not isinstance(value, (list, tuple)):
            continue
        if len(value) == 0:
            raise ValueError(too_restrictive)
        node = network.nodes_by_name[key]
        tree = undeeper(node.node_definition['conditionalProbabilities'])
        parent_levels = filter_by_last_level_keys(tree, value)
        constraint = dict(zip(node.parent_names, parent_levels))
        constraint[key] = value
        induced.append(constraint)

    # Intersect the per-node values across all induced constraint sets
    result: Dict[str, Sequence[str]] = {}
    for constraint in induced:
        for name, values in constraint.items():
            if name not in result:
                result[name] = values
                continue
            shared = array_intersection(values, result[name])
            if not shared:
                raise ValueError(too_restrictive)
            result[name] = shared

    return result
271 |
272 |
def extract_json(path: Path) -> dict:
    """
    Loads JSON content from a plain file or from the first `.json` member of a zip archive.

    Parameters:
        path: The path to the zip file or JSON file.

    Returns:
        A dictionary representing the JSON content. An archive without any
        `.json` member yields an empty dictionary.
    """
    if path.suffix == '.zip':
        with zipfile.ZipFile(path, 'r') as archive:
            # Only the first JSON member is used
            json_member = next(
                (member for member in archive.namelist() if member.endswith('.json')), None
            )
            if json_member is None:
                return {}  # Broken
            with archive.open(json_member) as handle:
                return json.loads(handle.read())

    # Anything else is treated as a raw JSON file
    with open(path, 'rb') as handle:
        return json.loads(handle.read())
297 |
--------------------------------------------------------------------------------
/browserforge/download.py:
--------------------------------------------------------------------------------
1 |
2 | import click
3 |
4 | """
5 | Downloads the required model definitions - deprecated
6 | """
7 |
8 |
9 | """
10 | Public download functions
11 | """
12 |
13 |
def Download(headers=False, fingerprints=False) -> None:
    """
    Deprecated: model definition files no longer need to be downloaded.

    They are installed as an explicit Python package dependency. Both arguments
    are ignored; calling this only prints a deprecation notice.
    """
    notice = 'Deprecated. Downloading model definition files is no longer needed.'
    click.secho(notice, fg='bright_yellow')
21 |
22 |
def DownloadIfNotExists(**flags: bool) -> None:
    """
    Deprecated: model definition files no longer need to be downloaded.

    They are installed as an explicit Python package dependency, so this is a
    no-op that accepts and ignores any flags.
    """
    return None
30 |
31 |
def IsDownloaded(**flags: bool) -> bool:
    """
    Deprecated: model definition files no longer need to be downloaded.

    They are installed as an explicit Python package dependency, so this
    always reports True regardless of the flags passed.
    """
    return True
39 |
40 |
def Remove() -> None:
    """
    Deprecated: model definition files no longer need to be downloaded.

    They are installed as an explicit Python package dependency, so there is
    nothing to remove and this is a no-op.
    """
    return None
48 |
--------------------------------------------------------------------------------
/browserforge/fingerprints/__init__.py:
--------------------------------------------------------------------------------
from browserforge.download import DownloadIfNotExists

# Deprecated no-op, still called at import time for backward compatibility
# (model definition files are installed as a package dependency)
DownloadIfNotExists(fingerprints=True, headers=True)

from browserforge.headers import Browser

from .generator import (
    Fingerprint,
    FingerprintGenerator,
    NavigatorFingerprint,
    Screen,
    ScreenFingerprint,
    VideoCard,
)

# Public names exported by this package
__all__ = [
    "Browser",
    "Fingerprint",
    "FingerprintGenerator",
    "NavigatorFingerprint",
    "Screen",
    "ScreenFingerprint",
    "VideoCard",
]
25 |
--------------------------------------------------------------------------------
/browserforge/fingerprints/generator.py:
--------------------------------------------------------------------------------
1 | from dataclasses import asdict, dataclass
2 | from pathlib import Path
3 | from typing import Dict, List, Optional
4 |
5 | from apify_fingerprint_datapoints import get_fingerprint_network
6 |
7 | from browserforge.bayesian_network import BayesianNetwork, get_possible_values
8 | from browserforge.headers import HeaderGenerator
9 | from browserforge.headers.utils import get_user_agent
10 |
# Use orjson when it is installed, otherwise fall back to the standard library.
# USE_ORJSON records which one was imported, since their `dumps` calls are used differently.
try:
    import orjson as json

    USE_ORJSON = True
except ImportError:
    import json

    USE_ORJSON = False

# `data` directory next to this module
DATA_DIR: Path = Path(__file__).parent / 'data'
21 |
22 |
@dataclass
class ScreenFingerprint:
    """Screen and window properties of a generated fingerprint (the `screen` field of `Fingerprint`)"""

    availHeight: int
    availWidth: int
    availTop: int
    availLeft: int
    colorDepth: int
    height: int
    pixelDepth: int
    width: int
    devicePixelRatio: float
    pageXOffset: int
    pageYOffset: int
    innerHeight: int
    outerHeight: int
    outerWidth: int
    innerWidth: int
    screenX: int
    clientWidth: int
    clientHeight: int
    hasHDR: bool
44 |
45 |
@dataclass
class NavigatorFingerprint:
    """Navigator properties of a generated fingerprint (the `navigator` field of `Fingerprint`)"""

    userAgent: str
    userAgentData: Dict[str, str]
    doNotTrack: Optional[str]
    appCodeName: str
    appName: str
    appVersion: str
    oscpu: str
    webdriver: str
    # Set by the generator to the first entry of `languages`
    language: str
    languages: List[str]
    platform: str
    deviceMemory: Optional[int]
    hardwareConcurrency: int
    product: str
    productSub: str
    vendor: str
    vendorSub: str
    # Set by the generator to 0 when the sampled fingerprint has no value for it
    maxTouchPoints: int
    extraProperties: Dict[str, str]
67 |
68 |
@dataclass
class VideoCard:
    """Renderer and vendor strings of a fingerprint's video card (the `videoCard` field of `Fingerprint`)"""

    renderer: str
    vendor: str
73 |
74 |
@dataclass
class Fingerprint:
    """Result produced by the fingerprint generator"""

    screen: ScreenFingerprint
    navigator: NavigatorFingerprint
    headers: Dict[str, str]
    videoCodecs: Dict[str, str]
    audioCodecs: Dict[str, str]
    pluginsData: Dict[str, str]
    battery: Optional[Dict[str, str]]
    videoCard: Optional[VideoCard]
    multimediaDevices: List[str]
    fonts: List[str]
    mockWebRTC: Optional[bool]
    slim: Optional[bool]

    def dumps(self) -> str:
        """
        Serializes this fingerprint to a JSON string.
        """
        if not USE_ORJSON:
            # The built-in `json` cannot serialize dataclasses, so go through a dict
            return json.dumps(asdict(self))
        # orjson accepts the dataclass directly and returns bytes
        return json.dumps(self).decode()
101 |
102 |
@dataclass
class Screen:
    """Bounds on the screen dimensions of the generated fingerprint"""

    min_width: Optional[int] = None
    max_width: Optional[int] = None
    min_height: Optional[int] = None
    max_height: Optional[int] = None

    def __post_init__(self):
        """Rejects bound pairs whose minimum exceeds the maximum."""
        bound_pairs = (
            (self.min_width, self.max_width),
            (self.min_height, self.max_height),
        )
        for lower, upper in bound_pairs:
            # A pair is only checked when both of its ends are defined
            if None not in (lower, upper) and lower > upper:
                raise ValueError(
                    "Invalid screen constraints: min values cannot be greater than max values"
                )

    def is_set(self) -> bool:
        """
        Tells whether at least one bound was given
        """
        for value in vars(self).values():
            if value is not None:
                return True
        return False
128 |
129 |
class FingerprintGenerator:
    """Generates realistic browser fingerprints"""

    # Built once when the class is defined and shared by every instance
    fingerprint_generator_network = BayesianNetwork(get_fingerprint_network())

    def __init__(
        self,
        screen: Optional[Screen] = None,
        strict: bool = False,
        mock_webrtc: bool = False,
        slim: bool = False,
        **header_kwargs,
    ):
        """
        Initializes the FingerprintGenerator with the given options.

        Parameters:
            screen (Screen, optional): Screen constraints for the generated fingerprint.
            strict (bool, optional): Whether to raise an exception if the constraints are too strict. Default is False.
            mock_webrtc (bool, optional): Whether to mock WebRTC when injecting the fingerprint. Default is False.
            slim (bool, optional): Disables performance-heavy evasions when injecting the fingerprint. Default is False.
            **header_kwargs: Header generation options for HeaderGenerator
        """
        self.header_generator: HeaderGenerator = HeaderGenerator(**header_kwargs)

        # Set default options
        self.screen: Optional[Screen] = screen
        self.strict: bool = strict
        self.mock_webrtc: bool = mock_webrtc
        self.slim: bool = slim

    def generate(
        self,
        *,
        screen: Optional[Screen] = None,
        strict: Optional[bool] = None,
        mock_webrtc: Optional[bool] = None,
        slim: Optional[bool] = None,
        **header_kwargs,
    ) -> Fingerprint:
        """
        Generates a fingerprint and a matching set of ordered headers using a combination of the default options
        specified in the constructor and their possible overrides provided here.

        Parameters:
            screen (Screen, optional): Screen constraints for the generated fingerprint.
            strict (bool, optional): Whether to raise an exception if the constraints are too strict.
            mock_webrtc (bool, optional): Whether to mock WebRTC when injecting the fingerprint. Default is False.
            slim (bool, optional): Disables performance-heavy evasions when injecting the fingerprint. Default is False.
            **header_kwargs: Additional header generation options for HeaderGenerator.generate

        Returns:
            Fingerprint: The generated fingerprint, including the generated headers.

        Raises:
            ValueError: If no User-Agent is found in the generated headers, or if no fingerprint
                consistent with the User-Agent can be generated (in strict mode, or once the
                screen constraints have already been relaxed).
        """
        filtered_values: Dict = {}

        # merge new options with old
        screen = _first(screen, self.screen)
        strict = _first(strict, self.strict)

        # Fills `filtered_values` with the screen constraint as a side effect
        partial_csp = self.partial_csp(
            strict=strict, screen=screen, filtered_values=filtered_values
        )

        # Generate headers consistent with the inputs to get input-compatible user-agent
        # and accept-language headers needed later
        if partial_csp:
            header_kwargs['user_agent'] = partial_csp['userAgent']
        headers = self.header_generator.generate(**header_kwargs)
        # Extract generated User-Agent
        user_agent = get_user_agent(headers)
        if user_agent is None:
            raise ValueError("Failed to find User-Agent in generated response")

        # Generate fingerprint consistent with the generated user agent
        while True:
            fingerprint: Optional[Dict] = (
                self.fingerprint_generator_network.generate_consistent_sample_when_possible(
                    {**filtered_values, 'userAgent': (user_agent,)}
                )
            )
            if fingerprint is not None:
                break
            # Raise in strict mode, and also when there is nothing left to relax:
            # retrying with the same (empty) constraints can never succeed and would loop forever
            if strict or not filtered_values:
                raise ValueError(
                    'Cannot generate headers. User-Agent may be invalid, or screen constraints are too restrictive.'
                )
            # If no fingerprint was generated, relax the filtered values.
            # This seems to be an issue with some Mac and Linux systems
            filtered_values = {}

        # Delete any missing attributes and unpack any object/array-like attributes
        # that have been packed together to make the underlying network simpler
        for attribute in list(fingerprint.keys()):
            if fingerprint[attribute] == '*MISSING_VALUE*':
                fingerprint[attribute] = None
            if isinstance(fingerprint[attribute], str) and fingerprint[attribute].startswith(
                '*STRINGIFIED*'
            ):
                fingerprint[attribute] = json.loads(fingerprint[attribute][len('*STRINGIFIED*') :])

        # Manually add the set of accepted languages required by the input
        accept_language_header_value = headers.get('Accept-Language', '')
        accepted_languages = [
            # Drop the `;q=` weight and any whitespace that follows the comma separator
            locale.split(';', 1)[0].strip()
            for locale in accept_language_header_value.split(',')
        ]
        fingerprint['languages'] = accepted_languages

        return self._transform_fingerprint(
            fingerprint,
            headers,
            _first(mock_webrtc, self.mock_webrtc),
            _first(slim, self.slim),
        )

    def partial_csp(
        self, strict: Optional[bool], screen: Optional[Screen], filtered_values: Dict
    ) -> Optional[Dict]:
        """
        Computes the extended set of constraints induced by the screen constraints.

        NOTE(review): "CSP" presumably stands for constraint satisfaction problem; it is
        unrelated to HTTP Content-Security-Policy.

        When screen constraints are set, `filtered_values['screen']` is filled with every
        screen value of the network that satisfies them. The entry is removed again if the
        induced constraints cannot be computed and `strict` is falsy.

        Parameters:
            strict (Optional[bool]): Whether to raise an exception if the constraints are too strict.
            screen (Optional[Screen]): Screen constraints to apply.
            filtered_values (Dict): Constraint dictionary, updated in place.

        Returns:
            Optional[Dict]: The induced constraints, or None when no screen constraints
            are set or they could not be satisfied in non-strict mode.

        Raises:
            Exception: Re-raises the underlying error when `strict` is truthy.
        """
        # if extensive constraints need to be used
        if not (screen and screen.is_set()):
            return None

        filtered_values['screen'] = [
            screen_string
            for screen_string in self.fingerprint_generator_network.nodes_by_name[
                'screen'
            ].possible_values
            if self._is_screen_within_constraints(screen_string, screen)
        ]

        try:
            return get_possible_values(self.fingerprint_generator_network, filtered_values)
        except Exception:
            if strict:
                raise
            # Best effort: drop the screen constraint and carry on without it
            del filtered_values['screen']
            return None

    @staticmethod
    def _is_screen_within_constraints(screen_string: str, screen_options: Screen) -> bool:
        """
        Checks if the given screen dimensions are within the specified constraints.

        Parameters:
            screen_string (str): Stringified screen dimensions.
            screen_options (Screen): Screen constraint options.

        Returns:
            bool: True if the screen dimensions are within the constraints, False otherwise
            (including when `screen_string` cannot be parsed).
        """
        try:
            screen = json.loads(screen_string[len('*STRINGIFIED*') :])
            return (
                # Ensure that the screen width/height are greater than the minimum constraints
                # Default missing values to -1 to ensure they are excluded
                screen.get('width', -1) >= (screen_options.min_width or 0)
                and screen.get('height', -1) >= (screen_options.min_height or 0)
                # Ensure that the screen width/height are less than the maximum constraints
                and screen.get('width', 0) <= (screen_options.max_width or 1e5)
                and screen.get('height', 0) <= (screen_options.max_height or 1e5)
            )
        except (ValueError, TypeError):
            return False

    @staticmethod
    def _transform_fingerprint(
        fingerprint: Dict, headers: Dict, mock_webrtc: bool, slim: bool
    ) -> Fingerprint:
        """
        Transforms fingerprint into a final dataclass instance.

        Parameters:
            fingerprint (Dict): Fingerprint to be transformed.
            headers (Dict): Generated headers.
            mock_webrtc (bool): Whether to mock WebRTC when injecting the fingerprint.
            slim (bool): Disables performance-heavy evasions when injecting the fingerprint.

        Returns:
            Fingerprint: Transformed fingerprint as a Fingerprint dataclass instance.
        """

        navigator_kwargs = {
            k: fingerprint[k]
            for k in (
                'userAgent',
                'userAgentData',
                'doNotTrack',
                'appCodeName',
                'appName',
                'appVersion',
                'oscpu',
                'webdriver',
                'platform',
                'deviceMemory',
                'product',
                'productSub',
                'vendor',
                'vendorSub',
                'extraProperties',
                'hardwareConcurrency',
                'languages',
            )
        }

        # Always take the first element for 'language'
        navigator_kwargs['language'] = navigator_kwargs['languages'][0]
        navigator_kwargs['maxTouchPoints'] = fingerprint.get('maxTouchPoints', 0)

        return Fingerprint(
            screen=ScreenFingerprint(**fingerprint['screen']),
            navigator=NavigatorFingerprint(**navigator_kwargs),
            headers=headers,
            videoCodecs=fingerprint['videoCodecs'],
            audioCodecs=fingerprint['audioCodecs'],
            pluginsData=fingerprint['pluginsData'],
            battery=fingerprint['battery'],
            videoCard=(
                VideoCard(**fingerprint['videoCard']) if fingerprint.get('videoCard') else None
            ),
            multimediaDevices=fingerprint['multimediaDevices'],
            fonts=fingerprint['fonts'],
            mockWebRTC=mock_webrtc,
            slim=slim,
        )
365 |
366 |
def _first(*values):
    """
    Returns the first argument that is not None, or None when there is no such argument
    """
    for candidate in values:
        if candidate is not None:
            return candidate
    return None
372 |
--------------------------------------------------------------------------------
/browserforge/headers/__init__.py:
--------------------------------------------------------------------------------
from browserforge.download import DownloadIfNotExists

# Deprecated no-op, still called at import time for backward compatibility
# (model definition files are installed as a package dependency)
DownloadIfNotExists(headers=True)

from .generator import Browser, HeaderGenerator

# Public names exported by this package
__all__ = [
    "Browser",
    "HeaderGenerator",
]
11 |
--------------------------------------------------------------------------------
/browserforge/headers/generator.py:
--------------------------------------------------------------------------------
1 | from dataclasses import dataclass
2 | from typing import Any, Dict, Iterable, List, Literal, Optional, Tuple, Union
3 |
4 | from browserforge.bayesian_network import BayesianNetwork, get_possible_values
5 | from apify_fingerprint_datapoints import (get_header_network, get_headers_order, get_browser_helper_file,
6 | get_input_network)
7 |
8 | from .utils import get_browser, get_user_agent, pascalize_headers, tuplify
9 |
10 | try:
11 | import orjson as json
12 | except ImportError:
13 | import json
14 |
15 | try:
16 | from typing import TypeAlias # novm
17 | except ImportError:
18 | from typing_extensions import TypeAlias # <3.10
19 |
20 |
21 | """Constants"""
22 | SUPPORTED_BROWSERS = ('chrome', 'firefox', 'safari', 'edge')
23 | SUPPORTED_OPERATING_SYSTEMS = ('windows', 'macos', 'linux', 'android', 'ios')
24 | SUPPORTED_DEVICES = ('desktop', 'mobile')
25 | SUPPORTED_HTTP_VERSIONS = ('1', '2')
26 | MISSING_VALUE_DATASET_TOKEN = '*MISSING_VALUE*'
27 | HTTP1_SEC_FETCH_ATTRIBUTES = {
28 | 'Sec-Fetch-Mode': 'same-site',
29 | 'Sec-Fetch-Dest': 'navigate',
30 | 'Sec-Fetch-Site': '?1',
31 | 'Sec-Fetch-User': 'document',
32 | }
33 | HTTP2_SEC_FETCH_ATTRIBUTES = {
34 | 'sec-fetch-mode': 'same-site',
35 | 'sec-fetch-dest': 'navigate',
36 | 'sec-fetch-site': '?1',
37 | 'sec-fetch-user': 'document',
38 | }
39 | ListOrString: TypeAlias = Union[Tuple[str, ...], List[str], str]
40 |
41 |
@dataclass
class Browser:
    """Browser specification: name, optional min/max version bounds, and HTTP version"""

    name: str
    min_version: Optional[int] = None
    max_version: Optional[int] = None
    http_version: Union[str, int] = '2'

    def __post_init__(self):
        """Normalizes `http_version` to a string and validates the version bounds."""
        # An integer HTTP version is stored as its string form
        if isinstance(self.http_version, int):
            self.http_version = str(self.http_version)

        # The bounds are only compared when both are integers
        if not (isinstance(self.min_version, int) and isinstance(self.max_version, int)):
            return
        if self.min_version > self.max_version:
            raise ValueError(
                f'Browser min version constraint ({self.min_version}) cannot exceed max version ({self.max_version})'
            )
64 |
65 |
@dataclass
class HttpBrowserObject:
    """Represents an HTTP browser object with name, version, complete string, and HTTP version"""

    name: Optional[str]
    version: Tuple[int, ...]
    complete_string: str
    # HTTP version as a string; compared against '2' by `is_http2`
    http_version: str

    @property
    def is_http2(self):
        """True when `http_version` equals the string '2'."""
        return self.http_version == '2'
78 |
79 |
class HeaderGenerator:
    """
    Generates HTTP headers based on a set of constraints.

    Default constraints are captured by the constructor and can be overridden
    per call through `generate`.
    """

    # Option keys that are dropped, one per retry and in this order, when no
    # consistent sample exists and `strict` is off. Every entry must be spelled
    # exactly like the keys of `self.options` (hence 'os'); a name that never
    # appears in the options can never be relaxed.
    relaxation_order: Tuple[str, ...] = ('locales', 'devices', 'os', 'browsers')

    # Initialize networks
    input_generator_network = BayesianNetwork(get_input_network())
    header_generator_network = BayesianNetwork(get_header_network())

    def __init__(
        self,
        browser: Union[ListOrString, Iterable[Browser]] = SUPPORTED_BROWSERS,
        os: ListOrString = SUPPORTED_OPERATING_SYSTEMS,
        device: ListOrString = SUPPORTED_DEVICES,
        locale: ListOrString = 'en-US',
        http_version: Literal[1, 2] = 2,
        strict: bool = False,
    ):
        """
        Initializes the HeaderGenerator with the given options.

        Parameters:
            browser (Union[ListOrString, Iterable[Browser]], optional): Browser(s) or Browser object(s).
            os (ListOrString, optional): Operating system(s) to generate headers for.
            device (ListOrString, optional): Device(s) to generate the headers for.
            locale (ListOrString, optional): List of at most 10 languages for the Accept-Language header. Default is 'en-US'.
            http_version (Literal[1, 2], optional): Http version to be used to generate headers. Defaults to 2.
            strict (bool, optional): Throws an error if it cannot generate headers based on the input. Defaults to False.
        """
        http_ver: str = str(http_version)

        self.options = {
            'browsers': self._prepare_browsers_config(tuplify(browser), http_ver),
            'os': tuplify(os),
            'devices': tuplify(device),
            'locales': tuplify(locale),
            'http_version': http_ver,
            'strict': strict,
        }
        # Loader orders
        self.unique_browsers = self._load_unique_browsers()
        self.headers_order = self._load_headers_order()

    def generate(
        self,
        *,
        browser: Optional[Iterable[Union[str, Browser]]] = None,
        os: Optional[ListOrString] = None,
        device: Optional[ListOrString] = None,
        locale: Optional[ListOrString] = None,
        http_version: Optional[Literal[1, 2]] = None,
        user_agent: Optional[ListOrString] = None,
        strict: Optional[bool] = None,
        request_dependent_headers: Optional[Dict[str, str]] = None,
    ) -> Dict[str, str]:
        """
        Generates headers using the default options and their possible overrides.

        Parameters:
            browser (Optional[Iterable[Union[str, Browser]]], optional): Browser(s) to generate the headers for.
            os (Optional[ListOrString], optional): Operating system(s) to generate the headers for.
            device (Optional[ListOrString], optional): Device(s) to generate the headers for.
            locale (Optional[ListOrString], optional): Language(s) to include in the Accept-Language header.
            http_version (Optional[Literal[1, 2]], optional): HTTP version to be used to generate headers.
            user_agent (Optional[ListOrString], optional): User-Agent(s) to use.
            request_dependent_headers (Optional[Dict[str, str]], optional): Known values of request-dependent headers.
            strict (Optional[bool], optional): If true, throws an error if it cannot generate headers based on the input.

        Returns:
            Dict[str, str]: The generated headers. When the effective HTTP version
            is '2' the header names are passed through `pascalize_headers`.

        Raises:
            ValueError: If no headers can be generated for the given constraints.
        """

        options = {
            'browsers': tuplify(browser),
            'os': tuplify(os),
            'devices': tuplify(device),
            'locales': tuplify(locale),
            'http_version': str(http_version) if http_version else None,
            'strict': strict,
            'user_agent': tuplify(user_agent),
            'request_dependent_headers': request_dependent_headers,
        }
        # Only explicit overrides are forwarded; everything else falls back to self.options
        generated: Dict[str, str] = self._get_headers(
            **{k: v for k, v in options.items() if v is not None}
        )
        if (options['http_version'] or self.options['http_version']) == '2':
            return pascalize_headers(generated)
        return generated

    def _get_headers(
        self,
        request_dependent_headers: Optional[Dict[str, str]] = None,
        user_agent: Optional[Iterable[str]] = None,
        **options: Any,
    ) -> Dict[str, str]:
        """
        Generates HTTP headers based on the given constraints.

        When no consistent input sample exists, an HTTP/1 request is retried as
        HTTP/2; otherwise (unless `strict`) the first caller-supplied option
        found in `relaxation_order` is dropped and generation is retried.

        Parameters:
            request_dependent_headers (Dict[str, str], optional): Dictionary of request-dependent headers.
            user_agent (Iterable[str], optional): User-Agent value(s).
            **options (Any): Additional options for header generation.

        Returns:
            Dict[str, str]: Dictionary of generated HTTP headers.

        Raises:
            ValueError: If no headers can be generated and either `strict` is set
                or there is no caller-supplied option left to relax.
        """
        if request_dependent_headers is None:
            request_dependent_headers = {}

        # Evaluate a one-shot iterable of browsers once so retries can reuse it
        browsers_option = options.get('browsers')
        if browsers_option is not None and not isinstance(browsers_option, (tuple, list)):
            options['browsers'] = tuple(browsers_option)

        # Process new options on a copy. `options` must keep exactly what the
        # caller supplied: the retries below re-derive the browser list from it,
        # and a synthesized 'browsers' entry must not be mistaken for a
        # constraint that can be relaxed.
        prepared_options = dict(options)
        if 'browsers' in prepared_options or (
            # if a unique http_version was passed
            'http_version' in prepared_options
            and prepared_options['http_version'] != self.options['http_version']
        ):
            self._update_http_version(prepared_options)

        header_options = {**self.options, **prepared_options}
        possible_attribute_values = self._get_possible_attribute_values(header_options)

        if user_agent:
            # evaluate iterable
            if not isinstance(user_agent, (tuple, list)):
                user_agent = tuple(user_agent)
            http1_values, http2_values = (
                get_possible_values(self.header_generator_network, {'User-Agent': user_agent}),
                get_possible_values(self.header_generator_network, {'user-agent': user_agent}),
            )
        else:
            http1_values, http2_values = {}, {}

        constraints = self._prepare_constraints(
            possible_attribute_values, http1_values, http2_values
        )

        input_sample = self.input_generator_network.generate_consistent_sample_when_possible(
            constraints
        )
        if not input_sample:
            if header_options['http_version'] == '1':
                # Retry as HTTP/2. The override is merged into the dict because
                # passing it as a second keyword would raise TypeError whenever
                # the caller already supplied 'http_version'.
                http2_options = {**options, 'http_version': '2'}
                headers2 = self._get_headers(
                    request_dependent_headers, user_agent, **http2_options
                )
                return self.order_headers(pascalize_headers(headers2))

            relaxation_index = next(
                (i for i, key in enumerate(self.relaxation_order) if key in options), -1
            )
            if header_options['strict'] or relaxation_index == -1:
                raise ValueError(
                    'No headers based on this input can be generated. Please relax or change some of the requirements you specified.'
                )

            relaxed_options = {**options}
            del relaxed_options[self.relaxation_order[relaxation_index]]
            return self._get_headers(request_dependent_headers, user_agent, **relaxed_options)

        generated_sample = self.header_generator_network.generate_sample(input_sample)
        generated_http_and_browser = self._prepare_http_browser_object(
            generated_sample['*BROWSER_HTTP']
        )

        # Add Accept-Language header
        accept_language_field_name = (
            'accept-language' if generated_http_and_browser.is_http2 else 'Accept-Language'
        )
        generated_sample[accept_language_field_name] = self._get_accept_language_header(
            header_options['locales']
        )

        # Add Sec headers
        if self._should_add_sec_fetch(generated_http_and_browser):
            if generated_http_and_browser.is_http2:
                generated_sample.update(HTTP2_SEC_FETCH_ATTRIBUTES)
            else:
                generated_sample.update(HTTP1_SEC_FETCH_ATTRIBUTES)

        # Omit "connection: close", internal '*' attributes, and missing value headers
        generated_sample = {
            k: v
            for k, v in generated_sample.items()
            if not (
                (k.lower() == 'connection' and v == 'close')
                or k.startswith('*')
                or v == MISSING_VALUE_DATASET_TOKEN
            )
        }

        # Reorder headers
        return self.order_headers({**generated_sample, **request_dependent_headers})

    def _update_http_version(
        self,
        options: Dict[str, Any],
    ):
        """
        Prepares options when a `browsers` or `http_version` kwarg is passed to .generate.

        The given dict is modified in place: its 'browsers' entry is replaced by
        (or filled with) a list of Browser objects.

        Parameters:
            options (Dict[str, Any]): Other arguments.
        """
        if 'http_version' in options:
            http_version = options['http_version']
        else:
            http_version = self.options['http_version']

        if 'browsers' in options:
            options['browsers'] = self._prepare_browsers_config(options['browsers'], http_version)
        else:
            # Create a copy of the class browsers with an updated http_version
            options['browsers'] = [
                (
                    Browser(
                        name=brwsr.name,
                        min_version=brwsr.min_version,
                        max_version=brwsr.max_version,
                        http_version=http_version,
                    )
                    if isinstance(brwsr, Browser)
                    else Browser(name=brwsr, http_version=http_version)
                )
                for brwsr in self.options['browsers']
            ]

    def _prepare_browsers_config(
        self, browsers: Iterable[Union[str, Browser]], http_version: str
    ) -> List[Browser]:
        """
        Prepares the browser configuration based on the given browsers and HTTP version.

        Strings become Browser objects using `http_version`; anything else is
        passed through unchanged.

        Parameters:
            browsers (Iterable[Union[str, Browser]]): Supported browsers or Browser objects.
            http_version (str): HTTP version ('1' or '2').

        Returns:
            List[Browser]: List of Browser objects.
        """
        return [
            (
                Browser(name=browser, http_version=http_version)
                if isinstance(browser, str)
                else browser
            )
            for browser in browsers
        ]

    def _get_browser_http_options(self, browsers: Iterable[Browser]) -> List[str]:
        """
        Retrieves the browser HTTP options based on the given browser specifications.

        A known browser matches when the names are equal, its major version lies
        within the (optional) min/max bounds, and the HTTP versions agree.

        Parameters:
            browsers (Iterable[Browser]): Iterable of Browser objects.

        Returns:
            List[str]: List of browser HTTP options.
        """
        return [
            browser_option.complete_string
            for browser in browsers
            for browser_option in self.unique_browsers
            if browser.name == browser_option.name
            and (not browser.min_version or browser.min_version <= browser_option.version[0])
            and (not browser.max_version or browser.max_version >= browser_option.version[0])
            and (not browser.http_version or browser.http_version == browser_option.http_version)
        ]

    def order_headers(self, headers: Dict[str, str]) -> Dict[str, str]:
        """
        Orders the headers based on the browser-specific header order.

        Headers that are not listed in the browser's order are left out of the
        result; when no order is known for the browser, `headers` is returned as is.

        Parameters:
            headers (Dict[str, str]): Dictionary of headers.

        Returns:
            Dict[str, str]: Ordered dictionary of headers.

        Raises:
            ValueError: If no User-Agent is present, or no browser can be
                determined from it.
        """
        # get the browser name
        user_agent = get_user_agent(headers)
        if user_agent is None:
            raise ValueError("Failed to find User-Agent in generated response")
        browser_name = get_browser(user_agent)
        if browser_name is None:
            raise ValueError("Failed to find browser in User-Agent")

        header_order = self.headers_order.get(browser_name)
        # Order headers according to the specific browser's header order
        return (
            {key: headers[key] for key in header_order if key in headers}
            if header_order
            else headers
        )

    def _get_possible_attribute_values(
        self, header_options: Dict[str, Any]
    ) -> Dict[str, List[str]]:
        """
        Retrieves the possible attribute values based on the given header options.

        Parameters:
            header_options (Dict[str, Any]): Dictionary of header options.

        Returns:
            Dict[str, List[str]]: Dictionary of possible attribute values.
        """
        browsers = self._prepare_browsers_config(
            header_options.get('browsers', ()),
            header_options.get('http_version', '2'),
        )
        browser_http_options = self._get_browser_http_options(browsers)

        possible_attribute_values = {
            '*BROWSER_HTTP': browser_http_options,
            '*OPERATING_SYSTEM': header_options.get('os', SUPPORTED_OPERATING_SYSTEMS),
        }
        # The device constraint is only applied when one is configured
        if 'devices' in header_options:
            possible_attribute_values['*DEVICE'] = header_options['devices']

        return possible_attribute_values

    def _should_add_sec_fetch(self, browser: HttpBrowserObject) -> bool:
        """
        Determines whether Sec-Fetch headers should be added based on the user agent.

        Parameters:
            browser (HttpBrowserObject): Browser object.

        Returns:
            bool: True if Sec-Fetch headers should be added, False otherwise.
        """
        if browser.name == 'chrome' and browser.version[0] >= 76:
            return True
        if browser.name == 'firefox' and browser.version[0] >= 90:
            return True
        if browser.name == 'edge' and browser.version[0] >= 79:
            return True
        return False

    def _get_accept_language_header(self, locales: ListOrString) -> str:
        """
        Generates the Accept-Language header based on the given locales.

        The first locale gets q=1.0 and each following one 0.1 less.

        Parameters:
            locales (ListOrString): Locale(s).

        Returns:
            str: Accept-Language header string.
        """
        return ', '.join(
            f"{locale};q={1.0 - index * 0.1:.1f}" for index, locale in enumerate(locales)
        )

    def _load_headers_order(self) -> Dict[str, List[str]]:
        """
        Loads the headers order from the headers-order.json file.

        Returns:
            Dict[str, List[str]]: Dictionary of headers order for each browser.
        """
        return json.loads(get_headers_order().read_bytes())

    def _load_unique_browsers(self) -> List[HttpBrowserObject]:
        """
        Loads the unique browsers from the browser-helper-file.json file.

        Entries equal to the missing-value token are skipped.

        Returns:
            List[HttpBrowserObject]: List of HttpBrowserObject instances.
        """
        unique_browser_strings = json.loads(get_browser_helper_file().read_bytes())
        return [
            self._prepare_http_browser_object(browser_str)
            for browser_str in unique_browser_strings
            if browser_str != MISSING_VALUE_DATASET_TOKEN
        ]

    def _prepare_constraints(
        self,
        possible_attribute_values: Dict[str, List[str]],
        http1_values: Dict[str, Any],
        http2_values: Dict[str, Any],
    ) -> Dict[str, Iterable[str]]:
        """
        Prepares the constraints for generating consistent samples.

        Parameters:
            possible_attribute_values (Dict[str, List[str]]): Dictionary of possible attribute values.
            http1_values (Dict[str, Any]): Dictionary of HTTP/1 values.
            http2_values (Dict[str, Any]): Dictionary of HTTP/2 values.

        Returns:
            Dict[str, Iterable[str]]: Dictionary of constraints for each attribute.
        """
        return {
            key: tuple(
                filter(
                    lambda x: (
                        self.filter_browser_http(x, http1_values, http2_values)
                        if key == '*BROWSER_HTTP'
                        else self.filter_other_values(x, http1_values, http2_values, key)
                    ),
                    values,
                )
            )
            for key, values in possible_attribute_values.items()
        }

    @staticmethod
    def filter_browser_http(
        value: str, http1_values: Dict[str, Any], http2_values: Dict[str, Any]
    ) -> bool:
        """
        Filters the browser HTTP value based on the HTTP/1 and HTTP/2 values.

        Parameters:
            value (str): Browser HTTP value, in the form '<browser>|<http version>'.
            http1_values (Dict[str, Any]): Dictionary of HTTP/1 values.
            http2_values (Dict[str, Any]): Dictionary of HTTP/2 values.

        Returns:
            bool: True if the value should be included, False otherwise.
        """
        browser_name, http_version = value.split('|')
        return (
            (not http1_values or browser_name in http1_values.get('*BROWSER', ()))
            if http_version == '1'
            else (not http2_values or browser_name in http2_values.get('*BROWSER', ()))
        )

    @staticmethod
    def filter_other_values(
        value: str, http1_values: Dict[str, Any], http2_values: Dict[str, Any], key: str
    ) -> bool:
        """
        Filters the other attribute values based on the HTTP/1 and HTTP/2 values.

        Parameters:
            value (str): Attribute value.
            http1_values (Dict[str, Any]): Dictionary of HTTP/1 values.
            http2_values (Dict[str, Any]): Dictionary of HTTP/2 values.
            key (str): Attribute key.

        Returns:
            bool: True if the value should be included, False otherwise.
        """
        if http1_values or http2_values:
            return value in http1_values.get(key, ()) or value in http2_values.get(key, ())
        return True

    def _prepare_http_browser_object(self, http_browser_string: str) -> HttpBrowserObject:
        """
        Extracts structured information about a browser and HTTP version from a string.

        Parameters:
            http_browser_string (str): HTTP browser string, in the form
                '<name>/<dotted version>|<http version>'.

        Returns:
            HttpBrowserObject: HttpBrowserObject instance.
        """
        browser_string, http_version = http_browser_string.split('|')
        if browser_string == MISSING_VALUE_DATASET_TOKEN:
            return HttpBrowserObject(
                name=None, version=(), complete_string=MISSING_VALUE_DATASET_TOKEN, http_version=''
            )

        browser_name, version_string = browser_string.split('/')
        version_parts = version_string.split('.')
        version = tuple(int(part) for part in version_parts)
        return HttpBrowserObject(
            name=browser_name,
            version=version,
            complete_string=http_browser_string,
            http_version=http_version,
        )
550 |
--------------------------------------------------------------------------------
/browserforge/headers/utils.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Dict, Iterable, Optional
2 |
3 |
def get_user_agent(headers: Dict[str, str]) -> Optional[str]:
    """
    Looks up the User-Agent value in a headers dictionary.

    The 'User-Agent' key is tried first; when it is missing or its value is
    falsy, whatever is stored under 'user-agent' (or None) is returned.
    """
    pascal_case_value = headers.get('User-Agent')
    if pascal_case_value:
        return pascal_case_value
    return headers.get('user-agent')
9 |
10 |
def get_browser(user_agent: str) -> Optional[str]:
    """
    Determines the browser name from the User-Agent string.

    Parameters:
        user_agent (str): The User-Agent header value.

    Returns:
        Optional[str]: 'firefox', 'edge', 'chrome' or 'safari', or None when
        none of the known tokens is present.
    """

    def mentions(*aliases: str) -> bool:
        return any(alias in user_agent for alias in aliases)

    if mentions('Firefox', 'FxiOS'):
        return 'firefox'
    # Edge has to be tested before Chrome and Safari: Edge User-Agents also
    # contain the 'Chrome' and/or 'Safari' tokens, so testing those first
    # would make this branch unreachable for them.
    if mentions('Edge', 'EdgA', 'Edg', 'EdgiOS'):
        return 'edge'
    if mentions('Chrome', 'CriOS'):
        return 'chrome'
    if 'Safari' in user_agent:
        return 'safari'
    return None
24 |
25 |
# Header names that are upper-cased as a whole instead of title-cased
PASCALIZE_UPPER = {'dnt', 'rtt', 'ect'}


def pascalize(name: str) -> str:
    """
    Converts a header name to its Pascal-Case form.

    Names starting with ':' or 'sec-ch-ua' are returned unchanged, names
    listed in PASCALIZE_UPPER are upper-cased, everything else is title-cased.
    """
    # Left untouched
    if name.startswith((':', 'sec-ch-ua')):
        return name
    return name.upper() if name in PASCALIZE_UPPER else name.title()
37 |
38 |
def pascalize_headers(headers: Dict[str, str]) -> Dict[str, str]:
    """Returns a new dict whose keys went through `pascalize`; values are kept as they are."""
    converted: Dict[str, str] = {}
    for header_name, header_value in headers.items():
        converted[pascalize(header_name)] = header_value
    return converted
41 |
42 |
def tuplify(obj: Any):
    """
    Wraps a single value in a one-element tuple.

    None and non-string iterables are returned unchanged; strings and
    non-iterable values come back as `(obj,)`.
    """
    if obj is None:
        return obj
    if isinstance(obj, Iterable) and not isinstance(obj, str):
        return obj
    return (obj,)
47 |
--------------------------------------------------------------------------------
/browserforge/injectors/__init__.py:
--------------------------------------------------------------------------------
1 | from . import utils
2 |
--------------------------------------------------------------------------------
/browserforge/injectors/data/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | utils.js.xz contains a compressed version of the utils.js file
3 | from Apify's fingerprint-injector:
4 |
5 | https://github.com/apify/fingerprint-suite/blob/master/packages/fingerprint-injector/src/utils.js
6 |
7 | Its purpose is to inject a Fingerprint object into a browser page.
8 |
9 | Copyright 2018 Apify Technologies s.r.o.
10 | """
11 |
--------------------------------------------------------------------------------
/browserforge/injectors/data/utils.js.xz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijro/browserforge/99dd114332c17e895469107847e7193e2832504a/browserforge/injectors/data/utils.js.xz
--------------------------------------------------------------------------------
/browserforge/injectors/playwright/__init__.py:
--------------------------------------------------------------------------------
1 | from browserforge.injectors.utils import CheckIfInstalled
2 |
3 | CheckIfInstalled('playwright')
4 |
5 | from .injector import AsyncNewContext, NewContext
6 |
--------------------------------------------------------------------------------
/browserforge/injectors/playwright/injector.py:
--------------------------------------------------------------------------------
1 | from typing import Dict, Optional
2 |
3 | from browserforge.fingerprints import Fingerprint
4 | from browserforge.injectors.utils import InjectFunction, _fingerprint, only_injectable_headers
5 |
6 | from playwright.async_api import Browser as AsyncBrowser
7 | from playwright.async_api import BrowserContext as AsyncBrowserContext
8 | from playwright.sync_api import Browser, BrowserContext
9 |
10 |
async def AsyncNewContext(
    browser: AsyncBrowser,
    fingerprint: Optional[Fingerprint] = None,
    fingerprint_options: Optional[Dict] = None,
    **context_options,
) -> AsyncBrowserContext:
    """
    Creates an async_api Playwright context and injects a Fingerprint into it.

    Parameters:
        browser (Browser): The browser to create the context in
        fingerprint (Optional[Fingerprint]): The fingerprint to inject. If None, one will be generated
        fingerprint_options (Optional[Dict]): Options for the Fingerprint generator if `fingerprint` is not passed
        **context_options: Other options for the new context

    Returns:
        AsyncBrowserContext: The new context, with headers, a dark-scheme page
        handler and the init script already installed.
    """
    fingerprint = _fingerprint(fingerprint, fingerprint_options)
    init_script = InjectFunction(fingerprint)

    # Create the context from the fingerprint-derived options
    new_context_kwargs = _context_options(fingerprint, context_options)
    context = await browser.new_context(**new_context_kwargs)

    # Only the browser-wide headers are installed on the context
    browser_wide_headers = only_injectable_headers(
        fingerprint.headers, browser.browser_type.name
    )
    await context.set_extra_http_headers(browser_wide_headers)

    # A lambda cannot await, so the page handler is a named coroutine
    async def _emulate_dark_scheme(page):
        await page.emulate_media(color_scheme='dark')

    context.on("page", _emulate_dark_scheme)

    # Install the injection script
    await context.add_init_script(init_script)

    return context
46 |
47 |
def NewContext(
    browser: Browser,
    fingerprint: Optional[Fingerprint] = None,
    fingerprint_options: Optional[Dict] = None,
    **context_options,
) -> BrowserContext:
    """
    Creates a sync_api Playwright context and injects a Fingerprint into it.

    Parameters:
        browser (Browser): The browser to create the context in
        fingerprint (Optional[Fingerprint]): The fingerprint to inject. If None, one will be generated
        fingerprint_options (Optional[Dict]): Options for the Fingerprint generator if `fingerprint` is not passed
        **context_options: Other options for the new context

    Returns:
        BrowserContext: The new context, with headers, a dark-scheme page
        handler and the init script already installed.
    """
    fingerprint = _fingerprint(fingerprint, fingerprint_options)
    init_script = InjectFunction(fingerprint)

    # Create the context from the fingerprint-derived options
    new_context_kwargs = _context_options(fingerprint, context_options)
    context = browser.new_context(**new_context_kwargs)

    # Only the browser-wide headers are installed on the context
    browser_wide_headers = only_injectable_headers(
        fingerprint.headers, browser.browser_type.name
    )
    context.set_extra_http_headers(browser_wide_headers)

    # Every page reported by the context gets the dark color scheme
    def _emulate_dark_scheme(page):
        return page.emulate_media(color_scheme='dark')

    context.on("page", _emulate_dark_scheme)

    # Install the injection script
    context.add_init_script(init_script)

    return context
78 |
79 |
def _context_options(
    fingerprint: Fingerprint,
    options: Dict,
):
    """
    Builds the keyword arguments for a new context.

    Fingerprint-derived values act as defaults. 'viewport' and
    'extra_http_headers' from `options` are removed from `options` and merged
    over their defaults; every remaining option overrides the key of the same name.
    """
    screen = fingerprint.screen

    viewport = {
        'width': screen.width,
        'height': screen.height,
        **options.pop('viewport', {}),
    }
    extra_http_headers = {
        'accept-language': fingerprint.headers['Accept-Language'],
        **options.pop('extra_http_headers', {}),
    }

    merged = {
        'user_agent': fingerprint.navigator.userAgent,
        'color_scheme': 'dark',
        'viewport': viewport,
        'extra_http_headers': extra_http_headers,
        'device_scale_factor': screen.devicePixelRatio,
    }
    # Whatever is left in `options` wins over the defaults above
    merged.update(options)
    return merged
102 |
--------------------------------------------------------------------------------
/browserforge/injectors/pyppeteer/__init__.py:
--------------------------------------------------------------------------------
1 | from browserforge.injectors.utils import CheckIfInstalled
2 |
3 | CheckIfInstalled('pyppeteer')
4 |
5 | from .injector import NewPage
6 |
--------------------------------------------------------------------------------
/browserforge/injectors/pyppeteer/injector.py:
--------------------------------------------------------------------------------
1 | import re
2 | from typing import Dict, Optional
3 |
4 | from pyppeteer.browser import Browser
5 | from pyppeteer.page import Page
6 |
7 | from browserforge.fingerprints import Fingerprint
8 | from browserforge.injectors.utils import InjectFunction, _fingerprint, only_injectable_headers
9 |
10 |
async def NewPage(
    browser: Browser,
    fingerprint: Optional[Fingerprint] = None,
    fingerprint_options: Optional[Dict] = None,
) -> Page:
    """
    Injects a Pyppeteer browser object with a Fingerprint.

    Parameters:
        browser (Browser): The browser to create the context in
        fingerprint (Optional[Fingerprint]): The fingerprint to inject. If None, one will be generated
        fingerprint_options (Optional[Dict]): Options for the Fingerprint generator if `fingerprint` is not passed

    Returns:
        Page: A new page with the User-Agent, device metrics, extra headers,
        dark color scheme (browser version >= 76 only) and the injection
        script applied.
    """
    fingerprint = _fingerprint(fingerprint, fingerprint_options)
    function = InjectFunction(fingerprint)
    # create a new page
    page = await browser.newPage()

    user_agent = fingerprint.navigator.userAgent
    await page.setUserAgent(user_agent)

    screen = fingerprint.screen
    # User-Agent tokens are capitalized ('Android', 'Mobile', 'iPhone'), so the
    # lowercase keywords have to be matched against a lowercased User-Agent.
    lowered_user_agent = user_agent.lower()
    is_mobile = any(name in lowered_user_agent for name in ('phone', 'android', 'mobile'))

    # Pyppeteer does not support firefox, so we can ignore checks
    cdp_sess = await page.target.createCDPSession()
    await cdp_sess.send(
        'Page.setDeviceMetricsOverride',
        {
            'screenHeight': screen.height,
            'screenWidth': screen.width,
            'width': screen.width,
            'height': screen.height,
            'mobile': is_mobile,
            'screenOrientation': (
                {'angle': 0, 'type': 'portraitPrimary'}
                if screen.height > screen.width
                else {'angle': 90, 'type': 'landscapePrimary'}
            ),
            'deviceScaleFactor': screen.devicePixelRatio,
        },
    )
    await page.setExtraHTTPHeaders(only_injectable_headers(fingerprint.headers, 'chrome'))

    # Only set to dark mode if the Chrome version >= 76
    # (raw string: '\d' and '\.' are not valid escapes in a normal string literal)
    version = re.search(r'.*?/(\d+)[\d\.]+?', await browser.version())
    if version and int(version[1]) >= 76:
        await page._client.send(
            'Emulation.setEmulatedMedia',
            {'features': [{'name': 'prefers-color-scheme', 'value': 'dark'}]},
        )

    # Inject function
    await page.evaluateOnNewDocument(function)
    return page
64 |
--------------------------------------------------------------------------------
/browserforge/injectors/undetected_playwright/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | The undetected_playwright injector is a 1:1 copy of the playwright injector,
3 | using the "undetected_playwright" import name for typing purposes.
4 | """
5 |
6 | from browserforge.injectors.utils import CheckIfInstalled
7 |
8 | CheckIfInstalled('undetected_playwright')
9 |
10 | from .injector import AsyncNewContext, NewContext
11 |
--------------------------------------------------------------------------------
/browserforge/injectors/undetected_playwright/injector.py:
--------------------------------------------------------------------------------
1 | from typing import Dict, Optional
2 |
3 | from browserforge.fingerprints import Fingerprint
4 | from browserforge.injectors.utils import InjectFunction, _fingerprint, only_injectable_headers
5 |
6 | from undetected_playwright.async_api import Browser as AsyncBrowser
7 | from undetected_playwright.async_api import BrowserContext as AsyncBrowserContext
8 | from undetected_playwright.sync_api import Browser, BrowserContext
9 |
10 |
async def AsyncNewContext(
    browser: AsyncBrowser,
    fingerprint: Optional[Fingerprint] = None,
    fingerprint_options: Optional[Dict] = None,
    **context_options,
) -> AsyncBrowserContext:
    """
    Creates an async_api Undetected-Playwright context and injects a Fingerprint into it.

    Parameters:
        browser (Browser): The browser to create the context in
        fingerprint (Optional[Fingerprint]): The fingerprint to inject. If None, one will be generated
        fingerprint_options (Optional[Dict]): Options for the Fingerprint generator if `fingerprint` is not passed
        **context_options: Other options for the new context

    Returns:
        AsyncBrowserContext: The new context, with headers, a dark-scheme page
        handler and the init script already installed.
    """
    fingerprint = _fingerprint(fingerprint, fingerprint_options)
    script = InjectFunction(fingerprint)

    # Step 1: the context itself
    context_kwargs = _context_options(fingerprint, context_options)
    context = await browser.new_context(**context_kwargs)

    # Step 2: browser-wide headers only
    headers_to_set = only_injectable_headers(fingerprint.headers, browser.browser_type.name)
    await context.set_extra_http_headers(headers_to_set)

    # Step 3: dark scheme for every page (named coroutine, since lambdas cannot await)
    async def _switch_to_dark(page):
        await page.emulate_media(color_scheme='dark')

    context.on("page", _switch_to_dark)

    # Step 4: the injection script
    await context.add_init_script(script)

    return context
46 |
47 |
def NewContext(
    browser: Browser,
    fingerprint: Optional[Fingerprint] = None,
    fingerprint_options: Optional[Dict] = None,
    **context_options,
) -> BrowserContext:
    """
    Creates a sync_api Undetected-Playwright context and injects a Fingerprint into it.

    Parameters:
        browser (Browser): The browser to create the context in
        fingerprint (Optional[Fingerprint]): The fingerprint to inject. If None, one will be generated
        fingerprint_options (Optional[Dict]): Options for the Fingerprint generator if `fingerprint` is not passed
        **context_options: Other options for the new context

    Returns:
        BrowserContext: The new context, with headers, a dark-scheme page
        handler and the init script already installed.
    """
    fingerprint = _fingerprint(fingerprint, fingerprint_options)
    script = InjectFunction(fingerprint)

    # Step 1: the context itself
    context_kwargs = _context_options(fingerprint, context_options)
    context = browser.new_context(**context_kwargs)

    # Step 2: browser-wide headers only
    headers_to_set = only_injectable_headers(fingerprint.headers, browser.browser_type.name)
    context.set_extra_http_headers(headers_to_set)

    # Step 3: dark scheme for every page
    def _switch_to_dark(page):
        return page.emulate_media(color_scheme='dark')

    context.on("page", _switch_to_dark)

    # Step 4: the injection script
    context.add_init_script(script)

    return context
78 |
79 |
def _context_options(
    fingerprint: Fingerprint,
    options: Dict,
):
    """
    Builds options for new context.

    Fingerprint-derived values act as defaults. 'viewport' and
    'extra_http_headers' from `options` are removed from `options` and merged
    over their defaults; every remaining option overrides the key of the same name.

    Parameters:
        fingerprint (Fingerprint): Source of the user agent, screen size,
            pixel ratio and Accept-Language value.
        options (Dict): Caller-supplied context options (modified in place by
            the two `pop` calls).

    Returns:
        dict: Keyword arguments for `browser.new_context`.
    """
    return {
        'user_agent': fingerprint.navigator.userAgent,
        'color_scheme': 'dark',
        'viewport': {
            'width': fingerprint.screen.width,
            'height': fingerprint.screen.height,
            **options.pop('viewport', {}),
        },
        'extra_http_headers': {
            'accept-language': fingerprint.headers['Accept-Language'],
            **options.pop('extra_http_headers', {}),
        },
        # Kept in step with the playwright injector, which this module mirrors:
        # the real device pixel ratio follows the fingerprint as well.
        'device_scale_factor': fingerprint.screen.devicePixelRatio,
        **options,
    }
101 |
--------------------------------------------------------------------------------
/browserforge/injectors/utils.py:
--------------------------------------------------------------------------------
1 | import lzma
2 | from pathlib import Path
3 | from random import randrange
4 | from typing import Dict, Optional, Set
5 |
6 | from browserforge.fingerprints import Fingerprint, FingerprintGenerator
7 |
8 | UTILS_JS: Path = Path(__file__).parent / 'data/utils.js.xz'
9 |
# Lower-case names of headers whose value depends on the individual request
request_headers: Set[str] = {
    'accept-encoding',
    'accept',
    'cache-control',
    'pragma',
    'sec-fetch-dest',
    'sec-fetch-mode',
    'sec-fetch-site',
    'sec-fetch-user',
    'upgrade-insecure-requests',
}


def only_injectable_headers(headers: Dict[str, str], browser_name: str) -> Dict[str, str]:
    """
    Keeps only the browser-wide headers.

    Headers whose lower-cased name is in `request_headers` depend on the
    request (for example Accept) and are dropped. For a non-empty
    `browser_name` that does not contain 'firefox', the 'te' / 'Te' header is
    dropped too. The input dictionary is not modified.
    """
    browser_wide: Dict[str, str] = {}
    for header_name, header_value in headers.items():
        if header_name.lower() in request_headers:
            continue
        browser_wide[header_name] = header_value

    # Unknown browser or Firefox: nothing else to strip
    if not browser_name or 'firefox' in browser_name.lower():
        return browser_wide

    # Chromium-based controlled browsers do not support the `te` header
    for te_key in ('te', 'Te'):
        browser_wide.pop(te_key, None)
    return browser_wide
39 |
40 |
def InjectFunction(fingerprint: Fingerprint) -> str:
    """
    Builds the JavaScript source that applies `fingerprint` inside a page.

    The returned string is a single immediately-invoked arrow function. It embeds
    the text returned by `utils_js()`, then the result of `fingerprint.dumps()`
    assigned to the JS constant `fp`, and finally a sequence of calls that pass
    parts of `fp` to override helpers (`overrideInstancePrototype`, `overrideWebGl`,
    `overrideCodecs`, ...). Those helpers are not defined in this template;
    presumably they are provided by utils.js -- TODO confirm.

    Notes:
        * Doubled braces (`{{` / `}}`) are f-string escapes and are emitted as
          single braces in the generated JavaScript.
        * `historyLength` is chosen with `randrange(1, 6)` each time this function
          is called, so every generated script carries a fixed value from 1 to 5.
        * `slim = fp.slim;` has no declaration keyword in this template;
          presumably `slim` is declared by utils.js -- verify before changing it.

    Parameters:
        fingerprint: The fingerprint whose `dumps()` output is embedded in the script.

    Returns:
        The JavaScript source as a string.
    """
    return f"""
(()=>{{
{utils_js()}

const fp = {fingerprint.dumps()};
const {{
battery,
navigator: {{
userAgentData,
webdriver,
...navigatorProps
}},
screen: allScreenProps,
videoCard,
audioCodecs,
videoCodecs,
mockWebRTC,
}} = fp;

slim = fp.slim;

const historyLength = {randrange(1, 6)};

const {{
outerHeight,
outerWidth,
devicePixelRatio,
innerWidth,
innerHeight,
screenX,
pageXOffset,
pageYOffset,
clientWidth,
clientHeight,
hasHDR,
...newScreen
}} = allScreenProps;

const windowScreenProps = {{
innerHeight,
outerHeight,
outerWidth,
innerWidth,
screenX,
pageXOffset,
pageYOffset,
devicePixelRatio,
}};

const documentScreenProps = {{
clientHeight,
clientWidth,
}};

runHeadlessFixes();
if (mockWebRTC) blockWebRTC();
if (slim) {{
window['slim'] = true;
}}
overrideIntlAPI(navigatorProps.language);
overrideStatic();
if (userAgentData) {{
overrideUserAgentData(userAgentData);
}}
if (window.navigator.webdriver) {{
navigatorProps.webdriver = false;
}}
overrideInstancePrototype(window.navigator, navigatorProps);
overrideInstancePrototype(window.screen, newScreen);
overrideWindowDimensionsProps(windowScreenProps);
overrideDocumentDimensionsProps(documentScreenProps);
overrideInstancePrototype(window.history, {{ length: historyLength }});
overrideWebGl(videoCard);
overrideCodecs(audioCodecs, videoCodecs);
overrideBattery(battery);
}})()
"""
119 |
120 |
def utils_js() -> str:
    """
    Opens and uncompresses the utils.js file and returns it as a string

    Returns:
        The decompressed contents of the file at `UTILS_JS`, decoded as UTF-8.
    """
    # Decode explicitly as UTF-8: without `encoding`, text mode falls back to the
    # locale's preferred encoding, which is not UTF-8 on every platform.
    with lzma.open(UTILS_JS, 'rt', encoding='utf-8') as f:
        return f.read()
127 |
128 |
def _fingerprint(
    fingerprint: Optional[Fingerprint] = None, fingerprint_options: Optional[Dict] = None
) -> Fingerprint:
    """
    Returns the given fingerprint, or generates a new one when none is supplied.

    `fingerprint_options` is forwarded as keyword arguments to
    `FingerprintGenerator.generate` and is only used when a fingerprint has to be generated.
    """
    if not fingerprint:
        generate_kwargs = fingerprint_options or {}
        return FingerprintGenerator().generate(**generate_kwargs)
    return fingerprint
139 |
140 |
def CheckIfInstalled(module_name: str) -> bool:
    """
    Checks if a module is installed

    Parameters:
        module_name: Importable module name; may be dotted (e.g. "package.submodule").

    Returns:
        True if an import spec can be found for `module_name`, False otherwise,
        including when a parent package of a dotted name is missing.
    """
    import importlib.util

    try:
        return importlib.util.find_spec(module_name) is not None
    except ModuleNotFoundError:
        # For a dotted name, find_spec imports the parent package first and raises
        # when that parent is missing; report it as "not installed".
        return False
148 |
--------------------------------------------------------------------------------
/browserforge/py.typed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijro/browserforge/99dd114332c17e895469107847e7193e2832504a/browserforge/py.typed
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["poetry-core>=1.0.0"]
3 | build-backend = "poetry.core.masonry.api"
4 |
5 | [tool.poetry]
6 | name = "browserforge"
7 | version = "1.2.4"
8 | description = "Intelligent browser header & fingerprint generator"
9 | authors = ["daijro "]
10 | license = "Apache-2.0"
11 | readme = "README.md"
12 | repository = "https://github.com/daijro/browserforge"
13 | keywords = [
14 | "client",
15 | "headers",
16 | "fingerprint",
17 | "generator",
18 | "browser",
19 | "http",
20 | "scraping",
21 | "requests",
22 | "playwright",
23 | ]
24 | classifiers = [
25 | "Topic :: Internet :: WWW/HTTP",
26 | "Topic :: Internet :: WWW/HTTP :: Browsers",
27 | "Topic :: Software Development :: Libraries :: Python Modules",
28 | ]
29 |
30 | [tool.poetry.dependencies]
31 | python = "^3.8"
32 | click = "*"
33 | orjson = { version = "*", optional = true }
34 | typing_extensions = {version = "*", python = "<3.10"}
35 | apify_fingerprint_datapoints = "*"
36 |
37 | [tool.poetry.extras]
38 | all = ["orjson"]
39 |
--------------------------------------------------------------------------------