├── .github └── workflows │ └── release.yml ├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── assets ├── example-output.json └── update.py ├── fpgen ├── __init__.py ├── __main__.py ├── bayesian_network.py ├── exceptions.py ├── generator.py ├── pkgman.py ├── py.typed ├── structs.py ├── trace.py ├── unpacker.py └── utils.py ├── pyproject.toml └── tests ├── README.md ├── failed_cases.py ├── generator_matches_trace.py ├── profile.py ├── speed_test.py └── user_inputs.py /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release to PyPi 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | tags: 7 | - 'v*' # Only trigger on tags that start with 'v' 8 | 9 | permissions: 10 | contents: read 11 | 12 | jobs: 13 | build: 14 | runs-on: ubuntu-latest 15 | env: 16 | TWINE_PASSWORD: ${{ secrets.TWINE_PASSWORD }} 17 | 18 | steps: 19 | - uses: actions/checkout@v4 20 | 21 | - name: Install Python 22 | run: | 23 | sudo apt-get update 24 | sudo apt-get install -y \ 25 | python3 \ 26 | python3-pip \ 27 | python3-dev 28 | 29 | - name: Install Python dependencies 30 | run: | 31 | python3 -m pip install twine build vermin 32 | 33 | - name: Release 34 | run: | 35 | make release -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Old files 2 | _old/ 3 | _old_*/ 4 | *.old 5 | 6 | # Model files 7 | fpgen/**.json 8 | *.zst 9 | *.ndjson 10 | *.dat 11 | *.zip 12 | data/ 13 | output/ 14 | 15 | # Tests 16 | test*.py 17 | diagnostics/ 18 | 19 | # Logs 20 | *.log 21 | *.kate-swp 22 | 23 | 24 | # ==== DEFAULTS ==== 25 | 26 | # Byte-compiled / optimized / DLL files 27 | __pycache__/ 28 | *.py[cod] 29 | *$py.class 30 | 31 | # C extensions 32 | *.so 33 | 34 | # Binaries 35 | *.dll 36 | *.dylib 37 | 38 | # Distribution / packaging 39 | .Python 40 | build/ 41 | develop-eggs/ 42 | dist/ 43 | downloads/ 44 | eggs/ 45 | .eggs/ 46 | lib/ 47 | lib64/ 48 | parts/ 49 | sdist/ 50 | var/ 51 | wheels/ 52 | share/python-wheels/ 53 | *.egg-info/ 54 | .installed.cfg 55 | *.egg 56 | MANIFEST 57 | 58 | # PyInstaller 59 | # Usually these files are written by a python script from a template 60 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 61 | *.manifest 62 | *.spec 63 | 64 | # Installer logs 65 | pip-log.txt 66 | pip-delete-this-directory.txt 67 | 68 | # Unit test / coverage reports 69 | htmlcov/ 70 | .tox/ 71 | .nox/ 72 | .coverage 73 | .coverage.* 74 | .cache 75 | nosetests.xml 76 | coverage.xml 77 | *.cover 78 | *.py,cover 79 | .hypothesis/ 80 | .pytest_cache/ 81 | cover/ 82 | 83 | # Translations 84 | *.mo 85 | *.pot 86 | 87 | # Django stuff: 88 | *.log 89 | local_settings.py 90 | db.sqlite3 91 | db.sqlite3-journal 92 | 93 | # Flask stuff: 94 | instance/ 95 | .webassets-cache 96 | 97 | # Scrapy stuff: 98 | .scrapy 99 | 100 | # Sphinx documentation 101 | docs/_build/ 102 | 103 | # PyBuilder 104 | .pybuilder/ 105 | target/ 106 | 107 | # Jupyter Notebook 108 | .ipynb_checkpoints 109 | 110 | # IPython 111 | profile_default/ 112 | ipython_config.py 113 | 114 | # pyenv 115 | # For a library or package, you might want to ignore these files since the code is 116 | # intended to run in multiple environments; otherwise, check them in: 117 | # .python-version 118 | 119 | # pipenv 120 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
121 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 122 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 123 | # install all needed dependencies. 124 | #Pipfile.lock 125 | 126 | # poetry 127 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 128 | # This is especially recommended for binary packages to ensure reproducibility, and is more 129 | # commonly ignored for libraries. 130 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 131 | #poetry.lock 132 | 133 | # pdm 134 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 135 | #pdm.lock 136 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 137 | # in version control. 138 | # https://pdm.fming.dev/#use-with-ide 139 | .pdm.toml 140 | 141 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 142 | __pypackages__/ 143 | 144 | # Celery stuff 145 | celerybeat-schedule 146 | celerybeat.pid 147 | 148 | # SageMath parsed files 149 | *.sage.py 150 | 151 | # Environments 152 | .env 153 | .venv 154 | env/ 155 | venv/ 156 | ENV/ 157 | env.bak/ 158 | venv.bak/ 159 | 160 | # Spyder project settings 161 | .spyderproject 162 | .spyproject 163 | 164 | # Rope project settings 165 | .ropeproject 166 | 167 | # mkdocs documentation 168 | /site 169 | 170 | # mypy 171 | .mypy_cache/ 172 | .dmypy.json 173 | dmypy.json 174 | 175 | # Pyre type checker 176 | .pyre/ 177 | 178 | # pytype static type analyzer 179 | .pytype/ 180 | 181 | # Cython debug symbols 182 | cython_debug/ 183 | 184 | # PyCharm 185 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 186 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 187 | # and can be added to the global gitignore or merged into this file. For a more nuclear 188 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 189 | #.idea/ 190 | 191 | # VsCode 192 | .vscode 193 | .trunk 194 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 
25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | vermin: 2 | vermin . --eval-annotations --target=3.8 --violations fpgen/ || exit 1 3 | 4 | clean: 5 | @echo Cleaning... 6 | find ./fpgen -type f ! -name "*.typed" ! -name "*.py" -exec rm -v {} \; 7 | rm -rf ./dist 8 | 9 | prepare: vermin clean 10 | 11 | check: prepare 12 | @echo Building... 13 | python -m build 14 | twine check dist/* 15 | 16 | release: check 17 | @echo Releasing... 18 | twine upload dist/* 19 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

Fingerprint Generator
2 | 
3 | A fast browser data generator that mimics actual traffic patterns in the wild. With extensive data coverage.
4 | 
5 | Created by daijro. Data provided by Scrapfly.
6 | 7 | --- 8 | 9 | ## Features 10 | 11 | - Uses a Bayesian generative network to mimic real-world web traffic patterns 12 | - Extensive data coverage for **nearly all known** browser data points 13 | - Creates complete fingerprints in a few milliseconds ⚡ 14 | - Easily specify custom criteria for any data point (e.g. "only Windows + Chrome, with Intel GPUs") 15 | - Simple for humans to use 🚀 16 | 17 | ## Demo Video 18 | 19 | Here is a demonstration of what fpgen generates & its ability to filter data points: 20 | 21 | https://github.com/user-attachments/assets/5c56691a-5804-4007-b179-0bae7069a111 22 | 23 | --- 24 | 25 | # Installation 26 | 27 | Install the package using pip: 28 | 29 | ```bash 30 | pip install fpgen 31 | ``` 32 | 33 |
34 | 35 | ### Downloading the model 36 | 37 | Fetch the latest model: 38 | 39 | ```bash 40 | fpgen fetch 41 | ``` 42 | 43 | This will be ran automatically on the first import, or every 5 weeks. 44 | 45 | To decompress the model for faster generation (_up to 10-50x faster!_), run: 46 | 47 | ```bash 48 | fpgen decompress 49 | ``` 50 | 51 | Note: This action will use an additional 100mb+ of storage. 52 | 53 |
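The same steps can be run from Python if you prefer not to shell out. A minimal sketch using the helpers that back the CLI commands (these live in `fpgen.pkgman` and are not documented as public API):

```python
from fpgen.pkgman import download_model, decompress_model

download_model()    # fetch the latest model (what `fpgen fetch` calls)
decompress_model()  # unpack it for faster generation (what `fpgen decompress` calls)
```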
54 | CLI Usage 55 | 56 | ``` 57 | Usage: python -m fpgen [OPTIONS] COMMAND [ARGS]... 58 | 59 | Options: 60 | --help Show this message and exit. 61 | 62 | Commands: 63 | decompress Decompress model files for speed efficiency (will take 100mb+) 64 | fetch Fetch the latest model from GitHub 65 | recompress Compress model files after running decompress 66 | remove Remove all downloaded and/or extracted model files 67 | ``` 68 | 69 |
70 | 71 | --- 72 | 73 | # Usage 74 | 75 | ### Generate a fingerprint 76 | 77 | Simple usage: 78 | 79 | ```python 80 | >>> import fpgen 81 | >>> fpgen.generate(browser='Chrome', os='Windows') 82 | ``` 83 | 84 | Or use the Generator object to pass filters downward: 85 | 86 | ```python 87 | >>> gen = fpgen.Generator(browser='Chrome') # Filter by Chrome 88 | >>> gen.generate(os='Windows') # Generate Windows & Chrome fingerprints 89 | ``` 90 | 91 |
92 | 93 | Parameters list 94 | 95 | 96 | ``` 97 | Initializes the Generator with the given options. 98 | Values passed to the Generator object will be inherited when calling Generator.generate() 99 | 100 | Parameters: 101 | conditions (dict, optional): Conditions for the generated fingerprint. 102 | window_bounds (WindowBounds, optional): Constrain the output window size. 103 | strict (bool, optional): Whether to raise an exception if the conditions are too strict. 104 | flatten (bool, optional): Whether to flatten the output dictionary 105 | target (Optional[Union[str, StrContainer]]): Only generate specific value(s) 106 | **conditions_kwargs: Conditions for the generated fingerprint (passed as kwargs) 107 | ``` 108 | 109 |
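For example, conditions passed to the constructor are inherited by every later `generate()` call and can be combined with the per-call options above (output omitted):

```python
>>> gen = fpgen.Generator({'browser': 'Chrome'}, strict=True)
>>> gen.generate(os='Windows', target='navigator')  # browser='Chrome' is inherited from the Generator
```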
110 | 111 | [See example output.](https://raw.githubusercontent.com/scrapfly/fingerprint-generator/refs/heads/main/assets/example-output.json) 112 | 113 | --- 114 | 115 | ## Filtering the output 116 | 117 | ### Setting fingerprint criteria 118 | 119 | You can narrow down generated fingerprints by specifying filters for **any** data field. 120 | 121 | ```python 122 | # Only generate fingerprints with Windows, Chrome, and Intel GPU: 123 | >>> fpgen.generate( 124 | ... os='Windows', 125 | ... browser='Chrome', 126 | ... gpu={'vendor': 'Google Inc. (Intel)'} 127 | ... ) 128 | ``` 129 | 130 |
131 | 132 | This can also be passed as a dictionary. 133 | 134 | 135 | ```python 136 | >>> fpgen.generate({ 137 | ... 'os': 'Windows', 138 | ... 'browser': 'Chrome', 139 | ... 'gpu': {'vendor': 'Google Inc. (Intel)'}, 140 | ... }) 141 | ``` 142 | 143 |
144 | 145 | ### Multiple constraints 146 | 147 | Pass in multiple constraints for the generator to select from using a tuple. 148 | 149 | ```python 150 | >>> fpgen.generate({ 151 | ... 'os': ('Windows', 'MacOS'), 152 | ... 'browser': ('Firefox', 'Chrome'), 153 | ... }) 154 | ``` 155 | 156 | If you are passing many nested constraints, run `fpgen decompress` to improve model performance. 157 | 158 | ## Custom filters 159 | 160 | Data can be filtered by passing in callable functions. 161 | 162 | ### Examples 163 | 164 | Set the minimum browser version: 165 | 166 | ```python 167 | # Constrain client: 168 | >>> fpgen.generate(client={'browser': {'major': lambda ver: int(ver) >= 130}}) 169 | # Or, just pass a dot seperated path to client.browser.major: 170 | >>> fpgen.generate({'client.browser.major': lambda ver: int(ver) >= 130}) 171 | ``` 172 | 173 | Only allow NVIDIA GPUs: 174 | 175 | ```python 176 | # Note: Strings are lowercased before they're passed. 177 | >>> fpgen.generate(gpu={'vendor': lambda vdr: 'nvidia' in vdr}) 178 | ``` 179 | 180 | Limit the maximum/minimum window size: 181 | 182 | ```python 183 | # Set allowed ranges for outerWidth & outerHeight: 184 | >>> fpgen.generate( 185 | ... window={ 186 | ... 'outerWidth': lambda width: 1000 <= width <= 2000, 187 | ... 'outerHeight': lambda height: 500 <= height <= 1500 188 | ... } 189 | ... ) 190 | ``` 191 | 192 | 193 |
194 | 195 | Or, filter the window dictionary directly. 196 | 197 | 198 | ```python 199 | def window_filter(window): 200 | if not (1000 <= window['outerWidth'] <= 2000): 201 | return False 202 | if not (500 <= window['outerHeight'] <= 1500): 203 | return False 204 | return True 205 | 206 | fpgen.generate(window=window_filter) 207 | ``` 208 |
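Callable filters can be mixed with ordinary constraints in the same call, for example:

```python
>>> fpgen.generate(
...     os=('Windows', 'MacOS'),
...     browser='Chrome',
...     gpu={'vendor': lambda vdr: 'nvidia' in vdr},
... )
```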
209 | 210 | 211 | --- 212 | 213 | ## Only generate specific data 214 | 215 | To generate specific data fields, use the `target` parameter with a string or a list of strings. 216 | 217 | ### Examples 218 | 219 | Only generate HTTP headers: 220 | 221 | ```python 222 | >>> fpgen.generate(target='headers') 223 | {'accept': '*/*', 'accept-encoding': 'gzip, deflate, br, zstd', 'accept-language': 'en-US,en;q=0.9', 'priority': 'u=1, i', 'sec-ch-ua': '"Google Chrome";v="131", "Chromium";v="131", "Not_A Brand";v="24"', 'sec-ch-ua-mobile': None, 'sec-ch-ua-platform': '"Windows"', 'sec-fetch-dest': 'empty', 'sec-fetch-mode': 'cors', 'sec-fetch-site': 'same-site', 'sec-gpc': None, 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36'} 224 | ``` 225 | 226 |
227 | 228 | Or, by using the generate_target shortcut. 229 | 230 | 231 | ```python 232 | >>> fpgen.generate_target('headers') 233 | {'accept': '*/*', 'accept-encoding': 'gzip, deflate, br, zstd', 'accept-language': 'en-GB,en;q=0.9,en-US;q=0.8,sk;q=0.7', 'priority': 'u=1, i', 'sec-ch-ua': '"Google Chrome";v="131", "Chromium";v="131", "Not_A Brand";v="24"', 'sec-ch-ua-mobile': None, 'sec-ch-ua-platform': '"Windows"', 'sec-fetch-dest': 'empty', 'sec-fetch-mode': 'cors', 'sec-fetch-site': 'same-site', 'sec-gpc': None, 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36'} 234 | ``` 235 | 236 |
237 | 238 | 239 | Generate a User-Agent for Windows & Chrome: 240 | 241 | ```python 242 | >>> fpgen.generate( 243 | ... os='Windows', 244 | ... browser='Chrome', 245 | ... # Nested targets must be seperated by dots: 246 | ... target='headers.user-agent' 247 | ... ) 248 | 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:134.0) Gecko/20100101 Firefox/134.0' 249 | ``` 250 | 251 | Generate a Firefox TLS fingerprint: 252 | 253 | ```python 254 | >>> fpgen.generate( 255 | ... browser='Firefox', 256 | ... target='network.tls.scrapfly_fp' 257 | ... ) 258 | {'version': '772', 'ch_ciphers': '4865-4867-4866-49195-49199-52393-52392-49196-49200-49162-49161-49171-49172-156-157-47-53', 'ch_extensions': '0-5-10-11-13-16-23-27-28-34-35-43-45-51-65037-65281', 'groups': '4588-29-23-24-25-256-257', 'points': '0', 'compression': '0', 'supported_versions': '772-771', 'supported_protocols': 'h2-http11', 'key_shares': '4588-29-23', 'psk': '1', 'signature_algs': '1027-1283-1539-2052-2053-2054-1025-1281-1537-515-513', 'early_data': '0'} 259 | ``` 260 | 261 | You can provide multiple targets as a list. 262 | 263 | --- 264 | 265 | ## Get the probabilities of a target 266 | 267 | Calculate the probability distribution of a target given any filter: 268 | 269 | ```python 270 | >>> fpgen.trace(target='browser', os='Windows') 271 | [, , , , , ] 272 | ``` 273 | 274 | Multiple targets can be passed as a list/tuple. 275 | Here is an example of tracking the probability of browser & OS given a GPU vendor: 276 | 277 | ```python 278 | >>> fpgen.trace( 279 | ... target=('browser', 'os'), 280 | ... gpu={'vendor': 'Google Inc. (Intel)'} 281 | ... ) 282 | {'browser': [, , , , , ], 283 | 'os': [, , , ]} 284 | ``` 285 | 286 | This also works in the Generator object: 287 | 288 | ```python 289 | >>> gen = fpgen.Generator(os='ChromeOS') 290 | >>> gen.trace(target='browser') 291 | [] 292 | ``` 293 | 294 |
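Each entry in the returned list is a `TraceResult` (see "Reading TraceResult" below), so the usual list operations apply. For example, to pick the most likely value:

```python
>>> results = fpgen.trace(target='browser', os='Windows')
>>> max(results, key=lambda r: r.probability).value  # the single most likely browser
```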
295 | 296 | Parameters for trace 297 | 298 | 299 | ``` 300 | Compute the probability distribution(s) of a target variable given conditions. 301 | 302 | Parameters: 303 | target (str): The target variable name. 304 | conditions (Dict[str, Any], optional): A dictionary mapping variable names 305 | flatten (bool, optional): If True, return a flattened dictionary. 306 | **conditions_kwargs: Additional conditions to apply 307 | 308 | Returns: 309 | A dictionary mapping probabilities to the target's possible values. 310 | ``` 311 | 312 |
313 | 314 |
315 | 316 | ### Reading TraceResult 317 | 318 | To read the output `TraceResult` object: 319 | 320 | ```python 321 | >>> chrome = fpgen.trace(target='browser', os='ChromeOS')[0] 322 | >>> chrome.probability 323 | 1.0 324 | >>> chrome.value 325 | 'Chrome' 326 | ``` 327 | 328 | --- 329 | 330 | ## Query possible values 331 | 332 | You can get a list of a target's possible values by passing it into `fpgen.query`: 333 | 334 | List all possible browsers: 335 | 336 | ```python 337 | >>> fpgen.query('browser') 338 | ['Chrome', 'Edge', 'Firefox', 'Opera', 'Safari', 'Samsung Internet', 'Yandex Browser'] 339 | ``` 340 | 341 | Passing a nested target: 342 | 343 | ```python 344 | >>> fpgen.query('navigator.maxTouchPoints') # Dot seperated path 345 | [0, 1, 2, 5, 6, 9, 10, 17, 20, 40, 256] 346 | ``` 347 | 348 |
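Query results can also be fed back in as constraints. For example, to generate a User-Agent for any browser except Safari (a sketch building on the tuple constraints shown earlier):

```python
>>> browsers = tuple(b for b in fpgen.query('browser') if b != 'Safari')
>>> fpgen.generate(browser=browsers, target='headers.user-agent')
```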
349 | 350 | Parameters for query 351 | 352 | 353 | ``` 354 | Query a list of possibilities given a target. 355 | 356 | Parameters: 357 | target (str): Target node to query possible values for 358 | flatten (bool, optional): Whether to flatten the output dictionary 359 | sort (bool, optional): Whether to sort the output arrays 360 | ``` 361 | 362 |
363 | 364 | > [!NOTE] 365 | > Since fpgen is trained on live data, queries may occasionally return invalid or anomalous values. Values lower than a 0.001% probability will not appear in traces or generated fingerprints. 366 | 367 | --- 368 | 369 | ## Generated data 370 | 371 | Here is a rough list of the data fpgen can generate: 372 | 373 | - **Browser data:** 374 | - All navigator data 375 | - All mimetype data: Audio, video, media source, play types, PDF, etc 376 | - All window viewport data (position, inner/outer viewport sizes, toolbar & scrollbar sizes, etc) 377 | - All screen data 378 | - Supported & unsupported DRM modules 379 | - Memory heap limit 380 | 381 | * **System data:** 382 | - GPU data (vendor, renderer, WebGL/WebGL2, extensions, context attributes, parameters, shader precision formats, etc) 383 | - Battery data (charging, charging time, discharging time, level) 384 | - Screen size, color depth, taskbar size, etc. 385 | - Full fonts list 386 | - Cast receiver data 387 | 388 | - **Network data:** 389 | - HTTP headers 390 | - TLS fingerprint data 391 | - HTTP/2 fingerprint & frames 392 | - RTC video & audio capabilities, codecs, clock rates, mimetypes, header extensions, etc 393 | 394 | * **Audio data:** 395 | - Audio signal 396 | - All Audio API constants (AnalyserNode, BiquadFilterNode, DynamicsCompressorNode, OscillatorNode, etc) 397 | 398 | - **Internationalization data:** 399 | - Regional internationalization (Locale, calendar, numbering system, timezone, date format, etc) 400 | - Voices 401 | 402 | * **_And much more!_** 403 | 404 | For a more complete list, see the [full example output](https://raw.githubusercontent.com/scrapfly/fingerprint-generator/refs/heads/main/assets/example-output.json). 405 | 406 | --- 407 | -------------------------------------------------------------------------------- /assets/update.py: -------------------------------------------------------------------------------- 1 | """ 2 | Refreshes example-output.json 3 | """ 4 | 5 | import json 6 | from pathlib import Path 7 | 8 | import fpgen 9 | 10 | DIR = Path('.').absolute() 11 | 12 | 13 | with open(DIR / 'assets' / 'example-output.json', 'w') as f: 14 | data = fpgen.generate() 15 | json.dump(data, f, indent=2) 16 | data = fpgen.generate() 17 | json.dump(data, f, indent=2) 18 | -------------------------------------------------------------------------------- /fpgen/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Fpgen is a fast & comprehensive browser fingerprint generator. 3 | """ 4 | 5 | 6 | def __check_module__() -> None: 7 | ''' 8 | Detect if fpgen is being ran as a module. 9 | ''' 10 | import inspect 11 | import os 12 | import sys 13 | 14 | # Detect if we're running as poetry script 15 | if sys.argv and os.path.basename(sys.argv[0]) == 'fpgen': 16 | os.environ['FPGEN_NO_INIT'] = '1' 17 | return 18 | 19 | stack: list = inspect.stack(2) 20 | if len(stack) >= 2: 21 | prev, launch = stack[-2:] 22 | try: 23 | if (launch.function, prev.function) == ('_run_module_as_main', '_get_module_details'): 24 | # Enable "partial execution mode" to prevent automatic downloads, starting network, etc. 
25 | os.environ['FPGEN_NO_INIT'] = '1' 26 | except AttributeError: 27 | pass 28 | 29 | 30 | __check_module__() 31 | del __check_module__ # Remove from namespace 32 | 33 | # ruff: noqa: E402 34 | 35 | from .generator import Generator, generate, generate_target 36 | from .trace import TraceResult, trace 37 | 38 | # Expose the bayesian network interface for tests 39 | from .utils import NETWORK as __network__ 40 | from .utils import query 41 | 42 | __all__ = ( 43 | 'Generator', 44 | 'TraceResult', 45 | 'generate', 46 | 'generate_target', 47 | 'query', 48 | 'trace', 49 | '__network__', 50 | ) 51 | -------------------------------------------------------------------------------- /fpgen/__main__.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from .pkgman import ( 4 | FILE_PAIRS, 5 | decompress_model, 6 | download_model, 7 | recompress_model, 8 | remove_model, 9 | ) 10 | 11 | 12 | @click.group() 13 | def cli(): 14 | pass 15 | 16 | 17 | @cli.command() 18 | @click.option( 19 | '--decompress', is_flag=True, help='Also decompress the model files after downloading' 20 | ) 21 | def fetch(decompress): 22 | """ 23 | Fetch the latest model from GitHub 24 | """ 25 | # Remove existing files 26 | remove_model(log=False) 27 | # Download new files 28 | download_model() 29 | if decompress: 30 | decompress_model() 31 | click.echo(click.style("Complete!", fg="green")) 32 | 33 | 34 | @cli.command() 35 | def remove(): 36 | """ 37 | Remove all downloaded and/or extracted model files 38 | """ 39 | found = remove_model() 40 | if not found: 41 | click.echo(click.style("No files found to remove.", fg="yellow")) 42 | return 43 | click.echo(click.style("Complete!", fg="green")) 44 | 45 | 46 | @cli.command() 47 | def decompress(): 48 | """ 49 | Recompress model files for speed efficiency (will take 100mb+) 50 | """ 51 | # Check there's anything to decompress 52 | if any(f.exists() for f in FILE_PAIRS.keys()): 53 | click.echo(click.style("Model is already decompressed.", fg="yellow")) 54 | return 55 | decompress_model() 56 | 57 | 58 | @cli.command() 59 | def recompress(): 60 | """ 61 | Compress model files after running decompress 62 | """ 63 | # Check there's anything to compress 64 | if any(f.exists() for f in FILE_PAIRS.values()): 65 | click.echo(click.style("Model is already compressed.", fg="yellow")) 66 | return 67 | recompress_model() 68 | 69 | 70 | if __name__ == '__main__': 71 | cli() 72 | -------------------------------------------------------------------------------- /fpgen/bayesian_network.py: -------------------------------------------------------------------------------- 1 | import heapq 2 | import random 3 | from pathlib import Path 4 | from typing import Any, Dict, Iterable, List, Mapping, Optional, Set, Tuple, Union 5 | 6 | from .exceptions import RestrictiveConstraints 7 | from .pkgman import extract_json 8 | from .structs import CaseInsensitiveDict 9 | from .unpacker import lookup_value_list 10 | 11 | StrContainer = Union[str, List[str], Tuple[str, ...], Set[str]] 12 | 13 | # Width for beam search 14 | # This cuts off values that are way too low or contaminated 15 | BEAM_WIDTH = 1000 16 | 17 | 18 | class BayesianNode: 19 | """ 20 | A single node in a Bayesian network with methods to sample conditional probabilities 21 | """ 22 | 23 | __slots__ = ( 24 | 'node_definition', 25 | 'name', 26 | 'parent_names', 27 | 'possible_values', 28 | 'probabilities', 29 | 'index', 30 | ) 31 | 32 | def __init__(self, node_definition: Dict[str, Any], index: int): 
33 | # Node defintion info 34 | self.node_definition = node_definition 35 | self.name = node_definition['name'] 36 | self.parent_names = node_definition['parentNames'] 37 | self.possible_values = node_definition['possibleValues'] 38 | # CPT data structure 39 | self.probabilities = node_definition['conditionalProbabilities'] 40 | # Index in the sampling order 41 | self.index = index 42 | 43 | def get_probabilities_given_known_values( 44 | self, parent_values: Mapping[str, Any] 45 | ) -> Dict[Any, float]: 46 | """ 47 | Extracts the probabilities for this node's values, given known parent values 48 | """ 49 | probabilities = self.probabilities 50 | for parent_name in self.parent_names: 51 | parent_value = parent_values[parent_name] 52 | probabilities = probabilities.get(parent_value, {}) 53 | return probabilities 54 | 55 | 56 | class BayesianNetwork: 57 | """ 58 | Bayesian network implementation for probabilistic sampling 59 | """ 60 | 61 | def __init__(self, network_file: Path) -> None: 62 | network_definition = extract_json(network_file) 63 | self.nodes_in_sampling_order = [ 64 | BayesianNode(node_def, index) 65 | for index, node_def in enumerate(network_definition['nodes']) 66 | ] 67 | nodes_by_name = {node.name: node for node in self.nodes_in_sampling_order} 68 | self.nodes_by_name = CaseInsensitiveDict(nodes_by_name) 69 | # Keep a list of the original names 70 | self.node_names = tuple(nodes_by_name.keys()) 71 | # Precompute ancestors 72 | self.ancestors_by_name: Dict[str, Set[str]] = {} 73 | for node in self.nodes_in_sampling_order: 74 | self.get_all_ancestors(node.name) 75 | 76 | def generate_consistent_sample( 77 | self, evidence: Mapping[str, Set[str]] 78 | ) -> Optional[Dict[str, Any]]: 79 | """ 80 | Generate a full sample from the Bayesian network. 81 | """ 82 | result: Dict[str, str] = {} 83 | # Create a working copy of evidence that we can update in place. 84 | current_evidence = {k: set(v) for k, v in evidence.items()} 85 | 86 | for node in self.nodes_in_sampling_order: 87 | node_name = node.name 88 | 89 | # For nodes with explicit evidence, remove the node itself from the evidence for beam search. 90 | if node_name in current_evidence: 91 | allowed_values = current_evidence[node_name] 92 | search_evidence = {k: v for k, v in current_evidence.items() if k != node_name} 93 | distribution = self.trace(node_name, search_evidence) 94 | 95 | # Filter the distribution to allowed values and renormalize. 96 | filtered_dist = {k: v for k, v in distribution.items() if k in allowed_values} 97 | if not filtered_dist or sum(filtered_dist.values()) <= 0: 98 | uniform_prob = 1.0 / len(allowed_values) 99 | filtered_dist = {val: uniform_prob for val in allowed_values} 100 | else: 101 | total = sum(filtered_dist.values()) 102 | filtered_dist = {k: v / total for k, v in filtered_dist.items()} 103 | sampled_value = self.sample_value_from_distribution(filtered_dist) 104 | else: 105 | # For unconstrained nodes, use all current evidence. 106 | distribution = self.trace(node_name, current_evidence) 107 | sampled_value = self.sample_value_from_distribution(distribution) 108 | 109 | result[node_name] = sampled_value 110 | # Update current evidence with the newly sampled node value. 111 | current_evidence[node_name] = {sampled_value} 112 | 113 | return result 114 | 115 | def generate_certain_nodes( 116 | self, 117 | evidence: Mapping[str, Set[str]], 118 | targets: Optional[StrContainer] = None, 119 | ) -> Optional[Dict[str, Any]]: 120 | """ 121 | Generate values for target nodes given conditions. 
122 | """ 123 | # If no target specified, generate full sample 124 | if targets is None: 125 | return self.generate_consistent_sample(evidence) 126 | 127 | # Generate result for each target 128 | result = {} 129 | 130 | for target_node in targets: 131 | # Calculate distribution for this target 132 | distribution = self.trace(target_node, evidence) 133 | 134 | # # Handle multi-value conditions for the target 135 | if target_node in evidence: 136 | allowed_values = evidence[target_node] 137 | 138 | # Filter and renormalize 139 | filtered_dist = {k: v for k, v in distribution.items() if k in allowed_values} 140 | 141 | # If no probability mass, use uniform distribution over allowed values 142 | if not filtered_dist or sum(filtered_dist.values()) <= 0: 143 | raise RestrictiveConstraints( 144 | f"Cannot generate fingerprint: No valid values for {target_node} with current conditions." 145 | ) 146 | 147 | # Renormalize 148 | total = sum(filtered_dist.values()) 149 | filtered_dist = {k: v / total for k, v in filtered_dist.items()} 150 | 151 | distribution = filtered_dist 152 | 153 | # Sample from the distribution 154 | if distribution: 155 | result[target_node] = self.sample_value_from_distribution(distribution) 156 | else: 157 | raise RestrictiveConstraints( 158 | f"Cannot generate fingerprint: Empty distribution for {target_node}." 159 | ) 160 | 161 | return result 162 | 163 | def validate_evidence(self, evidence: Mapping[str, Set[str]]) -> None: 164 | """ 165 | Validate that evidence is compatible with each other based on the 166 | Bayesian network structure. Raises an exception if they're incompatible. 167 | """ 168 | # Skip validation for single constraint 169 | if len(evidence) <= 1: 170 | return 171 | 172 | # For each constrained node, check if it's compatible with other fixed conditions 173 | for node_name, allowed_values in evidence.items(): 174 | # Get other fixed conditions (nodes with single values) 175 | fixed_constraints: Dict[str, Union[str, Set[str]]] = {} 176 | for k, v in evidence.items(): 177 | if k != node_name and len(v) == 1: 178 | fixed_constraints[k] = next(iter(v)) 179 | 180 | # If we have fixed conditions, check compatibility 181 | if fixed_constraints: 182 | # Use beam search with fixed conditions to check compatibility 183 | dist = self.trace(node_name, fixed_constraints) 184 | 185 | # If beam search returns a distribution 186 | if dist and all(dist.get(val, 0) <= 0 for val in allowed_values): 187 | # === BUILD THE EXCEPTION MESSAGE === 188 | # Show first 5 values failing node values. 189 | values_str = ", ".join(lookup_value_list(tuple(allowed_values)[:5])) 190 | if len(allowed_values) > 5: 191 | values_str += ", ..." 
192 | # Get the constraints 193 | constraints_values = lookup_value_list(fixed_constraints.values()) 194 | constraints_str = ", ".join( 195 | f"{k}={v}" for k, v in zip(fixed_constraints.keys(), constraints_values) 196 | ) 197 | raise RestrictiveConstraints( 198 | f"Cannot generate fingerprint: {node_name}=({values_str}) " 199 | f"is impossible with constraint: {constraints_str}" 200 | ) 201 | 202 | def get_all_ancestors(self, node_name: str) -> Set[str]: 203 | """ 204 | Get all ancestors of a node (nodes that can influence its value) 205 | """ 206 | if node_name in self.ancestors_by_name: 207 | return self.ancestors_by_name[node_name] 208 | 209 | node = self.nodes_by_name[node_name] 210 | ancestors: Set[str] = set() 211 | if not node: 212 | return ancestors 213 | 214 | # Add direct parents 215 | for parent in node.parent_names: 216 | ancestors.add(parent) 217 | # Add parent's ancestors recursively 218 | ancestors.update(self.get_all_ancestors(parent)) 219 | 220 | self.ancestors_by_name[node_name] = ancestors 221 | return ancestors 222 | 223 | def trace(self, target: str, evidence: Mapping[str, Union[str, Set[str]]]) -> Dict[str, float]: 224 | """ 225 | Calculate conditional probability distribution for target given evidence 226 | using beam search. 227 | """ 228 | # Get the actual target name and build relevant nodes set. 229 | target = self.nodes_by_name[target].name 230 | relevant_nodes = self.get_all_ancestors(target).copy() 231 | relevant_nodes.add(target) 232 | 233 | # Add evidence nodes and their ancestors. 234 | for ev_node in evidence: 235 | if ev_node in self.nodes_by_name: 236 | relevant_nodes.add(ev_node) 237 | relevant_nodes.update(self.get_all_ancestors(ev_node)) 238 | 239 | # Sort nodes by sampling order 240 | ordered_nodes = [ 241 | node for node in self.nodes_in_sampling_order if node.name in relevant_nodes 242 | ] 243 | 244 | # Initialize beam 245 | beam: List[Tuple[Dict[str, Any], float]] = [({}, 1.0)] 246 | # Local cache for conditional probability lookups 247 | cpt_cache: Dict[Tuple[str, Tuple[Any, ...]], Dict[Any, float]] = {} 248 | 249 | for node in ordered_nodes: 250 | new_beam = [] 251 | node_name = node.name 252 | 253 | # Determine allowed values from evidence if present 254 | allowed_values = evidence[node_name] if node_name in evidence else None 255 | 256 | # Process each assignment in the current beam 257 | for assignment, prob in beam: 258 | # Parent order is defined by node.parent_names 259 | try: 260 | parent_values_tuple = tuple(assignment[parent] for parent in node.parent_names) 261 | except KeyError: 262 | # Should not occur if assignments are built in order 263 | parent_values_tuple = () 264 | 265 | cache_key = (node_name, parent_values_tuple) 266 | if cache_key in cpt_cache: 267 | cpt = cpt_cache[cache_key] 268 | else: 269 | parent_values = {parent: assignment[parent] for parent in node.parent_names} 270 | cpt = node.get_probabilities_given_known_values(parent_values) 271 | # Use uniform distribution if missing 272 | if not cpt and node.possible_values: 273 | uniform_prob = 1.0 / len(node.possible_values) 274 | cpt = {val: uniform_prob for val in node.possible_values} 275 | 276 | # Expand the beam with new assignments 277 | for value, p in cpt.items(): 278 | if (allowed_values is None or value in allowed_values) and p > 0: 279 | # Create a new assignment with the new node value 280 | new_assignment = assignment.copy() 281 | new_assignment[node_name] = value 282 | new_beam.append((new_assignment, prob * p)) 283 | 284 | # Prune the beam if no valid 
configurations are left 285 | if new_beam: 286 | if len(new_beam) > BEAM_WIDTH: 287 | # Get the top BEAM_WIDTH assignments 288 | beam = heapq.nlargest(BEAM_WIDTH, new_beam, key=lambda x: x[1]) 289 | else: 290 | beam = new_beam 291 | else: 292 | return {} 293 | 294 | # Extract the target distribution 295 | target_dist: Dict[str, float] = {} 296 | total_prob = 0.0 297 | for assignment, prob in beam: 298 | if target in assignment: 299 | value = assignment[target] 300 | target_dist[value] = target_dist.get(value, 0) + prob 301 | total_prob += prob 302 | 303 | if total_prob > 0: 304 | return {val: p / total_prob for val, p in target_dist.items()} 305 | return {} 306 | 307 | def sample_value_from_distribution(self, distribution: Mapping[str, float]) -> str: 308 | """ 309 | Sample a value from a probability distribution 310 | """ 311 | anchor = random.random() # nosec 312 | cumulative_probability = 0.0 313 | for value, probability in distribution.items(): 314 | cumulative_probability += probability 315 | if anchor < cumulative_probability: 316 | return value 317 | # Fall back to first value 318 | return next(iter(distribution.keys())) 319 | 320 | def get_distribution_for_node( 321 | self, 322 | node: BayesianNode, 323 | sample: Mapping[str, Any], 324 | evidence: Optional[Dict[str, Set[str]]] = None, 325 | ) -> Dict[str, float]: 326 | """ 327 | Get the probability distribution for a node given the current sample 328 | """ 329 | # For multi-value conditions, use beam search 330 | if evidence and node.name in evidence and len(evidence[node.name]) > 1: 331 | # Current evidence is what we've sampled so far 332 | current_evidence = {k: v for k, v in sample.items()} 333 | 334 | # Calculate distribution using beam search 335 | distribution = self.trace(node.name, current_evidence) 336 | # Filter by allowed values and renormalize 337 | if node.name in evidence: 338 | allowed_values = evidence[node.name] 339 | filtered_dist = {k: v for k, v in distribution.items() if k in allowed_values} 340 | 341 | # If no probability mass, the conditions are impossible 342 | if not filtered_dist or sum(filtered_dist.values()) <= 0: 343 | raise RestrictiveConstraints( 344 | f"Cannot generate fingerprint: no valid values for {node.name} with current conditions" 345 | ) 346 | 347 | # Renormalize 348 | total = sum(filtered_dist.values()) 349 | filtered_dist = {k: v / total for k, v in filtered_dist.items()} 350 | return filtered_dist 351 | 352 | return distribution 353 | 354 | # For regular nodes, use direct sampling 355 | parent_values = {parent: sample[parent] for parent in node.parent_names} 356 | 357 | cpt = node.get_probabilities_given_known_values(parent_values) 358 | if not cpt and node.possible_values: 359 | # If missing probabilities, use uniform distribution 360 | uniform_prob = 1.0 / len(node.possible_values) 361 | cpt = {v: uniform_prob for v in node.possible_values} 362 | 363 | if not cpt: 364 | raise RestrictiveConstraints( 365 | f"Cannot generate fingerprint: no probability table for {node.name}" 366 | ) 367 | 368 | return cpt 369 | 370 | def get_shared_possibilities( 371 | self, 372 | value_possibilities: Mapping[str, Set[str]], 373 | seen_nodes: Optional[Set[Tuple[str, int]]] = None, 374 | orig_parents: Optional[Tuple[str, ...]] = None, 375 | ) -> Optional[Dict[str, Set[str]]]: 376 | """ 377 | Get shared possibilities across nodes based on conditions. 378 | Returns None if conditions are contradictory. 379 | 380 | This is deprecated as of v1.3.0 but still exposed for testing. 
381 | """ 382 | # Return empty dict immediately 383 | if not value_possibilities: 384 | return {} 385 | 386 | if seen_nodes is None: 387 | seen_nodes = set() 388 | 389 | # Propagate upward to find possible parent values 390 | all_parents = {node: set(values) for node, values in value_possibilities.items()} 391 | for node, values in value_possibilities.items(): 392 | # Track nodes we've processed 393 | if (node, len(values)) in seen_nodes: 394 | continue 395 | seen_nodes.add((node, len(values))) 396 | self._intersect_parents(node, values, all_parents) 397 | 398 | if orig_parents is None: 399 | orig_parents = tuple(all_parents.keys()) 400 | 401 | # If any parent has no valid values, conditions are contradictory 402 | if any(len(parents) == 0 for parents in all_parents.values()): 403 | return None 404 | 405 | return all_parents 406 | 407 | def _intersect_parents( 408 | self, node: str, values: Set[str], all_parents: Dict[str, Set[str]] 409 | ) -> None: 410 | """ 411 | Intersect possible parent values based on child node conditions 412 | """ 413 | node_obj = self.nodes_by_name.get(node) 414 | if not node_obj: 415 | return 416 | 417 | parent_names = node_obj.parent_names 418 | num_parents = len(parent_names) 419 | 420 | # No parents exist, nothing to do 421 | if not num_parents: 422 | return 423 | 424 | # Build a set of each parent's possible values 425 | parent_values: List[Set[str]] = [set() for _ in range(num_parents)] 426 | for value in values: 427 | collect_parents( 428 | node_obj.probabilities, 429 | value, 430 | parent_values=parent_values, 431 | ) 432 | 433 | # Update all_parents with the intersection of this node's parents 434 | for n, parents in enumerate(parent_values): 435 | parent_name = parent_names[n] 436 | if parent_name not in all_parents: 437 | all_parents[parent_name] = parents 438 | else: 439 | all_parents[parent_name] = all_parents[parent_name].intersection(parents) 440 | 441 | # Recurse to earlier parents if needed 442 | if parent_names and parent_names[0] != self.nodes_in_sampling_order[0].name: 443 | self._intersect_parents( 444 | node=parent_names[0], values=parent_values[0], all_parents=all_parents 445 | ) 446 | 447 | 448 | def collect_parents( 449 | probabilities: Mapping[str, Any], 450 | target: str, 451 | parent_values: List[Set[str]], 452 | so_far: Optional[List[str]] = None, 453 | depth: int = 0, 454 | ) -> None: 455 | """ 456 | Collects all the possible parent values of a node 457 | """ 458 | if so_far is None: 459 | so_far = [] 460 | for parent, values in probabilities.items(): 461 | if isinstance(values, dict): 462 | collect_parents( 463 | probabilities=values, 464 | target=target, 465 | parent_values=parent_values, 466 | so_far=so_far + [parent], 467 | depth=depth + 1, 468 | ) 469 | elif parent == target: 470 | for n, parent in enumerate(so_far): 471 | parent_values[n].add(parent) 472 | -------------------------------------------------------------------------------- /fpgen/exceptions.py: -------------------------------------------------------------------------------- 1 | class NetworkError(ValueError): 2 | """Error with the network""" 3 | 4 | 5 | class InvalidConstraints(NetworkError): 6 | """Raises when a constraint isn't possible""" 7 | 8 | 9 | class RestrictiveConstraints(InvalidConstraints): 10 | """Raises when the passed constraints are too restrictive""" 11 | 12 | 13 | class InvalidNode(NetworkError): 14 | """Raises when a node doesn't exist""" 15 | 16 | 17 | class NodePathError(InvalidNode): 18 | """Raises when a key path doesn't exist""" 19 | 20 | 21 | 
class MissingRelease(Exception): 22 | """Raised when a required GitHub release asset is missing.""" 23 | 24 | 25 | class CannotTraceLargeConfigSpace(ValueError): 26 | """Raises when the configuration space of a node is too large to trace with exact inference""" 27 | -------------------------------------------------------------------------------- /fpgen/generator.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import Any, Dict, List, Optional, Set, Union, overload 3 | 4 | from .bayesian_network import StrContainer 5 | from .exceptions import RestrictiveConstraints 6 | from .trace import TraceResult, TraceResultDict, trace 7 | from .utils import ( 8 | NETWORK, 9 | _assert_dict_xor_kwargs, 10 | _assert_network_exists, 11 | _find_roots, 12 | _make_output_dict, 13 | _maybe_flatten, 14 | _reassemble_targets, 15 | _tupilize, 16 | build_evidence, 17 | ) 18 | 19 | 20 | class Generator: 21 | """ 22 | Generates realistic browser fingerprints 23 | """ 24 | 25 | def __init__( 26 | self, 27 | conditions: Optional[Dict[str, Any]] = None, 28 | *, 29 | strict: bool = True, 30 | flatten: bool = False, 31 | **conditions_kwargs: Any, 32 | ): 33 | """ 34 | Initializes the Generator with the given options. 35 | Values passed to the Generator object will be inherited when calling Generator.generate() 36 | 37 | Parameters: 38 | conditions (dict, optional): Conditions for the generated fingerprint. 39 | strict (bool, optional): Whether to raise an exception if the conditions are too strict. 40 | flatten (bool, optional): Whether to flatten the output dictionary 41 | target (Optional[Union[str, StrContainer]]): Only generate specific value(s) 42 | **conditions_kwargs: Conditions for the generated fingerprint (passed as kwargs) 43 | """ 44 | _assert_dict_xor_kwargs(conditions, conditions_kwargs) 45 | # Set default options 46 | self.strict: bool = strict 47 | self.flatten: bool = flatten 48 | self.evidence: Dict[str, Set[str]] = {} 49 | 50 | if conditions_kwargs: 51 | conditions = conditions_kwargs 52 | if conditions: 53 | build_evidence(conditions, self.evidence) 54 | 55 | @overload 56 | def generate( 57 | self, 58 | conditions: Optional[Dict[str, Any]] = None, 59 | *, 60 | strict: Optional[bool] = None, 61 | flatten: Optional[bool] = None, 62 | target: str, 63 | **conditions_kwargs: Any, 64 | ) -> Any: ... 65 | 66 | @overload 67 | def generate( 68 | self, 69 | conditions: Optional[Dict[str, Any]] = None, 70 | *, 71 | strict: Optional[bool] = None, 72 | flatten: Optional[bool] = None, 73 | target: Optional[StrContainer] = None, 74 | **conditions_kwargs: Any, 75 | ) -> Dict[str, Any]: ... 76 | 77 | def generate( 78 | self, 79 | conditions: Optional[Dict[str, Any]] = None, 80 | *, 81 | strict: Optional[bool] = None, 82 | flatten: Optional[bool] = None, 83 | target: Optional[Union[str, StrContainer]] = None, 84 | **conditions_kwargs: Any, 85 | ) -> Dict[str, Any]: 86 | """ 87 | Generates a fingerprint. 88 | 89 | Parameters: 90 | conditions (dict, optional): Conditions for the generated fingerprints. 91 | These conditions will be inherited by generated fingerprints. 92 | strict (bool, optional): Whether to raise an exception if the conditions are too strict. 
93 | flatten (bool, optional): Whether to flatten the output dictionary 94 | target (Optional[Union[str, StrContainer]]): Only generate specific value(s) 95 | **conditions_kwargs: Conditions for the generated fingerprints (passed as kwargs) 96 | 97 | Returns: 98 | A generated fingerprint. 99 | """ 100 | _assert_dict_xor_kwargs(conditions, conditions_kwargs) 101 | _assert_network_exists() 102 | 103 | if conditions_kwargs: 104 | conditions = conditions_kwargs 105 | 106 | # Merge new options with old 107 | strict = _first(strict, self.strict) 108 | flatten = _first(flatten, self.flatten) 109 | 110 | # Inherit the evidence from the class instance 111 | evidence = self.evidence.copy() 112 | if conditions: 113 | build_evidence(conditions, evidence, strict=strict) 114 | 115 | # Convert targets to set 116 | if target: 117 | target_tup = _tupilize(target) 118 | target_roots = set(_find_roots(target_tup)) 119 | else: 120 | target_roots = None 121 | 122 | # Generate fingerprint 123 | while True: 124 | # If we only are searching for certain targets, call generate_certain_nodes 125 | if target_roots: 126 | fingerprint = NETWORK.generate_certain_nodes(evidence, target_roots) 127 | else: 128 | fingerprint = NETWORK.generate_consistent_sample(evidence) 129 | 130 | # Found the fingerprint 131 | if fingerprint is not None: 132 | break 133 | # Raise an error if the evidence are too strict 134 | if strict: 135 | raise RestrictiveConstraints( 136 | 'Cannot generate fingerprint. Constraints are too restrictive.' 137 | ) 138 | # If no fingerprint was generated, relax the filtered values until we find one 139 | evidence.pop(next(iter(evidence.keys()))) 140 | 141 | # If we arent searching for certain targets, we can return right away 142 | if target: 143 | output = _make_output_dict(fingerprint, flatten=False) # Don't flatten yet 144 | output = _reassemble_targets(_tupilize(target), output) 145 | if isinstance(target, str): 146 | output = output[target] 147 | return _maybe_flatten(flatten, output) 148 | 149 | return _make_output_dict(fingerprint, flatten=flatten) 150 | 151 | def generate_target( 152 | self, target: str, conditions: Optional[Dict[str, Any]] = None, **kwargs: Any 153 | ) -> Any: 154 | """ 155 | Generates a specific target. Shortcut for the `generate` method. 156 | """ 157 | return self.generate(target=target, conditions=conditions, **kwargs) 158 | 159 | @overload 160 | def trace( 161 | self, 162 | target: str, 163 | conditions: Optional[Dict[str, Any]] = None, 164 | *, 165 | flatten: bool = False, 166 | **conditions_kwargs: Any, 167 | ) -> List[TraceResult]: ... 168 | 169 | @overload 170 | def trace( 171 | self, 172 | target: StrContainer, 173 | conditions: Optional[Dict[str, Any]] = None, 174 | *, 175 | flatten: bool = False, 176 | **conditions_kwargs: Any, 177 | ) -> TraceResultDict: ... 178 | 179 | def trace( 180 | self, 181 | target: Union[str, StrContainer], 182 | conditions: Optional[Dict[str, Any]] = None, 183 | *, 184 | flatten: bool = False, 185 | **conditions_kwargs: Any, 186 | ) -> Union[List[TraceResult], TraceResultDict]: 187 | """ 188 | Compute the probability distribution(s) of a target variable given conditions. 189 | 190 | Parameters: 191 | target (str): The target variable name. 192 | conditions (Dict[str, Any], optional): A dictionary mapping variable names 193 | flatten (bool, optional): If True, return a flattened dictionary. 194 | **conditions_kwargs: Additional conditions to apply 195 | 196 | Returns: 197 | A dictionary mapping probabilities to the target's possible values. 
198 | """ 199 | return trace( 200 | target=target, 201 | flatten=flatten, 202 | conditions=conditions, 203 | **conditions_kwargs, 204 | # Inherit the conditions from the class instance 205 | __evidence__=self.evidence.copy(), 206 | ) 207 | 208 | 209 | def _first(*values): 210 | """ 211 | Simple function that returns the first non-None value passed 212 | """ 213 | return next((v for v in values if v is not None), None) 214 | 215 | 216 | """ 217 | A global `generate` function for those calling 218 | fpgen.generate() directly without creating a Generator object 219 | """ 220 | 221 | GLOBAL_GENERATOR: Optional[Generator] = None 222 | 223 | 224 | def generate(*args, **kwargs) -> Dict[str, Any]: 225 | """ 226 | Generates a fingerprint. 227 | 228 | Parameters: 229 | conditions (dict, optional): Conditions for the generated fingerprints. 230 | These conditions will be inherited by generated fingerprints. 231 | strict (bool, optional): Whether to raise an exception if the conditions are too strict. 232 | flatten (bool, optional): Whether to flatten the output dictionary 233 | target (Optional[Union[str, StrContainer]]): Only generate specific value(s) 234 | **conditions_kwargs: Conditions for the generated fingerprints (passed as kwargs) 235 | 236 | Returns: 237 | A generated fingerprint. 238 | """ 239 | global GLOBAL_GENERATOR 240 | if GLOBAL_GENERATOR is None: 241 | GLOBAL_GENERATOR = Generator() 242 | return GLOBAL_GENERATOR.generate(*args, **kwargs) 243 | 244 | 245 | def generate_target(target: str, conditions: Optional[Dict[str, Any]] = None, **kwargs) -> Any: 246 | """ 247 | Generates a specific target. Shortcut for the `generate` method. 248 | """ 249 | return generate(target=target, conditions=conditions, **kwargs) 250 | 251 | 252 | __all__ = ('Generator', 'WindowBounds', 'generate', 'generate_target') 253 | -------------------------------------------------------------------------------- /fpgen/pkgman.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tempfile 3 | import zipfile 4 | from datetime import datetime, timedelta 5 | from pathlib import Path 6 | from typing import Any, Dict 7 | 8 | import click 9 | import httpx 10 | import orjson 11 | import zstandard 12 | 13 | from .exceptions import MissingRelease 14 | 15 | # Model files 16 | DATA_DIR = Path(__file__).parent / 'data' 17 | 18 | NETWORK_FILE = DATA_DIR / "fingerprint-network.json" 19 | VALUES_JSON = DATA_DIR / 'values.json' 20 | VALUES_DATA = DATA_DIR / 'values.dat' 21 | 22 | # Mapping of files to their compressed variant 23 | FILE_PAIRS = { 24 | NETWORK_FILE: NETWORK_FILE.with_suffix('.json.zst'), 25 | VALUES_JSON: VALUES_JSON.with_suffix('.json.zst'), 26 | VALUES_DATA: VALUES_DATA.with_suffix('.dat.zst'), 27 | } 28 | 29 | # Repo to pull releases from 30 | GITHUB_REPO = 'scrapfly/fingerprint-generator' 31 | 32 | 33 | class ModelPuller: 34 | """ 35 | Pulls the model from GitHub and extracts it to the data directory. 36 | """ 37 | 38 | def __init__(self) -> None: 39 | self.api_url = f"https://api.github.com/repos/{GITHUB_REPO}/releases" 40 | 41 | def check_asset(self, asset: Dict) -> Any: 42 | """ 43 | Compare the asset to determine if it's the desired asset. 
44 | 45 | Args: 46 | asset: Asset information from GitHub API 47 | 48 | Returns: 49 | Any: Data to be returned if this is the desired asset, or None/False if not 50 | """ 51 | url = asset.get('browser_download_url') 52 | if url and url.endswith('.zip'): 53 | return url 54 | 55 | def missing_asset_error(self) -> None: 56 | """ 57 | Raise a MissingRelease exception if no release is found. 58 | """ 59 | raise MissingRelease(f"Could not find a release asset in {GITHUB_REPO}.") 60 | 61 | def get_asset(self) -> Any: 62 | """ 63 | Fetch the latest release from the GitHub API. 64 | Gets the first asset that returns a truthy value from check_asset. 65 | """ 66 | resp = httpx.get(self.api_url, timeout=20, verify=False) 67 | resp.raise_for_status() 68 | 69 | releases = resp.json() 70 | 71 | for release in releases: 72 | for asset in release['assets']: 73 | if data := self.check_asset(asset): 74 | return data 75 | 76 | self.missing_asset_error() 77 | 78 | def download(self): 79 | """ 80 | Download the model from GitHub and extract it to the data directory. 81 | """ 82 | # Pull form a custom source, or the GitHub API 83 | 84 | url = os.getenv('FPGEN_MODEL_URL') 85 | if url: 86 | click.echo(f"Fetching model files from {url}...") 87 | else: 88 | click.echo("Fetching model files from GitHub...") 89 | url = self.get_asset() 90 | 91 | # Optionally get the model password 92 | password = os.getenv('FPGEN_MODEL_PASSWORD') 93 | if password: 94 | password = password.encode() 95 | 96 | # Stream to tempfile then extract using zipfile 97 | with tempfile.NamedTemporaryFile(delete=False) as temp_file: 98 | with httpx.stream( 99 | 'GET', url, timeout=20, verify=False, follow_redirects=True 100 | ) as r: # nosec 101 | for chunk in r.iter_bytes(): 102 | temp_file.write(chunk) 103 | temp_file.flush() 104 | temp_file.close() 105 | # Print extraction message if running as module 106 | if __is_module__(): 107 | click.echo(f"Extracting to {DATA_DIR}...") 108 | with zipfile.ZipFile(temp_file.name) as z: 109 | z.extractall(DATA_DIR, pwd=password) 110 | 111 | os.unlink(temp_file.name) 112 | 113 | 114 | """ 115 | File helper 116 | """ 117 | 118 | 119 | def extract_json(path: Path) -> dict: 120 | """ 121 | Reads JSON from a file (or from a zst if needed). 
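    For example, extract_json(NETWORK_FILE) falls back to the adjacent
    fingerprint-network.json.zst when the decompressed .json file is absent.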
122 | """ 123 | # Check for uncompressed json 124 | if path.exists(): 125 | with open(path, 'rb') as f: 126 | return orjson.loads(f.read()) 127 | 128 | # Check for zst json 129 | elif (zst_path := path.with_suffix('.json.zst')).exists(): 130 | with open(zst_path, 'rb') as f: 131 | decomp = zstandard.ZstdDecompressor() 132 | return orjson.loads(decomp.decompress(f.read())) 133 | 134 | raise FileNotFoundError(f'Missing required data file for: {path}') 135 | 136 | 137 | """ 138 | Model file utility functions 139 | """ 140 | 141 | 142 | def download_model(): 143 | """ 144 | Call the model puller to download files 145 | """ 146 | ModelPuller().download() 147 | 148 | 149 | def decompress_model(): 150 | """ 151 | Decompress model files 152 | """ 153 | import zstandard 154 | 155 | dctx = zstandard.ZstdDecompressor() 156 | for src_zst, dst in {v: k for k, v in FILE_PAIRS.items()}.items(): 157 | if not src_zst.exists(): 158 | click.echo(f"Warning: {src_zst} not found, skipping") 159 | continue 160 | 161 | click.echo(f"Decompressing {src_zst} -> {dst}") 162 | with open(src_zst, 'rb') as src, open(dst, 'wb') as dst_f: 163 | dctx.copy_stream(src, dst_f) 164 | src_zst.unlink() 165 | 166 | 167 | def recompress_model(): 168 | """ 169 | Recompress model files after running decompress 170 | """ 171 | import zstandard 172 | 173 | cctx = zstandard.ZstdCompressor(level=19) 174 | for src, dst_zst in FILE_PAIRS.items(): 175 | if not src.exists(): 176 | click.echo(f"Warning: {src} not found, skipping") 177 | continue 178 | 179 | click.echo(f"Compressing {src} -> {dst_zst}") 180 | with open(src, 'rb') as src_f: 181 | data = src_f.read() 182 | compressed = cctx.compress(data) 183 | with open(dst_zst, 'wb') as dst: 184 | dst.write(compressed) 185 | src.unlink() 186 | 187 | 188 | def remove_model(log=True): 189 | """ 190 | Remove all model files 191 | """ 192 | for file_pair in FILE_PAIRS.items(): 193 | found = False 194 | for file in file_pair: 195 | if not file.exists(): 196 | continue 197 | if log: 198 | click.echo(f"Removing {file}") 199 | file.unlink() 200 | found = True 201 | return found 202 | 203 | 204 | def files_are_recent(file_list): 205 | """ 206 | Checks if all passed files are <5 weeks old 207 | """ 208 | cutoff = datetime.now() - timedelta(weeks=5) 209 | return all(datetime.fromtimestamp(f.stat().st_mtime) >= cutoff for f in file_list) 210 | 211 | 212 | def assert_downloaded(): 213 | """ 214 | Checks if the model files are downloaded 215 | """ 216 | if __is_module__(): 217 | return # Skip if running as a module 218 | 219 | # Check decompressed files (FILE_PAIRS keys) 220 | if all(file.exists() for file in FILE_PAIRS.keys()): 221 | # When updating decompressed files, decompress again after redownloading 222 | if not files_are_recent(FILE_PAIRS.keys()): 223 | ModelPuller().download() 224 | decompress_model() 225 | return 226 | 227 | # Check compressed files (FILE_PAIRS values) 228 | if all(file.exists() for file in FILE_PAIRS.values()) and files_are_recent(FILE_PAIRS.values()): 229 | return 230 | 231 | # First time importing 232 | ModelPuller().download() 233 | 234 | 235 | def __is_module__() -> bool: 236 | """ 237 | Checks if fpgen is being ran as a module 238 | """ 239 | return bool(os.getenv('FPGEN_NO_INIT')) 240 | 241 | 242 | # Check model files are downloaded 243 | assert_downloaded() 244 | -------------------------------------------------------------------------------- /fpgen/py.typed: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/scrapfly/fingerprint-generator/fc7e489687821f691bae9308d73e294497d015fb/fpgen/py.typed -------------------------------------------------------------------------------- /fpgen/structs.py: -------------------------------------------------------------------------------- 1 | # https://stackoverflow.com/a/32888599 2 | class CaseInsensitiveDict(dict): 3 | @classmethod 4 | def _k(cls, key): 5 | return key.casefold() if isinstance(key, str) else key 6 | 7 | def __init__(self, *args, **kwargs): 8 | super(CaseInsensitiveDict, self).__init__(*args, **kwargs) 9 | self._convert_keys() 10 | 11 | def __getitem__(self, key): 12 | return super(CaseInsensitiveDict, self).__getitem__(self.__class__._k(key)) 13 | 14 | def __setitem__(self, key, value): 15 | super(CaseInsensitiveDict, self).__setitem__(self.__class__._k(key), value) 16 | 17 | def __delitem__(self, key): 18 | return super(CaseInsensitiveDict, self).__delitem__(self.__class__._k(key)) 19 | 20 | def __contains__(self, key): 21 | return super(CaseInsensitiveDict, self).__contains__(self.__class__._k(key)) 22 | 23 | def has_key(self, key): 24 | return super(CaseInsensitiveDict, self).has_key(self.__class__._k(key)) 25 | 26 | def pop(self, key, *args, **kwargs): 27 | return super(CaseInsensitiveDict, self).pop(self.__class__._k(key), *args, **kwargs) 28 | 29 | def get(self, key, *args, **kwargs): 30 | return super(CaseInsensitiveDict, self).get(self.__class__._k(key), *args, **kwargs) 31 | 32 | def setdefault(self, key, *args, **kwargs): 33 | return super(CaseInsensitiveDict, self).setdefault(self.__class__._k(key), *args, **kwargs) 34 | 35 | def update(self, E={}, **F): 36 | super(CaseInsensitiveDict, self).update(self.__class__(E)) 37 | super(CaseInsensitiveDict, self).update(self.__class__(**F)) 38 | 39 | def _convert_keys(self): 40 | for k in list(self.keys()): 41 | v = super(CaseInsensitiveDict, self).pop(k) 42 | self.__setitem__(k, v) 43 | -------------------------------------------------------------------------------- /fpgen/trace.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import Any, Dict, List, Optional, Set, Union, overload 3 | 4 | import orjson 5 | 6 | from .bayesian_network import StrContainer 7 | from .exceptions import RestrictiveConstraints 8 | from .unpacker import lookup_value_list 9 | from .utils import ( 10 | NETWORK, 11 | _assert_dict_xor_kwargs, 12 | _find_roots, 13 | _tupilize, 14 | build_evidence, 15 | ) 16 | 17 | 18 | @dataclass 19 | class TraceResult: 20 | value: Any 21 | probability: float 22 | 23 | def __repr__(self) -> str: 24 | return f"<{self.value}: {self.probability * 100:.5f}%>" 25 | 26 | 27 | # Recursive type for the return value 28 | TraceResultDict = Dict[str, Union[List[TraceResult], "TraceResultDict"]] 29 | 30 | 31 | @overload 32 | def trace( 33 | target: str, 34 | conditions: Optional[Dict[str, Any]] = None, 35 | *, 36 | flatten: bool = False, 37 | **conditions_kwargs, 38 | ) -> List[TraceResult]: ... 39 | 40 | 41 | @overload 42 | def trace( 43 | target: StrContainer, 44 | conditions: Optional[Dict[str, Any]] = None, 45 | *, 46 | flatten: bool = False, 47 | **conditions_kwargs, 48 | ) -> TraceResultDict: ... 
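# A minimal usage sketch for the overloads above. The node names are taken from
# the test suite; the exact values depend on the downloaded model, and the dotted
# target assumes 'navigator.language' is a root node (as exercised in tests/speed_test.py):
#
#   trace('browser', os='ChromeOS')                     # -> List[TraceResult]
#   trace(('navigator.language', 'os'))                 # -> {'navigator': {'language': [...]}, 'os': [...]}
#   trace(('navigator.language', 'os'), flatten=True)   # -> {'navigator.language': [...], 'os': [...]}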
49 |
50 |
51 | def trace(
52 | target: Union[str, StrContainer],
53 | conditions: Optional[Dict[str, Any]] = None,
54 | *,
55 | flatten: bool = False,
56 | __evidence__: Optional[Dict[str, Set[str]]] = None,
57 | **conditions_kwargs,
58 | ) -> Union[List[TraceResult], TraceResultDict]:
59 | """
60 | Compute the probability distribution(s) of a target variable given conditions.
61 |
62 | Parameters:
63 | target (str): The target variable name (or a container of names).
64 | conditions (Dict[str, Any], optional): A dictionary mapping variable names to allowed value(s).
65 | flatten (bool, optional): If True, return a flattened dictionary.
66 | **conditions_kwargs: Additional conditions to apply
67 |
68 | Returns:
69 | A list of TraceResult objects for a single target, or a dictionary of such lists keyed by target when multiple targets are passed.
70 | """
71 | _assert_dict_xor_kwargs(conditions, conditions_kwargs)
72 |
73 | # If evidence was already passed, consume it
74 | evidence: Dict[str, Set[str]] = __evidence__ or {}
75 |
76 | # Build conditions
77 | if conditions_kwargs:
78 | conditions = conditions_kwargs
79 | if conditions:
80 | build_evidence(conditions, evidence)
81 |
82 | # Get the targets
83 | target_tup = _tupilize(target)
84 | target_roots = tuple(_find_roots(target_tup))
85 |
86 | # List is empty, raise an error
87 | if not target_tup:
88 | raise ValueError("Please pass at least one valid target.")
89 |
90 | # If there is only one target, return the result
91 | if len(target_roots) == 1:
92 | return _pull_target(target_roots[0], evidence)
93 |
94 | # If flatten is true, return a dictionary of targets
95 | if flatten:
96 | return {root: _pull_target(root, evidence) for root in target_roots}
97 |
98 | # Otherwise, return an expanded dictionary of targets
99 | output: Dict[str, Any] = {}
100 | for root in target_roots:
101 | parts = root.split(".")
102 | d = output
103 | for part in parts[:-1]:
104 | if part not in d:
105 | d[part] = {}
106 | d = d[part]
107 | d[parts[-1]] = _pull_target(root, evidence)
108 | return output
109 |
110 |
111 | def _pull_target(target: str, evidence: Dict[str, Set[str]]) -> List[TraceResult]:
112 | """
113 | Gets the probability distribution for a target variable given conditions.
114 | """
115 | possibilities = NETWORK.trace(target=target, evidence=evidence)
116 | if not possibilities:
117 | raise RestrictiveConstraints(
118 | f"Constraints are too restrictive. No possible values for {target}."
119 | ) 120 | data = lookup_value_list(possibilities.keys()) 121 | data = map(orjson.loads, data) 122 | probs = possibilities.values() 123 | resp = [ 124 | TraceResult(value=value, probability=probability) for value, probability in zip(data, probs) 125 | ] 126 | resp.sort(key=lambda x: x.probability, reverse=True) 127 | return resp 128 | -------------------------------------------------------------------------------- /fpgen/unpacker.py: -------------------------------------------------------------------------------- 1 | import base64 2 | from typing import List, Tuple 3 | 4 | import numpy as np 5 | from indexed_zstd import IndexedZstdFile 6 | 7 | from .pkgman import VALUES_DATA, VALUES_JSON, __is_module__, extract_json 8 | 9 | 10 | def load_values_json() -> List[Tuple[str, List[str]]]: 11 | values_json = extract_json(VALUES_JSON) 12 | return list(values_json.items()) 13 | 14 | 15 | if not __is_module__(): 16 | # Do not attempt to load values.json if we are running as a module 17 | VALUE_PAIRS = load_values_json() 18 | 19 | 20 | def base85_to_int(s: str) -> int: 21 | # Decode using base85 22 | decoded_bytes = base64.b85decode(s) 23 | # Convert bytes to integer 24 | return int.from_bytes(decoded_bytes, byteorder='big') 25 | 26 | 27 | def get_dat_file(): 28 | """ 29 | Returns a seekable file descriptor (or indexed zst file) 30 | """ 31 | if VALUES_DATA.exists(): 32 | return open(VALUES_DATA, 'rb') 33 | elif (zst_path := VALUES_DATA.with_suffix('.dat.zst')).exists(): 34 | return IndexedZstdFile(str(zst_path)) 35 | 36 | raise FileNotFoundError(f'Missing required file: {VALUES_DATA}') 37 | 38 | 39 | def lookup_value(index): 40 | offset, length = VALUE_PAIRS[base85_to_int(index)] 41 | file = get_dat_file() 42 | file.seek(int(offset, 16)) 43 | data = file.read(length).decode('utf-8') 44 | file.close() 45 | return data 46 | 47 | 48 | def lookup_value_list(index_list): 49 | """ 50 | Returns a list of values from the data file given a list of lookup values 51 | """ 52 | # Empty numpy array of len(index_list) 53 | value_map = np.empty(len(index_list), dtype=object) 54 | 55 | file = get_dat_file() 56 | # Read in order from lowest index to highest 57 | sorted_indices = sorted( 58 | (base85_to_int(lookup_index), n) for n, lookup_index in enumerate(index_list) 59 | ) 60 | 61 | for index, n in sorted_indices: 62 | offset, length = VALUE_PAIRS[index] 63 | file.seek(int(offset, 16)) 64 | # Set to key in order of the original list 65 | value_map[n] = file.read(length).decode('utf-8') 66 | 67 | file.close() 68 | return value_map 69 | -------------------------------------------------------------------------------- /fpgen/utils.py: -------------------------------------------------------------------------------- 1 | from collections.abc import MutableMapping 2 | from typing import ( 3 | Any, 4 | Dict, 5 | Iterable, 6 | Iterator, 7 | List, 8 | Literal, 9 | Mapping, 10 | Optional, 11 | Set, 12 | Tuple, 13 | Union, 14 | overload, 15 | ) 16 | 17 | import orjson 18 | 19 | from .bayesian_network import BayesianNetwork, StrContainer 20 | from .exceptions import ( 21 | InvalidConstraints, 22 | InvalidNode, 23 | NetworkError, 24 | NodePathError, 25 | RestrictiveConstraints, 26 | ) 27 | from .pkgman import NETWORK_FILE, __is_module__ 28 | from .structs import CaseInsensitiveDict 29 | from .unpacker import lookup_value_list 30 | 31 | # Load the network. 
(unless we're running as a module) 32 | NETWORK: BayesianNetwork 33 | if __is_module__(): 34 | NETWORK = None # type: ignore 35 | else: 36 | NETWORK = BayesianNetwork(NETWORK_FILE) 37 | 38 | 39 | def _assert_network_exists() -> None: 40 | """ 41 | Quick helper to confirm NETWORK is defined 42 | """ 43 | if NETWORK is None: 44 | raise NetworkError("FPGEN_NO_INIT is enabled. Cannot access the network.") 45 | 46 | 47 | def query( 48 | target: str, *, flatten: bool = False, sort: bool = False 49 | ) -> Union[Dict[str, Any], List[Any]]: 50 | """ 51 | Query a list of possibilities given a target. 52 | 53 | Parameters: 54 | target (str): Target node to query possible values for 55 | flatten (bool, optional): Whether to flatten the output dictionary 56 | sort (bool, optional): Whether to sort the output arrays 57 | """ 58 | _assert_network_exists() 59 | 60 | # Check node list first 61 | values = _lookup_possibilities(target, casefold=False) 62 | if values: 63 | output: Union[Tuple, map] 64 | output = tuple(map(orjson.loads, values)) 65 | # Merge dicts if data is all dicts, else just return a deduped list 66 | if all(isinstance(d, dict) for d in output): 67 | # Flatten the output dict before returning if needed 68 | return _maybe_flatten(flatten, _merge_dicts(output, sort=sort)) 69 | else: 70 | # Dedupe the list 71 | return _dedupe(output, sort=sort) 72 | 73 | # Target is within a node. Need to look up the tree 74 | nested_keys: List[str] = [] 75 | root_data = _lookup_root_possibilities( 76 | target, nested_keys=nested_keys, none_if_missing=True, casefold=False 77 | ) 78 | if root_data is not None: 79 | # Read possibile values as jsons 80 | output = map(orjson.loads, root_data[1]) 81 | # Pull the item at the target path 82 | output = map(lambda d: _at_path(d, nested_keys), output) 83 | output = tuple(output) 84 | 85 | # If they are all dicts, merge them 86 | if all(isinstance(d, dict) for d in output): 87 | # Flatten the output dict if needed 88 | return _maybe_flatten(flatten, _merge_dicts(output, sort=sort)) 89 | 90 | # Return a deduped list 91 | return _dedupe(output, sort=sort) 92 | 93 | # Search down the tree 94 | data = _search_downward(target) 95 | resp: Dict[str, List[Any]] = { 96 | # Remove the current node path 97 | key.removeprefix(f'{target}.'): [ 98 | # Parse each possible value via orjson 99 | orjson.loads(d) 100 | for d in (_lookup_possibilities(key, casefold=False) or tuple()) 101 | ] 102 | for key in data 103 | } 104 | if flatten: 105 | # May need to flatten further 106 | return _flatten({node: _dedupe(values, sort=sort) for node, values in resp.items()}) 107 | return _unflatten(resp, sort=sort) 108 | 109 | 110 | """ 111 | Helper functions for searching for nodes up/down the network 112 | """ 113 | 114 | 115 | def _at_path(data: Mapping, path: StrContainer, *, casefold=False) -> Any: 116 | """ 117 | Gets the value in nested dictionary given its path 118 | """ 119 | for key in path: 120 | if casefold: 121 | data = CaseInsensitiveDict(data) 122 | if not isinstance(data, MutableMapping) or key not in data: 123 | raise NodePathError(key) 124 | data = data[key] 125 | return data 126 | 127 | 128 | @overload 129 | def _lookup_root_possibilities( 130 | key: str, 131 | nested_keys: Optional[List[str]] = None, 132 | casefold: bool = True, 133 | none_if_missing: Literal[False] = False, 134 | ) -> Tuple[str, Dict[str, Any]]: ... 
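# The paired overloads (above and below) differ only in `none_if_missing`: with the
# default False a missing node raises InvalidNode, with True the function returns
# None, letting type checkers narrow away the Optional. Illustrative call (node
# name taken from the test suite): _lookup_root_possibilities('screen.width')
# walks up to the 'screen' node and returns ('screen', {value: lookup_index, ...}).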
135 | 136 | 137 | @overload 138 | def _lookup_root_possibilities( 139 | key: str, 140 | nested_keys: Optional[List[str]] = None, 141 | casefold: bool = True, 142 | none_if_missing: Literal[True] = True, 143 | ) -> Optional[Tuple[str, Dict[str, Any]]]: ... 144 | 145 | 146 | def _lookup_root_possibilities( 147 | key: str, 148 | nested_keys: Optional[List[str]] = None, 149 | casefold: bool = True, 150 | none_if_missing: bool = False, 151 | ) -> Optional[Tuple[str, Dict[str, Any]]]: 152 | """ 153 | Finds the first avaliable root node of a given key, and queries its possibilities 154 | """ 155 | if not key: 156 | raise InvalidNode('Key cannot be empty.') 157 | while key: 158 | keys = key.rsplit('.', 1) 159 | # Ran out of keys to parse 160 | if len(keys) != 2: 161 | if none_if_missing: 162 | return None 163 | raise InvalidNode(f'{key} is not a valid node') 164 | key, sliced_key = keys 165 | 166 | if nested_keys is not None: 167 | nested_keys.append(sliced_key) 168 | 169 | # if a nested key is avaliable, enter it 170 | possible_values = _lookup_possibilities(key, casefold) 171 | # iterate backwards until we find the node 172 | if possible_values is not None: 173 | break 174 | 175 | if possible_values is None: 176 | if none_if_missing: 177 | return None 178 | raise InvalidNode(f'{key} is not a valid node') 179 | 180 | if nested_keys: 181 | nested_keys.reverse() 182 | 183 | return key, possible_values 184 | 185 | 186 | def _lookup_possibilities(node_name: str, casefold: bool = True) -> Optional[Dict[str, Any]]: 187 | """ 188 | Returns the possible values for the given node name. 189 | Returns as a dictionary {value: lookup_index} 190 | """ 191 | if node_name not in NETWORK.nodes_by_name: 192 | return None 193 | 194 | lookup_values = NETWORK.nodes_by_name[node_name].possible_values 195 | actual_values = lookup_value_list(lookup_values) 196 | 197 | return { 198 | (actual.casefold() if casefold else actual): lookup 199 | for actual, lookup in zip(actual_values, lookup_values) 200 | } 201 | 202 | 203 | def _search_downward(domain: str) -> Iterable[str]: 204 | """ 205 | Searches for all nodes that begin with a specific key 206 | """ 207 | found = False 208 | for i, node in enumerate(NETWORK.nodes_by_name.keys()): 209 | if not node.startswith(domain): 210 | continue 211 | # Check if its a . afterward 212 | key_len = len(domain) 213 | if len(node) > key_len and node[key_len] != '.': 214 | continue 215 | if not found: 216 | found = True 217 | # Get the original case 218 | yield NETWORK.node_names[i] 219 | 220 | if not found: 221 | raise InvalidNode(f'Unknown node: "{domain}"') 222 | 223 | 224 | def _find_roots(targets: Union[str, StrContainer]) -> Iterator[str]: 225 | """ 226 | Given a list of targets, return all nodes that make up that target's data 227 | """ 228 | for target in targets: 229 | target = target.casefold() 230 | while True: 231 | # Found a valid target 232 | if target in NETWORK.nodes_by_name: 233 | yield target 234 | break 235 | 236 | keys = target.rsplit('.', 1) 237 | if len(keys) > 1: 238 | # Move target back 1 239 | target = keys[0] 240 | continue 241 | 242 | # We are at the root key. 
243 | # Find potential keys before quitting 244 | yield from _search_downward(keys[0]) 245 | break 246 | 247 | 248 | def _reassemble_targets(targets: StrContainer, fingerprint: Dict[str, Any]): 249 | result = {} 250 | for target in targets: 251 | try: 252 | data = _at_path(fingerprint, target.split('.'), casefold=True) 253 | except NodePathError as key: 254 | raise InvalidNode(f"'{target}' is not a valid key path (missing {key}).") 255 | result[target] = data 256 | return result 257 | 258 | 259 | """ 260 | Miscellaneous python list/dict helpers 261 | """ 262 | 263 | 264 | def _dedupe(lst: Iterable[Any], sort: bool) -> List[Any]: 265 | """ 266 | Group items by their type, deduping each group 267 | """ 268 | groups: Dict[type, Any] = {} 269 | for item in lst: 270 | t = type(item) 271 | if t not in groups: 272 | groups[t] = [] 273 | # Only add item if it's not already in its type group 274 | if item not in groups[t]: 275 | groups[t].append(item) 276 | 277 | result = [] 278 | # Process groups in order sorted by type name 279 | for t in sorted(groups.keys(), key=lambda typ: typ.__name__): 280 | items = groups[t] 281 | # Do not sort if `sort` is False, or if type is unhashable 282 | if not sort or t in (list, dict): 283 | result.extend(items) 284 | else: 285 | result.extend(sorted(items)) 286 | return result 287 | 288 | 289 | def _unflatten(dictionary, sort: bool) -> Dict[str, Any]: 290 | """ 291 | Unflatten dicts and dedupe any nested lists 292 | """ 293 | result_dict: Dict[str, Any] = dict() 294 | for key, value in dictionary.items(): 295 | parts = key.split(".") 296 | d = result_dict 297 | for part in parts[:-1]: 298 | if part not in d: 299 | d[part] = dict() 300 | d = d[part] 301 | # Dedupe lists 302 | if isinstance(value, list): 303 | value = _dedupe(value, sort=sort) 304 | d[parts[-1]] = value 305 | return result_dict 306 | 307 | 308 | def _flatten(dictionary: Dict[str, Any], parent_key=False) -> Dict[str, Any]: 309 | """ 310 | Turn a nested dictionary into a flattened dictionary 311 | https://stackoverflow.com/questions/6027558/flatten-nested-dictionaries-compressing-keys 312 | """ 313 | items: List[Tuple[str, Any]] = [] 314 | for key, value in dictionary.items(): 315 | new_key = str(parent_key) + '.' + key if parent_key else key 316 | if isinstance(value, dict): 317 | items.extend(_flatten(value, new_key).items()) 318 | else: 319 | items.append((new_key, value)) 320 | return dict(items) 321 | 322 | 323 | def _maybe_flatten(flatten: Optional[bool], data): 324 | if not isinstance(data, dict): 325 | return data 326 | if flatten: 327 | return _flatten(data) 328 | return data 329 | 330 | 331 | def _merge_dicts(dict_list: Iterable[Dict[str, Any]], sort: bool) -> Dict[str, Any]: 332 | """ 333 | Merge items in a list of dicts into a dict of merged values. 334 | For a given key, if all values are dicts, merge them recursively. 335 | If all values are lists, flatten them into a single list and dedupe. 336 | Otherwise, dedupe the list of values. 337 | """ 338 | if not dict_list: 339 | return {} 340 | 341 | merged: Dict[str, Any] = {} 342 | # Get the union of keys from all dictionaries. 
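    # Illustrative behavior with made-up inputs (not real node data):
    #   _merge_dicts([{'a': {'x': 1}}, {'a': {'x': 2}}, {'b': [1, 1]}], sort=False)
    #   -> {'a': {'x': [1, 2]}, 'b': [1]}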
343 | all_keys: Set[str] = set() 344 | for d in dict_list: 345 | all_keys.update(d.keys()) 346 | 347 | for key in all_keys: 348 | # Get the list of values for the current key, skipping dicts that don't have it 349 | values = [d[key] for d in dict_list if key in d] 350 | 351 | if all(isinstance(v, dict) for v in values): 352 | # Merge dictionaries recursively 353 | merged[key] = _merge_dicts(values, sort=sort) 354 | elif all(isinstance(v, list) for v in values): 355 | # Merge lists 356 | merged_list = [] 357 | for lst in values: 358 | merged_list.extend(lst) 359 | merged[key] = _dedupe(merged_list, sort=sort) 360 | else: 361 | # For mixed/scalar values, dedupe 362 | merged[key] = _dedupe(values, sort=sort) 363 | 364 | return merged 365 | 366 | 367 | def _tupilize(value) -> Union[List[str], Tuple[str, ...]]: 368 | """ 369 | If a value is not a tuple or list, wrap it in a tuple 370 | """ 371 | return value if isinstance(value, (tuple, list)) else (value,) 372 | 373 | 374 | """ 375 | Parse user input 376 | """ 377 | 378 | 379 | def _flatten_conditions( 380 | dictionary: Mapping[str, Any], parent_key: str = '', casefold: bool = False 381 | ) -> Dict[str, Any]: 382 | """ 383 | Flattens the passed list of conditions 384 | """ 385 | # Original flattening logic from here: 386 | # https://stackoverflow.com/questions/6027558/flatten-nested-dictionaries-compressing-keys 387 | items: List[Tuple[str, Any]] = [] 388 | for key, value in dictionary.items(): 389 | new_key = parent_key + '.' + key if parent_key else key 390 | if isinstance(value, MutableMapping): 391 | items.extend(_flatten_conditions(value, new_key).items()) 392 | else: 393 | # If we have a tuple or set, treat it as an array of possible values 394 | if isinstance(value, (set, tuple)): 395 | value = tuple(orjson.dumps(v).decode() for v in value) 396 | # If we have a function, don't flatten it 397 | elif not callable(value): 398 | value = orjson.dumps(value).decode() 399 | if casefold: 400 | new_key = new_key.casefold() 401 | items.append((new_key, value)) 402 | return dict(items) 403 | 404 | 405 | def build_evidence( 406 | conditions: Dict[str, Any], evidence: Dict[str, Set[str]], strict: Optional[bool] = None 407 | ) -> None: 408 | """ 409 | Builds evidence based on the user's inputted conditions 410 | """ 411 | if strict is None: 412 | strict = True 413 | 414 | # Flatten to match the format of the fingerprint network 415 | conditions = _flatten_conditions(conditions, casefold=True) 416 | 417 | for key, value in conditions.items(): 418 | possible_values = _lookup_possibilities(key) 419 | 420 | # Handle nested keys 421 | nested_keys: List[str] = [] 422 | if possible_values is None: 423 | key, possible_values = _lookup_root_possibilities(key, nested_keys) 424 | # Get the real name for the key 425 | key = NETWORK.nodes_by_name[key].name 426 | 427 | evidence[key] = set() 428 | 429 | for value_con in _tupilize(value): 430 | # Read the passed value 431 | if callable(value_con): 432 | val = value_con # Callable 433 | else: 434 | val = orjson.loads(value_con.casefold()) # Dict/list/str data 435 | 436 | # Handle nested keys by filtering out possible values that dont 437 | # match the value at the target 438 | if nested_keys: 439 | nested_keys = list(map(lambda s: s.casefold(), nested_keys)) 440 | for poss_value, lookup_index in possible_values.items(): 441 | # Parse the dictionary 442 | outputted_possible = orjson.loads(poss_value) 443 | 444 | # Check if the value is a possible value at the nested path 445 | try: 446 | target_value = 
_at_path(outputted_possible, nested_keys) 447 | except NodePathError: 448 | continue # Path didn't exist, bad data 449 | if callable(val) and val(target_value): 450 | evidence[key].add(lookup_index) 451 | elif target_value == val: 452 | evidence[key].add(lookup_index) 453 | 454 | # If nothing was found, raise an error 455 | if not evidence[key]: 456 | if callable(val): 457 | # Callable didnt work 458 | raise InvalidConstraints( 459 | f'The passed function ({val}) yielded no possible values for "{key}" ' 460 | f'at "{".".join(nested_keys)}"' 461 | ) 462 | raise InvalidConstraints( 463 | f'{value_con} is not a possible value for "{key}" ' 464 | f'at "{".".join(nested_keys)}"' 465 | ) 466 | continue 467 | 468 | # ===== NON NESTED VALUE HANDLING ===== 469 | 470 | # If callable, get all possible values then check for matches 471 | if callable(val): 472 | # Filter by val(x) 473 | found = False 474 | for possible_val, lookup_index in possible_values.items(): 475 | if val(orjson.loads(possible_val)): 476 | evidence[key].add(lookup_index) 477 | found = True 478 | if not found: 479 | raise InvalidConstraints( 480 | f'The passed function ({val}) yielded no possible values for "{key}"' 481 | ) 482 | continue 483 | 484 | # Non nested values can be handled by directly checking possible_values 485 | lookup_index = possible_values.get(value_con.casefold()) 486 | # Value is not possible 487 | if lookup_index is None: 488 | raise InvalidConstraints(f'{value_con} is not a possible value for "{key}"') 489 | evidence[key].add(lookup_index) 490 | 491 | # Validate the evidence is possible (or try to relax the evidence if strict is False) 492 | while True: 493 | try: 494 | NETWORK.validate_evidence(evidence) 495 | except RestrictiveConstraints as e: 496 | if strict: 497 | raise e 498 | # Remove the last added key 499 | evidence.pop(next(iter(evidence.keys()))) 500 | break 501 | 502 | 503 | def _assert_dict_xor_kwargs( 504 | passed_dict: Optional[Dict[str, Any]], passed_kwargs: Optional[Dict[str, Any]] 505 | ) -> None: 506 | """ 507 | Confirms a dict is either passed as an argument, xor kwargs are passed. 508 | """ 509 | # Exit if neither is passed 510 | if passed_dict is None and passed_kwargs is None: 511 | return 512 | # Exit if both are passed 513 | if passed_dict and passed_kwargs: 514 | raise ValueError( 515 | f"Cannot pass values as dict & as parameters: {passed_dict} and {passed_kwargs}" 516 | ) 517 | # Raise if incorrect type 518 | if not isinstance(passed_dict or passed_kwargs, dict): 519 | raise ValueError( 520 | "Invalid argument. Constraints must be passed as kwargs or as a dictionary." 
521 | ) 522 | 523 | 524 | """ 525 | Convert network output to human readable output 526 | """ 527 | 528 | 529 | def _make_output_dict(data: Dict[str, Any], flatten: Optional[bool]) -> Dict[str, Any]: 530 | """ 531 | Unflattens & builds the output dictionary 532 | """ 533 | if flatten: 534 | # Get key value pairs directly without building structure 535 | values = lookup_value_list(data.values()) 536 | for key, value in zip(data.keys(), values): 537 | data[key] = orjson.loads(value) 538 | # Flatten node values that themselves are dicts 539 | return _flatten(data) 540 | 541 | # Original unflattening logic from here: 542 | # https://stackoverflow.com/questions/6037503/python-unflatten-dict 543 | result_dict: Dict[str, Any] = dict() 544 | for key, value in zip(data.keys(), lookup_value_list(data.values())): 545 | parts = key.split(".") 546 | d = result_dict 547 | for part in parts[:-1]: 548 | if part not in d: 549 | d[part] = dict() 550 | d = d[part] 551 | d[parts[-1]] = orjson.loads(value) 552 | 553 | return result_dict 554 | 555 | 556 | # Only expose `query` publicly 557 | __all__ = ('query',) 558 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["poetry-core>=1.0.0"] 3 | build-backend = "poetry.core.masonry.api" 4 | 5 | [tool.poetry] 6 | name = "fpgen" 7 | version = "1.3.0" 8 | description = "A fast & comprehensive browser fingerprint generator" 9 | authors = ["daijro "] 10 | license = "Apache-2.0" 11 | readme = "README.md" 12 | repository = "https://github.com/scrapfly/fingerprint-generator" 13 | keywords = [ 14 | "client", 15 | "headers", 16 | "fingerprint", 17 | "generator", 18 | "browser", 19 | "http", 20 | "scraping", 21 | "requests", 22 | "playwright", 23 | ] 24 | classifiers = [ 25 | "Topic :: Internet :: WWW/HTTP", 26 | "Topic :: Internet :: WWW/HTTP :: Browsers", 27 | "Topic :: Software Development :: Libraries :: Python Modules", 28 | ] 29 | 30 | [tool.poetry.dependencies] 31 | python = "^3.8" 32 | click = "*" 33 | indexed-zstd = "*" 34 | orjson = "*" 35 | numpy = "*" 36 | zstandard = "*" 37 | httpx = "*" 38 | 39 | [tool.poetry.scripts] 40 | fpgen = "fpgen.__main__:cli" -------------------------------------------------------------------------------- /tests/README.md: -------------------------------------------------------------------------------- 1 | # Tests 2 | 3 | Dirty test cases for internal use. 4 | 5 | Confirms that user inputs & conditional probabilities are handled correctly. -------------------------------------------------------------------------------- /tests/failed_cases.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test for exceptions that should be raised. 
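Each case prints a '[PASSED] ...' or '[FAILED] ...' line. The file is meant to be
run directly; it appends the repository root to sys.path before importing fpgen.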
3 | """ 4 | 5 | import sys 6 | from pathlib import Path 7 | 8 | sys.path.append(str(Path(__file__).parent.parent)) 9 | from fpgen import Generator, generate, trace 10 | from fpgen.exceptions import InvalidConstraints, InvalidNode, RestrictiveConstraints 11 | 12 | # Generator with incorrect values 13 | try: 14 | generate(screen={'width': 1920, 'height': 5000}) 15 | except InvalidConstraints as e: 16 | print('[PASSED] VERIFIER:', e) 17 | else: 18 | print('[FAILED]') 19 | 20 | # Incorrect nested node 21 | try: 22 | generate(screen={'width': 1920, 'heighta': 1080}) 23 | except InvalidConstraints as e: 24 | print('[PASSED] VERIFIER:', e) 25 | else: 26 | print('[FAILED]') 27 | 28 | try: 29 | generate(screena={'width': 1920, 'height': 1080}) 30 | except InvalidNode as e: 31 | print('[PASSED] VERIFIER:', e) 32 | else: 33 | print('[FAILED]') 34 | 35 | 36 | # Test generator constructor 37 | try: 38 | Generator(screen={'width': 1920, 'height': lambda x: x > 5000}) 39 | except InvalidConstraints as e: 40 | print('[PASSED] VERIFIER:', e) 41 | else: 42 | print('[FAILED]') 43 | 44 | # Test that Chrome is 100% probability on ChromeOS 45 | result = trace(target='browser', os='ChromeOS') 46 | try: 47 | assert len(result) == 1 and result[0].value == 'Chrome' and result[0].probability == 1.0 48 | except AssertionError: 49 | print('[FAILED] TRACE: Expected Chrome 100% probability on ChromeOS, got:', result) 50 | else: 51 | print('[PASSED] TRACE: Chrome is 100% probability on ChromeOS') 52 | 53 | # Test that Firefox/Safari are impossible on ChromeOS 54 | try: 55 | result = trace(target='browser', os='ChromeOS', browser=('Firefox', 'Safari')) 56 | except RestrictiveConstraints as e: 57 | print('[PASSED] TRACE: Firefox/Safari correctly impossible on ChromeOS') 58 | else: 59 | print('[FAILED] TRACE: Expected exception for impossible Firefox/Safari on ChromeOS') 60 | 61 | # Test Firefox/Safari probabilities without OS constraint 62 | result = trace(target='browser', browser=('Firefox', 'Safari')) 63 | try: 64 | assert len(result) == 2 65 | assert all(r.value in ('Firefox', 'Safari') for r in result) 66 | assert abs(sum(r.probability for r in result) - 1.0) < 0.0001 67 | except AssertionError: 68 | print('[FAILED] TRACE: Expected valid Firefox/Safari probabilities, got:', result) 69 | else: 70 | print('[PASSED] TRACE: Valid Firefox/Safari probabilities') 71 | 72 | # Test Chrome is 100% on ChromeOS even with Firefox/Safari allowed 73 | result = trace(target='browser', os='ChromeOS', browser=('Firefox', 'Safari', 'Chrome')) 74 | try: 75 | assert len(result) == 1 and result[0].value == 'Chrome' and result[0].probability == 1.0 76 | except AssertionError: 77 | print( 78 | '[FAILED] TRACE: Expected Chrome 100% on ChromeOS with Firefox/Safari allowed, got:', result 79 | ) 80 | else: 81 | print('[PASSED] TRACE: Chrome is 100% on ChromeOS with Firefox/Safari allowed') 82 | 83 | try: 84 | trace(target='browser', os='ChromeOS', browser='Firefox') 85 | except RestrictiveConstraints as e: 86 | print('[PASSED] TRACE: Firefox cannot exist on ChromeOS') 87 | else: 88 | print('[FAILED] TRACE: Should have raised an exception.') 89 | 90 | 91 | # Basic passing case 92 | try: 93 | data = generate(os='ChromeOS') 94 | except Exception as e: 95 | print('[FAILED] GENERATE: Basic target case failed:', e) 96 | else: 97 | print('[PASSED] GENERATE: Passed basic case (control)') 98 | 99 | try: 100 | data = generate(os='ChromeOS', target='browser') 101 | except Exception as e: 102 | print('[FAILED] GENERATE: Basic target case failed:', e) 103 | 
else: 104 | print('[PASSED] GENERATE: Passed basic case (control)') 105 | 106 | # Test impossible constraint handling 107 | try: 108 | data = generate(browser='firefox', os='ChromeOS') 109 | except RestrictiveConstraints as e: 110 | print('[PASSED] GENERATE: Throws on impossible constraint', e) 111 | else: 112 | print('[FAILED] GENERATE: Firefox should not exist on ChromeOS') 113 | 114 | try: 115 | data = generate(browser='firefox', os='ChromeOS', target='browser') 116 | except RestrictiveConstraints as e: 117 | print('[PASSED] GENERATE: Throws on impossible constraint', e) 118 | else: 119 | print('[FAILED] GENERATE: Firefox should not exist on ChromeOS (target)') 120 | 121 | try: 122 | data = generate(browser=('firefox', 'safari', 'chrome'), os='ChromeOS', target='browser') 123 | assert data == 'Chrome' 124 | except AssertionError: 125 | print('[FAILED] GENERATE: Doesn\'t pick the correct constraint') 126 | else: 127 | print('[PASSED] GENERATE: Picks the correct constraint') 128 | -------------------------------------------------------------------------------- /tests/generator_matches_trace.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from pathlib import Path 3 | 4 | sys.path.append(str(Path(__file__).parent.parent)) 5 | import fpgen 6 | 7 | # Controls whether to use `target` or not (uses a different algorithm) 8 | USE_TARGET = False 9 | # Number of trials to run 10 | TRIALS = 500 11 | 12 | TESTS = [ 13 | ('browser', {'os': ('Linux', 'MacOS'), 'browser': ('Edge', 'Safari')}), 14 | ('browser', {'os': ('Linux')}), 15 | ('browser', {'navigator': {'productsub': '20100101'}}), 16 | ('navigator.language', {'browser': 'firefox'}), 17 | ('os', {'browser': 'Firefox'}), 18 | ] 19 | 20 | LIMIT = 10 21 | 22 | for target, constraints in TESTS: 23 | pretty_constraints = ', '.join(f'{k}={v}' for k, v in constraints.items()) 24 | print(f'Expected P({target}|{pretty_constraints}):') 25 | print(fpgen.trace(target=target, **constraints)[:LIMIT]) 26 | print(f'Expected P({target}):') 27 | print(fpgen.trace(target=target)[:LIMIT]) 28 | 29 | # Collected data 30 | browser_data = {} 31 | 32 | for _ in range(TRIALS): 33 | print(f'Trial {_+1}/{TRIALS}', end='\r') 34 | if USE_TARGET: 35 | a = fpgen.generate(flatten=True, target=target, **constraints) 36 | else: 37 | a = fpgen.generate(flatten=True, **constraints)[target] 38 | browser_data[a] = browser_data.get(a, 0) + 1 39 | 40 | print(f"\nGenerator test using P({target}|{pretty_constraints}):") 41 | for browser, count in sorted(browser_data.items(), key=lambda x: x[1], reverse=True)[:LIMIT]: 42 | print(f"{browser}: {count/TRIALS*100:.2f}%") 43 | print('\n---------\n') 44 | -------------------------------------------------------------------------------- /tests/profile.py: -------------------------------------------------------------------------------- 1 | """ 2 | Track where the generator hangs. 
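Requires pyinstrument, which is not listed as a dependency in pyproject.toml and
must be installed separately.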
3 | """ 4 | 5 | import sys 6 | from pathlib import Path 7 | 8 | sys.path.append(str(Path(__file__).parent.parent)) 9 | from pyinstrument import Profiler 10 | 11 | from fpgen import generate 12 | 13 | profiler = Profiler(interval=0.001) 14 | profiler.start() 15 | 16 | # Intensive constraint 17 | generate( 18 | browser=('Firefox', 'Chrome'), 19 | client={'browser': {'major': ('134', '133')}}, 20 | ) 21 | profiler.stop() 22 | 23 | print(profiler.output_text(show_all=True)) 24 | -------------------------------------------------------------------------------- /tests/speed_test.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test the speed of the generate, trace, and query functions. 3 | """ 4 | 5 | import sys 6 | from pathlib import Path 7 | from timeit import timeit 8 | 9 | sys.path.append(str(Path(__file__).parent.parent)) 10 | 11 | from contextlib import contextmanager 12 | from time import time 13 | 14 | from fpgen import Generator, generate, query, trace 15 | 16 | 17 | @contextmanager 18 | def timer(description): 19 | print(description) 20 | start = time() 21 | yield 22 | print('Time to run', time() - start) 23 | 24 | 25 | # Trace operations 26 | with timer('trace (target=navigator.language)'): 27 | trace(target='navigator.language') 28 | 29 | with timer("trace (target=browser, os=('Linux', 'MacOS'), browser=('Edge', 'Safari'))"): 30 | trace('browser', os=('Linux', 'MacOS'), browser=('Edge', 'Safari')) 31 | 32 | # Generate operations 33 | with timer('generating (full fingerprint)'): 34 | generate() 35 | 36 | # with timer('generating (navigator.language=en-US)'): 37 | # generate({'navigator.language': 'en-US'}) 38 | 39 | with timer('generating (navigator.language=en-US, target=browser)'): 40 | generate({'navigator.language': ('en-US', 'en-GB', 'fr', 'de-DE')}, target='browser') 41 | 42 | with timer('generating (browser=firefox, target=browser)'): 43 | generate(browser=('firefox'), target='browser') 44 | 45 | with timer('generating (browser=firefox, target=navigator.language)'): 46 | generate(browser=('firefox'), target='navigator.language') 47 | 48 | with timer('generating with a function constraint'): 49 | generate({'window': {'innerWidth': lambda x: x > 1000}}, target='navigator.language') 50 | 51 | # Timeit tests 52 | 53 | print('\n========== TIMEIT TESTS ==========\n') 54 | 55 | print('Generator test') 56 | print(timeit(lambda: generate(), number=100), '/ 100') 57 | 58 | print('Generator test (with nested constraints)') 59 | print(timeit(lambda: generate(screen={'width': 1920, 'height': 1080}), number=10), '/ 10') 60 | 61 | gen = Generator(screen={'width': 1920, 'height': 1080}) 62 | 63 | print('Generator test with nested constraints (pre-filtered)') 64 | print(timeit(lambda: gen.generate(), number=10), '/ 10') 65 | 66 | print('Query test (large value set)') 67 | print(timeit(lambda: query('allFonts'), number=10), '/ 10') 68 | 69 | print('Trace test') 70 | print(timeit(lambda: trace('browser'), number=100), '/ 100') 71 | 72 | print('Trace test (large value set)') 73 | print(timeit(lambda: trace('allFonts'), number=10), '/ 10') 74 | -------------------------------------------------------------------------------- /tests/user_inputs.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests various user inputs to confirm that they are handled correctly. 
3 | """ 4 | 5 | import sys 6 | from pathlib import Path 7 | 8 | sys.path.append(str(Path(__file__).parent.parent)) 9 | import re 10 | 11 | from fpgen import Generator, __network__, query 12 | 13 | 14 | def test(name, out, x=None): 15 | if x and not re.search(x, str(out)): 16 | print( 17 | ('> FAILED: ' + name).ljust(60, ' '), 18 | out, 19 | ) 20 | return 21 | print(('PASSED! ' + name).ljust(60, ' '), str(out)[:100]) 22 | 23 | 24 | # Test options 25 | FLATTEN_OPT = False 26 | SORT_OPT = False 27 | 28 | print(' ==== GENERATOR TESTS ==== ') 29 | 30 | gen = Generator(flatten=FLATTEN_OPT) 31 | 32 | test( 33 | 'Generate full fp', 34 | gen.generate(), 35 | '^{.*', 36 | ) 37 | test( 38 | 'Generate full fp (non strict, fallback os)', 39 | gen.generate( 40 | os='ChromeOS', 41 | browser='Firefox', 42 | strict=False, 43 | target=('os', 'browser'), 44 | ), 45 | ".*'browser': 'Firefox'.*", 46 | ) 47 | test( 48 | 'Target (at node) given constraint', 49 | gen.generate( 50 | {'navigator': {'productsub': '20100101'}}, 51 | target=('headers.user-agent'), 52 | ), 53 | 'Firefox', 54 | ) 55 | test( 56 | 'Target (within node) given constraint', 57 | gen.generate( 58 | {'navigator': {'productsub': '20100101'}}, 59 | target=['screen.width', 'screen.height'], 60 | ), 61 | r'\d+', 62 | ) 63 | test( 64 | 'Target (above node) given constraint', 65 | gen.generate( 66 | {'navigator': {'productsub': '20100101'}}, 67 | target='navigator', 68 | ), 69 | '^{.*$', 70 | ) 71 | test( 72 | 'Passing multi constraints (no target)', 73 | gen.generate( 74 | browser=('Firefox', 'Chrome'), 75 | client={'browser': {'major': ('134', '133')}}, 76 | ), 77 | r'\b13[34]\b', 78 | ) 79 | test( 80 | 'Passing multi constraints (target)', 81 | gen.generate( 82 | browser=('Firefox', 'Chrome'), 83 | client={'browser': {'major': ('134', '133')}}, 84 | target='client', 85 | ), 86 | r'\b13[34]\b', 87 | ) 88 | gpu = { 89 | 'vendor': 'Google Inc. (Apple)', 90 | 'renderer': 'ANGLE (Apple, ANGLE Metal Renderer: Apple M2, Unspecified Version)', 91 | } 92 | test( 93 | 'Constraint tgt (at node, `window`)', 94 | gen.generate(gpu=gpu, target='window'), 95 | ) 96 | test( 97 | 'Constraint tgt (above nodes, `navigator`)', 98 | gen.generate(gpu=gpu, target='navigator'), 99 | ) 100 | test( 101 | 'Constraint tgt (within node, `screen.width`)', 102 | gen.generate(gpu=gpu, target='screen.width'), 103 | ) 104 | 105 | print('\n ==== QUERY TESTS ==== ') 106 | 107 | test( 108 | 'Possibilities (at node 1, `navigator.productsub`)', 109 | query('navigator.productsub', flatten=FLATTEN_OPT, sort=SORT_OPT), 110 | ) 111 | test( 112 | 'Possibilities (at node 2, `screen`)', 113 | query('screen', flatten=FLATTEN_OPT, sort=SORT_OPT), 114 | ) 115 | test( 116 | 'Possibilities (above nodes, `navigator`)', 117 | query('navigator', flatten=FLATTEN_OPT, sort=SORT_OPT), 118 | ) 119 | test( 120 | 'Possibilities (within node, `screen.width`)', 121 | query('screen.width', flatten=FLATTEN_OPT, sort=SORT_OPT), 122 | ) 123 | 124 | 125 | print(' ==== QUERY ALL NODES ==== ') 126 | 127 | for node in __network__.nodes_by_name: 128 | # Get the possibilities 129 | print(f'Listing possibilities for {node}') 130 | a = query(node, flatten=FLATTEN_OPT, sort=SORT_OPT) 131 | print(str(a)[:100]) 132 | --------------------------------------------------------------------------------