├── .github ├── dependabot.yaml └── workflows │ ├── auto-merge.yml │ ├── disperse.yml │ ├── pythonpackage.yml │ └── wheels.yaml ├── .gitignore ├── CLAUDE.md ├── CODE_OF_CONDUCT.md ├── COPYING ├── Cargo.toml ├── MANIFEST.in ├── README.md ├── SECURITY.md ├── disperse.toml ├── fastbencode ├── __init__.py ├── _bencode_py.py ├── py.typed └── tests │ ├── __init__.py │ └── test_bencode.py ├── pyproject.toml ├── setup.py └── src └── lib.rs /.github/dependabot.yaml: -------------------------------------------------------------------------------- 1 | # Keep GitHub Actions up to date with GitHub's Dependabot... 2 | # https://docs.github.com/en/code-security/dependabot/working-with-dependabot/keeping-your-actions-up-to-date-with-dependabot 3 | # https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file#package-ecosystem 4 | version: 2 5 | updates: 6 | - package-ecosystem: "github-actions" 7 | directory: "/" 8 | schedule: 9 | interval: weekly 10 | - package-ecosystem: "pip" 11 | directory: "/" 12 | schedule: 13 | interval: weekly 14 | -------------------------------------------------------------------------------- /.github/workflows/auto-merge.yml: -------------------------------------------------------------------------------- 1 | name: Dependabot auto-merge 2 | on: pull_request_target 3 | 4 | permissions: 5 | pull-requests: write 6 | contents: write 7 | 8 | jobs: 9 | dependabot: 10 | runs-on: ubuntu-latest 11 | if: ${{ github.actor == 'dependabot[bot]' }} 12 | steps: 13 | - name: Dependabot metadata 14 | id: metadata 15 | uses: dependabot/fetch-metadata@v2 16 | with: 17 | github-token: "${{ secrets.GITHUB_TOKEN }}" 18 | - name: Enable auto-merge for Dependabot PRs 19 | run: gh pr merge --auto --squash "$PR_URL" 20 | env: 21 | PR_URL: ${{github.event.pull_request.html_url}} 22 | GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} 23 | -------------------------------------------------------------------------------- 
/.github/workflows/disperse.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: Disperse configuration 3 | 4 | "on": 5 | - push 6 | 7 | jobs: 8 | build: 9 | 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - uses: actions/checkout@v4 14 | - uses: jelmer/action-disperse-validate@v2 15 | -------------------------------------------------------------------------------- /.github/workflows/pythonpackage.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: Python package 3 | 4 | "on": [push, pull_request] 5 | 6 | jobs: 7 | build: 8 | 9 | runs-on: ${{ matrix.os }} 10 | strategy: 11 | matrix: 12 | os: [ubuntu-latest, macos-latest, windows-latest] 13 | python-version: ['3.10', '3.11', '3.12', '3.13', '3.9'] 14 | fail-fast: false 15 | 16 | steps: 17 | - uses: actions/checkout@v4 18 | - name: Set up Python ${{ matrix.python-version }} 19 | uses: actions/setup-python@v5 20 | with: 21 | python-version: ${{ matrix.python-version }} 22 | - name: Install dependencies 23 | run: | 24 | python -m pip install --upgrade pip cython 25 | pip install -U pip ".[dev]" 26 | - name: Style checks 27 | run: | 28 | python -m ruff check . 29 | python -m ruff format --check . 30 | - name: Test suite run (pure Python) 31 | run: | 32 | python -m unittest fastbencode.tests.test_suite 33 | env: 34 | PYTHONHASHSEED: random 35 | - name: Run cargo fmt 36 | run: | 37 | cargo fmt --all -- --check 38 | - name: Install in editable mode 39 | run: | 40 | pip install -e . 
41 | - name: Test suite run (with C extension) 42 | run: | 43 | python -m unittest fastbencode.tests.test_suite 44 | env: 45 | PYTHONHASHSEED: random 46 | -------------------------------------------------------------------------------- /.github/workflows/wheels.yaml: -------------------------------------------------------------------------------- 1 | name: Build Python distributions 2 | 3 | on: 4 | push: 5 | pull_request: 6 | schedule: 7 | - cron: "0 6 * * *" # Daily 6AM UTC build 8 | 9 | jobs: 10 | build-wheels: 11 | runs-on: ${{ matrix.os }} 12 | strategy: 13 | matrix: 14 | os: [ubuntu-latest, macos-latest, windows-latest] 15 | fail-fast: true 16 | 17 | steps: 18 | - uses: actions/checkout@v4 19 | - uses: actions/setup-python@v5 20 | - name: Install dependencies 21 | run: | 22 | python -m pip install --upgrade pip 23 | pip install setuptools wheel cibuildwheel setuptools-rust 24 | - name: Set up QEMU 25 | uses: docker/setup-qemu-action@v3 26 | if: "matrix.os == 'ubuntu-latest'" 27 | - name: Build wheels 28 | run: python -m cibuildwheel --output-dir wheelhouse 29 | - name: Upload wheels 30 | uses: actions/upload-artifact@v4 31 | with: 32 | name: artifact-${{ matrix.os }} 33 | path: ./wheelhouse/*.whl 34 | 35 | build-sdist: 36 | runs-on: ubuntu-latest 37 | steps: 38 | - uses: actions/checkout@v4 39 | - uses: actions/setup-python@v5 40 | - name: Install dependencies 41 | run: | 42 | python -m pip install --upgrade pip 43 | pip install build 44 | - name: Build sdist 45 | run: python -m build --sdist 46 | - name: Upload sdist 47 | uses: actions/upload-artifact@v4 48 | with: 49 | name: artifact-sdist 50 | path: ./dist/*.tar.gz 51 | 52 | test-sdist: 53 | needs: 54 | - build-sdist 55 | runs-on: ubuntu-latest 56 | steps: 57 | - uses: actions/setup-python@v5 58 | - name: Install dependencies 59 | run: | 60 | python -m pip install --upgrade pip 61 | # Upgrade packaging to avoid a bug in twine. 
62 | # See https://github.com/pypa/twine/issues/1216 63 | pip install "twine>=6.1.0" "packaging>=24.2" 64 | - name: Download sdist 65 | uses: actions/download-artifact@v4 66 | with: 67 | name: artifact-sdist 68 | path: dist 69 | - name: Test sdist 70 | run: twine check dist/* 71 | - name: Test installation from sdist 72 | run: pip install dist/*.tar.gz 73 | 74 | publish: 75 | runs-on: ubuntu-latest 76 | needs: 77 | - build-wheels 78 | - build-sdist 79 | if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') 80 | permissions: 81 | id-token: write 82 | environment: 83 | name: pypi 84 | url: https://pypi.org/p/fastbencode 85 | steps: 86 | - name: Download distributions 87 | uses: actions/download-artifact@v4 88 | with: 89 | merge-multiple: true 90 | pattern: artifact-* 91 | path: dist 92 | - name: Publish package distributions to PyPI 93 | uses: pypa/gh-action-pypi-publish@release/v1 94 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.so 2 | build 3 | __pycache__ 4 | fastbencode.egg-info 5 | *.pyc 6 | dist 7 | *~ 8 | .mypy_cache 9 | *.swp 10 | *.swo 11 | *.swn 12 | 13 | # Rust 14 | target/ 15 | Cargo.lock 16 | **/*.rs.bk 17 | -------------------------------------------------------------------------------- /CLAUDE.md: -------------------------------------------------------------------------------- 1 | # CLAUDE.md 2 | 3 | This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. 4 | 5 | ## Repository Overview 6 | 7 | fastbencode is an implementation of the bencode serialization format originally used by BitTorrent. The package includes both a pure-Python version and an optional Rust extension based on PyO3. Both provide the same functionality, but the Rust extension provides significantly better performance. 
8 | 9 | ## Development Commands 10 | 11 | ### Building and Installing 12 | 13 | ```bash 14 | # Install in development mode without Rust extension (pure Python) 15 | pip3 install -e . 16 | 17 | # Build the Rust extension 18 | python3 setup.py build_ext --inplace 19 | ``` 20 | 21 | ### Running Tests 22 | 23 | ```bash 24 | # Run all tests (both Python and Rust implementations) 25 | PYTHONPATH=. python3 -m unittest fastbencode.tests.test_suite 26 | 27 | # Run specific test file 28 | PYTHONPATH=. python3 -m unittest fastbencode.tests.test_bencode 29 | 30 | # Run a specific test 31 | PYTHONPATH=. python3 -m unittest fastbencode.tests.test_bencode.TestBencodeDecode.test_int 32 | ``` 33 | 34 | ### Linting 35 | 36 | ```bash 37 | # Run ruff linter 38 | ruff check . 39 | ``` 40 | 41 | ## Architecture 42 | 43 | The codebase consists of two main implementations of the bencode format: 44 | 45 | 1. **Pure Python implementation** (`fastbencode/_bencode_py.py`): 46 | - Provides baseline functionality for all bencode operations 47 | - Used as a fallback when the Rust extension is not available 48 | 49 | 2. **Rust implementation** (`src/lib.rs`): 50 | - Implemented using PyO3 for Python bindings 51 | - Provides the same functionality with better performance 52 | - Compiled into `_bencode_rs` module 53 | 54 | Both implementations provide the following key functions: 55 | - `bencode`: Encode Python objects to bencode format 56 | - `bdecode`: Decode bencode data to Python objects 57 | - `bencode_utf8`: Like bencode but handles UTF-8 strings 58 | - `bdecode_utf8`: Like bdecode but decodes strings as UTF-8 59 | - `bdecode_as_tuple`: Like bdecode but returns tuples instead of lists 60 | 61 | The main module (`fastbencode/__init__.py`) tries to import the Rust implementation first and falls back to the pure Python one if that fails. 
62 | 63 | ## Testing Strategy 64 | 65 | The test suite uses Python's unittest framework with a custom test multiplier that runs all tests against both the Python and Rust implementations. This ensures feature parity between the two implementations. 66 | 67 | Tests are defined in `fastbencode/tests/test_bencode.py` and cover: 68 | - Encoding and decoding basic types (integers, strings, lists, dictionaries) 69 | - Error handling for malformed input 70 | - Edge cases (large values, recursion limits, etc.) 71 | - UTF-8 handling 72 | 73 | ## Codebase Specifics 74 | 75 | - Dictionary keys in bencode must be bytestrings for performance reasons 76 | - The `Bencached` class is used for pre-encoded values to avoid re-encoding 77 | - Recursion depth is limited to prevent stack overflows (with special handling for different Python implementations) 78 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to making participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of experience, education, socioeconomic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 
11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies both within project spaces and in public spaces 49 | when an individual is representing the project or its community. Examples of 50 | representing a project or community include using an official project e-mail 51 | address, posting via an official social media account, or acting as an appointed 52 | representative at an online or offline event. 
Representation of a project may be 53 | further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at core@breezy-vcs.org. All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 72 | 73 | [homepage]: https://www.contributor-covenant.org 74 | 75 | For answers to common questions about this code of conduct, see 76 | https://www.contributor-covenant.org/faq 77 | -------------------------------------------------------------------------------- /COPYING: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc., 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. 
By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Lesser General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. 
If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. 
You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 
113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. 
You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. (This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 
165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. 
If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. 
If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 
292 | 293 | 294 | Copyright (C) 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details. 305 | 306 | You should have received a copy of the GNU General Public License along 307 | with this program; if not, write to the Free Software Foundation, Inc., 308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 309 | 310 | Also add information on how to contact you by electronic and paper mail. 311 | 312 | If the program is interactive, make it output a short notice like this 313 | when it starts in an interactive mode: 314 | 315 | Gnomovision version 69, Copyright (C) year name of author 316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 317 | This is free software, and you are welcome to redistribute it 318 | under certain conditions; type `show c' for details. 319 | 320 | The hypothetical commands `show w' and `show c' should show the appropriate 321 | parts of the General Public License. Of course, the commands you use may 322 | be called something other than `show w' and `show c'; they could even be 323 | mouse-clicks or menu items--whatever suits your program. 324 | 325 | You should also get your employer (if you work as a programmer) or your 326 | school, if any, to sign a "copyright disclaimer" for the program, if 327 | necessary. Here is a sample; alter the names: 328 | 329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 330 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 
331 | 332 | <signature of Ty Coon>, 1 April 1989 333 | Ty Coon, President of Vice 334 | 335 | This General Public License does not permit incorporating your program into 336 | proprietary programs. If your program is a subroutine library, you may 337 | consider it more useful to permit linking proprietary applications with the 338 | library. If this is what you want to do, use the GNU Lesser General 339 | Public License instead of this License. 340 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "fastbencode" 3 | version = "0.3.2" 4 | edition = "2021" 5 | authors = ["Jelmer Vernooij "] 6 | license = "GPL-2.0-or-later" 7 | description = "Implementation of bencode with Rust implementation" 8 | readme = "README.md" 9 | repository = "https://github.com/breezy-team/fastbencode" 10 | 11 | [lib] 12 | name = "fastbencode__bencode_rs" 13 | crate-type = ["cdylib"] 14 | 15 | [dependencies] 16 | pyo3 = { version = "0.24.2", features = ["extension-module"] } 17 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md 2 | include COPYING 3 | include fastbencode/py.typed 4 | include Cargo.toml 5 | recursive-include src *.rs 6 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | fastbencode 2 | =========== 3 | 4 | fastbencode is an implementation of the bencode serialization format originally 5 | used by BitTorrent. 6 | 7 | The package includes both a pure-Python version and an optional Rust extension 8 | based on PyO3. Both provide the same functionality, but the Rust extension 9 | provides significantly better performance.
10 | 11 | Example: 12 | 13 | >>> from fastbencode import bencode, bdecode 14 | >>> bencode([1, 2, b'a', {b'd': 3}]) 15 | b'li1ei2e1:ad1:di3eee' 16 | >>> bdecode(bencode([1, 2, b'a', {b'd': 3}])) 17 | [1, 2, b'a', {b'd': 3}] 18 | 19 | The default ``bencode``/``bdecode`` functions just operate on 20 | bytestrings. Use ``bencode_utf8`` / ``bdecode_utf8`` to 21 | serialize/deserialize all plain strings as UTF-8 bytestrings. 22 | Note that for performance reasons, all dictionary keys still have to be 23 | bytestrings. 24 | 25 | License 26 | ======= 27 | fastbencode is available under the GNU GPL, version 2 or later. 28 | 29 | Copyright 30 | ========= 31 | 32 | * Original Pure-Python bencoder © Petru Paler 33 | * Split out from Bazaar/Breezy by Jelmer Vernooij 34 | * Rust extension © Jelmer Vernooij 35 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 | ## Reporting a Vulnerability 4 | 5 | Please report security issues by e-mail to core@breezy-vcs.org. 
6 | -------------------------------------------------------------------------------- /disperse.toml: -------------------------------------------------------------------------------- 1 | tag-name = "v$VERSION" 2 | verify-command = "python3 -m unittest fastbencode.tests.test_suite" 3 | tarball-location = [] 4 | release-timeout = 5 5 | 6 | [[update_version]] 7 | path = "fastbencode/__init__.py" 8 | new-line = "__version__ = $TUPLED_VERSION" 9 | -------------------------------------------------------------------------------- /fastbencode/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2021-2023 Jelmer Vernooij 2 | # 3 | # This program is free software; you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation; either version 2 of the License, or 6 | # (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 
12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program; if not, write to the Free Software 15 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 16 | 17 | """Wrapper around the bencode Rust and Python implementations.""" 18 | 19 | from typing import Type 20 | 21 | __version__ = (0, 3, 2) 22 | 23 | 24 | Bencached: Type 25 | 26 | try: 27 | from fastbencode._bencode_rs import ( 28 | Bencached, 29 | bdecode, 30 | bdecode_as_tuple, 31 | bdecode_utf8, 32 | bencode, 33 | bencode_utf8, 34 | ) 35 | except ModuleNotFoundError as e: 36 | import warnings 37 | 38 | warnings.warn(f"failed to load compiled extension: {e}", UserWarning) 39 | 40 | # Fall back to pure Python implementation 41 | from ._bencode_py import ( # noqa: F401 42 | Bencached, 43 | bdecode, 44 | bdecode_as_tuple, 45 | bdecode_utf8, 46 | bencode, 47 | bencode_utf8, 48 | ) 49 | -------------------------------------------------------------------------------- /fastbencode/_bencode_py.py: -------------------------------------------------------------------------------- 1 | # bencode structured encoding 2 | # 3 | # Written by Petru Paler 4 | # 5 | # Permission is hereby granted, free of charge, to any person 6 | # obtaining a copy of this software and associated documentation files 7 | # (the "Software"), to deal in the Software without restriction, 8 | # including without limitation the rights to use, copy, modify, merge, 9 | # publish, distribute, sublicense, and/or sell copies of the Software, 10 | # and to permit persons to whom the Software is furnished to do so, 11 | # subject to the following conditions: 12 | # 13 | # The above copyright notice and this permission notice shall be 14 | # included in all copies or substantial portions of the Software. 
15 | # 16 | # Modifications copyright (C) 2008 Canonical Ltd 17 | # Modifications copyright (C) 2021-2023 Jelmer Vernooij 18 | 19 | 20 | from typing import Callable, Dict, List, Type 21 | 22 | 23 | class BDecoder: 24 | def __init__(self, yield_tuples=False, bytestring_encoding=None) -> None: 25 | """Constructor. 26 | 27 | :param yield_tuples: if true, decode "l" elements as tuples rather than 28 | lists. 29 | """ 30 | self.yield_tuples = yield_tuples 31 | self.bytestring_encoding = bytestring_encoding 32 | decode_func = {} 33 | decode_func[b"l"] = self.decode_list 34 | decode_func[b"d"] = self.decode_dict 35 | decode_func[b"i"] = self.decode_int 36 | decode_func[b"0"] = self.decode_bytes 37 | decode_func[b"1"] = self.decode_bytes 38 | decode_func[b"2"] = self.decode_bytes 39 | decode_func[b"3"] = self.decode_bytes 40 | decode_func[b"4"] = self.decode_bytes 41 | decode_func[b"5"] = self.decode_bytes 42 | decode_func[b"6"] = self.decode_bytes 43 | decode_func[b"7"] = self.decode_bytes 44 | decode_func[b"8"] = self.decode_bytes 45 | decode_func[b"9"] = self.decode_bytes 46 | self.decode_func = decode_func 47 | 48 | def decode_int(self, x, f): 49 | f += 1 50 | newf = x.index(b"e", f) 51 | n = int(x[f:newf]) 52 | if x[f : f + 2] == b"-0": 53 | raise ValueError 54 | elif x[f : f + 1] == b"0" and newf != f + 1: 55 | raise ValueError 56 | return (n, newf + 1) 57 | 58 | def decode_bytes(self, x, f): 59 | colon = x.index(b":", f) 60 | n = int(x[f:colon]) 61 | if x[f : f + 1] == b"0" and colon != f + 1: 62 | raise ValueError 63 | colon += 1 64 | d = x[colon : colon + n] 65 | if self.bytestring_encoding: 66 | d = d.decode(self.bytestring_encoding) 67 | return (d, colon + n) 68 | 69 | def decode_list(self, x, f): 70 | r, f = [], f + 1 71 | while x[f : f + 1] != b"e": 72 | v, f = self.decode_func[x[f : f + 1]](x, f) 73 | r.append(v) 74 | if self.yield_tuples: 75 | r = tuple(r) 76 | return (r, f + 1) 77 | 78 | def decode_dict(self, x, f): 79 | r, f = {}, f + 1 80 | lastkey = None 
81 | while x[f : f + 1] != b"e": 82 | k, f = self.decode_bytes(x, f) 83 | if lastkey is not None and lastkey >= k: 84 | raise ValueError 85 | lastkey = k 86 | r[k], f = self.decode_func[x[f : f + 1]](x, f) 87 | return (r, f + 1) 88 | 89 | def bdecode(self, x): 90 | if not isinstance(x, bytes): 91 | raise TypeError 92 | try: 93 | r, l = self.decode_func[x[:1]](x, 0) # noqa: E741 94 | except (IndexError, KeyError, OverflowError) as e: 95 | raise ValueError(str(e)) 96 | if l != len(x): # noqa: E741 97 | raise ValueError 98 | return r 99 | 100 | 101 | _decoder = BDecoder() 102 | bdecode = _decoder.bdecode 103 | 104 | _tuple_decoder = BDecoder(True) 105 | bdecode_as_tuple = _tuple_decoder.bdecode 106 | 107 | _utf8_decoder = BDecoder(bytestring_encoding="utf-8") 108 | bdecode_utf8 = _utf8_decoder.bdecode 109 | 110 | 111 | class Bencached: 112 | __slots__ = ["bencoded"] 113 | 114 | def __init__(self, s) -> None: 115 | self.bencoded = s 116 | 117 | 118 | class BEncoder: 119 | def __init__(self, bytestring_encoding=None): 120 | self.bytestring_encoding = bytestring_encoding 121 | self.encode_func: Dict[Type, Callable[[object, List[bytes]], None]] = { 122 | Bencached: self.encode_bencached, 123 | int: self.encode_int, 124 | bytes: self.encode_bytes, 125 | list: self.encode_list, 126 | tuple: self.encode_list, 127 | dict: self.encode_dict, 128 | bool: self.encode_bool, 129 | str: self.encode_str, 130 | } 131 | 132 | def encode_bencached(self, x, r): 133 | r.append(x.bencoded) 134 | 135 | def encode_bool(self, x, r): 136 | self.encode_int(int(x), r) 137 | 138 | def encode_int(self, x, r): 139 | r.extend((b"i", int_to_bytes(x), b"e")) 140 | 141 | def encode_bytes(self, x, r): 142 | r.extend((int_to_bytes(len(x)), b":", x)) 143 | 144 | def encode_list(self, x, r): 145 | r.append(b"l") 146 | for i in x: 147 | self.encode(i, r) 148 | r.append(b"e") 149 | 150 | def encode_dict(self, x, r): 151 | r.append(b"d") 152 | ilist = sorted(x.items()) 153 | for k, v in ilist: 154 | 
r.extend((int_to_bytes(len(k)), b":", k)) 155 | self.encode(v, r) 156 | r.append(b"e") 157 | 158 | def encode_str(self, x, r): 159 | if self.bytestring_encoding is None: 160 | raise TypeError( 161 | "string found but no encoding specified. " 162 | "Use bencode_utf8 rather bencode?" 163 | ) 164 | return self.encode_bytes(x.encode(self.bytestring_encoding), r) 165 | 166 | def encode(self, x, r): 167 | self.encode_func[type(x)](x, r) 168 | 169 | 170 | def int_to_bytes(n): 171 | return b"%d" % n 172 | 173 | 174 | def bencode(x): 175 | r = [] 176 | encoder = BEncoder() 177 | encoder.encode(x, r) 178 | return b"".join(r) 179 | 180 | 181 | def bencode_utf8(x): 182 | r = [] 183 | encoder = BEncoder(bytestring_encoding="utf-8") 184 | encoder.encode(x, r) 185 | return b"".join(r) 186 | -------------------------------------------------------------------------------- /fastbencode/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/breezy-team/fastbencode/6e7ae16c169bfadf9d5ae995c8f9bc00797247e2/fastbencode/py.typed -------------------------------------------------------------------------------- /fastbencode/tests/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2007, 2009, 2010 Canonical Ltd 2 | # 3 | # This program is free software; you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation; either version 2 of the License, or 6 | # (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 
12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program; if not, write to the Free Software 15 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 16 | # 17 | 18 | """Tests for fastbencode.""" 19 | 20 | import unittest 21 | 22 | 23 | def test_suite(): 24 | names = [ 25 | "test_bencode", 26 | ] 27 | module_names = ["fastbencode.tests." + name for name in names] 28 | result = unittest.TestSuite() 29 | loader = unittest.TestLoader() 30 | suite = loader.loadTestsFromNames(module_names) 31 | result.addTests(suite) 32 | 33 | return result 34 | -------------------------------------------------------------------------------- /fastbencode/tests/test_bencode.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2007, 2009, 2010, 2016 Canonical Ltd 2 | # 3 | # This program is free software; you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation; either version 2 of the License, or 6 | # (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program; if not, write to the Free Software 15 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 16 | 17 | """Tests for bencode structured encoding.""" 18 | 19 | import copy 20 | import sys 21 | from unittest import TestCase, TestSuite 22 | 23 | 24 | def get_named_object(module_name, member_name=None): 25 | """Get the Python object named by a given module and member name. 
26 | 27 | This is usually much more convenient than dealing with ``__import__`` 28 | directly:: 29 | 30 | >>> doc = get_named_object('pyutils', 'get_named_object.__doc__') 31 | >>> doc.splitlines()[0] 32 | 'Get the Python object named by a given module and member name.' 33 | 34 | :param module_name: a module name, as would be found in sys.modules if 35 | the module is already imported. It may contain dots. e.g. 'sys' or 36 | 'os.path'. 37 | :param member_name: (optional) a name of an attribute in that module to 38 | return. It may contain dots. e.g. 'MyClass.some_method'. If not 39 | given, the named module will be returned instead. 40 | :raises: ImportError or AttributeError. 41 | """ 42 | # We may have just a module name, or a module name and a member name, 43 | # and either may contain dots. __import__'s return value is a bit 44 | # unintuitive, so we need to take care to always return the object 45 | # specified by the full combination of module name + member name. 46 | if member_name: 47 | # Give __import__ a from_list. It will return the last module in 48 | # the dotted module name. 49 | attr_chain = member_name.split(".") 50 | from_list = attr_chain[:1] 51 | obj = __import__(module_name, {}, {}, from_list) 52 | for attr in attr_chain: 53 | obj = getattr(obj, attr) 54 | else: 55 | # We're just importing a module, no attributes, so we have no 56 | # from_list. __import__ will return the first module in the dotted 57 | # module name, so we look up the module from sys.modules. 
58 | __import__(module_name, globals(), locals(), []) 59 | obj = sys.modules[module_name] 60 | return obj 61 | 62 | 63 | def iter_suite_tests(suite): 64 | """Return all tests in a suite, recursing through nested suites.""" 65 | if isinstance(suite, TestCase): 66 | yield suite 67 | elif isinstance(suite, TestSuite): 68 | for item in suite: 69 | yield from iter_suite_tests(item) 70 | else: 71 | raise Exception(f"unknown type {type(suite)!r} for object {suite!r}") 72 | 73 | 74 | def clone_test(test, new_id): 75 | """Clone a test giving it a new id. 76 | 77 | :param test: The test to clone. 78 | :param new_id: The id to assign to it. 79 | :return: The new test. 80 | """ 81 | new_test = copy.copy(test) 82 | new_test.id = lambda: new_id 83 | # XXX: Workaround , which 84 | # causes cloned tests to share the 'details' dict. This makes it hard to 85 | # read the test output for parameterized tests, because tracebacks will be 86 | # associated with irrelevant tests. 87 | try: 88 | new_test._TestCase__details 89 | except AttributeError: 90 | # must be a different version of testtools than expected. Do nothing. 91 | pass 92 | else: 93 | # Reset the '__details' dict. 94 | new_test._TestCase__details = {} 95 | return new_test 96 | 97 | 98 | def apply_scenario(test, scenario): 99 | """Copy test and apply scenario to it. 100 | 101 | :param test: A test to adapt. 102 | :param scenario: A tuple describing the scenario. 103 | The first element of the tuple is the new test id. 104 | The second element is a dict containing attributes to set on the 105 | test. 106 | :return: The adapted test. 107 | """ 108 | new_id = f"{test.id()}({scenario[0]})" 109 | new_test = clone_test(test, new_id) 110 | for name, value in scenario[1].items(): 111 | setattr(new_test, name, value) 112 | return new_test 113 | 114 | 115 | def apply_scenarios(test, scenarios, result): 116 | """Apply the scenarios in scenarios to test and add to result. 117 | 118 | :param test: The test to apply scenarios to. 
119 | :param scenarios: An iterable of scenarios to apply to test. 120 | :return: result 121 | :seealso: apply_scenario 122 | """ 123 | for scenario in scenarios: 124 | result.addTest(apply_scenario(test, scenario)) 125 | return result 126 | 127 | 128 | def multiply_tests(tests, scenarios, result): 129 | """Multiply tests_list by scenarios into result. 130 | 131 | This is the core workhorse for test parameterisation. 132 | 133 | Typically the load_tests() method for a per-implementation test suite will 134 | call multiply_tests and return the result. 135 | 136 | :param tests: The tests to parameterise. 137 | :param scenarios: The scenarios to apply: pairs of (scenario_name, 138 | scenario_param_dict). 139 | :param result: A TestSuite to add created tests to. 140 | 141 | This returns the passed in result TestSuite with the cross product of all 142 | the tests repeated once for each scenario. Each test is adapted by adding 143 | the scenario name at the end of its id(), and updating the test object's 144 | __dict__ with the scenario_param_dict. 145 | 146 | >>> import tests.test_sampler 147 | >>> r = multiply_tests( 148 | ... tests.test_sampler.DemoTest('test_nothing'), 149 | ... [('one', dict(param=1)), 150 | ... ('two', dict(param=2))], 151 | ... TestUtil.TestSuite()) 152 | >>> tests = list(iter_suite_tests(r)) 153 | >>> len(tests) 154 | 2 155 | >>> tests[0].id() 156 | 'tests.test_sampler.DemoTest.test_nothing(one)' 157 | >>> tests[0].param 158 | 1 159 | >>> tests[1].param 160 | 2 161 | """ 162 | for test in iter_suite_tests(tests): 163 | apply_scenarios(test, scenarios, result) 164 | return result 165 | 166 | 167 | def permute_tests_for_extension( 168 | standard_tests, loader, py_module_name, ext_module_name 169 | ): 170 | """Helper for permutating tests against an extension module. 171 | 172 | This is meant to be used inside a modules 'load_tests()' function. 
It will 173 | create 2 scenarios, and cause all tests in the 'standard_tests' to be run 174 | against both implementations. Setting 'test.module' to the appropriate 175 | module. See tests.test__chk_map.load_tests as an example. 176 | 177 | :param standard_tests: A test suite to permute 178 | :param loader: A TestLoader 179 | :param py_module_name: The python path to a python module that can always 180 | be loaded, and will be considered the 'python' implementation. (eg 181 | '_chk_map_py') 182 | :param ext_module_name: The python path to an extension module. If the 183 | module cannot be loaded, a single test will be added, which notes that 184 | the module is not available. If it can be loaded, all standard_tests 185 | will be run against that module. 186 | :return: (suite, feature) suite is a test-suite that has all the permuted 187 | tests. feature is the Feature object that can be used to determine if 188 | the module is available. 189 | """ 190 | py_module = get_named_object(py_module_name) 191 | scenarios = [ 192 | ("python", {"module": py_module}), 193 | ] 194 | suite = loader.suiteClass() 195 | try: 196 | __import__(ext_module_name) 197 | except ModuleNotFoundError: 198 | pass 199 | else: 200 | scenarios.append(("C", {"module": get_named_object(ext_module_name)})) 201 | result = multiply_tests(standard_tests, scenarios, suite) 202 | return result 203 | 204 | 205 | def load_tests(loader, standard_tests, pattern): 206 | return permute_tests_for_extension( 207 | standard_tests, 208 | loader, 209 | "fastbencode._bencode_py", 210 | "fastbencode._bencode_rs", 211 | ) 212 | 213 | 214 | class RecursionLimit: 215 | """Context manager that lowers recursion limit for testing.""" 216 | 217 | def __init__(self, limit=100) -> None: 218 | self._new_limit = limit 219 | self._old_limit = sys.getrecursionlimit() 220 | 221 | def __enter__(self): 222 | sys.setrecursionlimit(self._new_limit) 223 | return self 224 | 225 | def __exit__(self, *exc_info): 226 | 
sys.setrecursionlimit(self._old_limit) 227 | 228 | 229 | class TestBencodeDecode(TestCase): 230 | module = None 231 | 232 | def _check(self, expected, source): 233 | self.assertEqual(expected, self.module.bdecode(source)) 234 | 235 | def _run_check_error(self, exc, bad): 236 | """Check that bdecoding a string raises a particular exception.""" 237 | self.assertRaises(exc, self.module.bdecode, bad) 238 | 239 | def test_int(self): 240 | self._check(0, b"i0e") 241 | self._check(4, b"i4e") 242 | self._check(123456789, b"i123456789e") 243 | self._check(-10, b"i-10e") 244 | self._check(int("1" * 1000), b"i" + (b"1" * 1000) + b"e") 245 | 246 | def test_long(self): 247 | self._check(12345678901234567890, b"i12345678901234567890e") 248 | self._check(-12345678901234567890, b"i-12345678901234567890e") 249 | 250 | def test_malformed_int(self): 251 | self._run_check_error(ValueError, b"ie") 252 | self._run_check_error(ValueError, b"i-e") 253 | self._run_check_error(ValueError, b"i-010e") 254 | self._run_check_error(ValueError, b"i-0e") 255 | self._run_check_error(ValueError, b"i00e") 256 | self._run_check_error(ValueError, b"i01e") 257 | self._run_check_error(ValueError, b"i-03e") 258 | self._run_check_error(ValueError, b"i") 259 | self._run_check_error(ValueError, b"i123") 260 | self._run_check_error(ValueError, b"i341foo382e") 261 | 262 | def test_string(self): 263 | self._check(b"", b"0:") 264 | self._check(b"abc", b"3:abc") 265 | self._check(b"1234567890", b"10:1234567890") 266 | 267 | def test_large_string(self): 268 | self.assertRaises(ValueError, self.module.bdecode, b"2147483639:foo") 269 | 270 | def test_malformed_string(self): 271 | self._run_check_error(ValueError, b"10:x") 272 | self._run_check_error(ValueError, b"10:") 273 | self._run_check_error(ValueError, b"10") 274 | self._run_check_error(ValueError, b"01:x") 275 | self._run_check_error(ValueError, b"00:") 276 | self._run_check_error(ValueError, b"35208734823ljdahflajhdf") 277 | self._run_check_error(ValueError, 
b"432432432432432:foo") 278 | self._run_check_error(ValueError, b" 1:x") # leading whitespace 279 | self._run_check_error(ValueError, b"-1:x") # negative 280 | self._run_check_error(ValueError, b"1 x") # space vs colon 281 | self._run_check_error(ValueError, b"1x") # missing colon 282 | self._run_check_error(ValueError, (b"1" * 1000) + b":") 283 | 284 | def test_list(self): 285 | self._check([], b"le") 286 | self._check([b"", b"", b""], b"l0:0:0:e") 287 | self._check([1, 2, 3], b"li1ei2ei3ee") 288 | self._check([b"asd", b"xy"], b"l3:asd2:xye") 289 | self._check([[b"Alice", b"Bob"], [2, 3]], b"ll5:Alice3:Bobeli2ei3eee") 290 | 291 | def test_list_deepnested(self): 292 | import platform 293 | 294 | if ( 295 | platform.python_implementation() == "PyPy" 296 | or sys.version_info[:2] >= (3, 12) 297 | or self.id().endswith("(C)") 298 | ): 299 | expected = [] 300 | for i in range(99): 301 | expected = [expected] 302 | self._check(expected, (b"l" * 100) + (b"e" * 100)) 303 | else: 304 | with RecursionLimit(): 305 | self._run_check_error( 306 | RuntimeError, (b"l" * 100) + (b"e" * 100) 307 | ) 308 | 309 | def test_malformed_list(self): 310 | self._run_check_error(ValueError, b"l") 311 | self._run_check_error(ValueError, b"l01:ae") 312 | self._run_check_error(ValueError, b"l0:") 313 | self._run_check_error(ValueError, b"li1e") 314 | self._run_check_error(ValueError, b"l-3:e") 315 | 316 | def test_dict(self): 317 | self._check({}, b"de") 318 | self._check({b"": 3}, b"d0:i3ee") 319 | self._check({b"age": 25, b"eyes": b"blue"}, b"d3:agei25e4:eyes4:bluee") 320 | self._check( 321 | {b"spam.mp3": {b"author": b"Alice", b"length": 100000}}, 322 | b"d8:spam.mp3d6:author5:Alice6:lengthi100000eee", 323 | ) 324 | 325 | def test_dict_deepnested(self): 326 | if self.id().endswith("(C)"): 327 | self.skipTest("no limit recursion in Rust code") 328 | 329 | with RecursionLimit(): 330 | self._run_check_error( 331 | RuntimeError, (b"d0:" * 1000) + b"i1e" + (b"e" * 1000) 332 | ) 333 | 334 | def 
test_malformed_dict(self): 335 | self._run_check_error(ValueError, b"d") 336 | self._run_check_error(ValueError, b"defoobar") 337 | self._run_check_error(ValueError, b"d3:fooe") 338 | self._run_check_error(ValueError, b"di1e0:e") 339 | self._run_check_error(ValueError, b"d1:b0:1:a0:e") 340 | self._run_check_error(ValueError, b"d1:a0:1:a0:e") 341 | self._run_check_error(ValueError, b"d0:0:") 342 | self._run_check_error(ValueError, b"d0:") 343 | self._run_check_error(ValueError, b"d432432432432432432:e") 344 | 345 | def test_empty_string(self): 346 | self.assertRaises(ValueError, self.module.bdecode, b"") 347 | 348 | def test_junk(self): 349 | self._run_check_error(ValueError, b"i6easd") 350 | self._run_check_error(ValueError, b"2:abfdjslhfld") 351 | self._run_check_error(ValueError, b"0:0:") 352 | self._run_check_error(ValueError, b"leanfdldjfh") 353 | 354 | def test_unknown_object(self): 355 | self.assertRaises(ValueError, self.module.bdecode, b"relwjhrlewjh") 356 | 357 | def test_unsupported_type(self): 358 | self._run_check_error(TypeError, 1.5) 359 | self._run_check_error(TypeError, None) 360 | self._run_check_error(TypeError, lambda x: x) 361 | self._run_check_error(TypeError, object) 362 | self._run_check_error(TypeError, "ie") 363 | 364 | def test_decoder_type_error(self): 365 | self.assertRaises(TypeError, self.module.bdecode, 1) 366 | 367 | 368 | class TestBdecodeUtf8(TestCase): 369 | module = None 370 | 371 | def _check(self, expected, source): 372 | self.assertEqual(expected, self.module.bdecode_utf8(source)) 373 | 374 | def _run_check_error(self, exc, bad): 375 | """Check that bdecoding a string raises a particular exception.""" 376 | self.assertRaises(exc, self.module.bdecode_utf8, bad) 377 | 378 | def test_string(self): 379 | self._check("", b"0:") 380 | self._check("aäc", b"4:a\xc3\xa4c") 381 | self._check("1234567890", b"10:1234567890") 382 | 383 | def test_large_string(self): 384 | self.assertRaises( 385 | ValueError, self.module.bdecode_utf8, 
b"2147483639:foo" 386 | ) 387 | 388 | def test_malformed_string(self): 389 | self._run_check_error(ValueError, b"10:x") 390 | self._run_check_error(ValueError, b"10:") 391 | self._run_check_error(ValueError, b"10") 392 | self._run_check_error(ValueError, b"01:x") 393 | self._run_check_error(ValueError, b"00:") 394 | self._run_check_error(ValueError, b"35208734823ljdahflajhdf") 395 | self._run_check_error(ValueError, b"432432432432432:foo") 396 | self._run_check_error(ValueError, b" 1:x") # leading whitespace 397 | self._run_check_error(ValueError, b"-1:x") # negative 398 | self._run_check_error(ValueError, b"1 x") # space vs colon 399 | self._run_check_error(ValueError, b"1x") # missing colon 400 | self._run_check_error(ValueError, (b"1" * 1000) + b":") 401 | 402 | def test_empty_string(self): 403 | self.assertRaises(ValueError, self.module.bdecode_utf8, b"") 404 | 405 | def test_invalid_utf8(self): 406 | self._run_check_error(UnicodeDecodeError, b"3:\xff\xfe\xfd") 407 | 408 | 409 | class TestBencodeEncode(TestCase): 410 | module = None 411 | 412 | def _check(self, expected, source): 413 | self.assertEqual(expected, self.module.bencode(source)) 414 | 415 | def test_int(self): 416 | self._check(b"i4e", 4) 417 | self._check(b"i0e", 0) 418 | self._check(b"i-10e", -10) 419 | 420 | def test_long(self): 421 | self._check(b"i12345678901234567890e", 12345678901234567890) 422 | self._check(b"i-12345678901234567890e", -12345678901234567890) 423 | 424 | def test_string(self): 425 | self._check(b"0:", b"") 426 | self._check(b"3:abc", b"abc") 427 | self._check(b"10:1234567890", b"1234567890") 428 | 429 | def test_list(self): 430 | self._check(b"le", []) 431 | self._check(b"li1ei2ei3ee", [1, 2, 3]) 432 | self._check(b"ll5:Alice3:Bobeli2ei3eee", [[b"Alice", b"Bob"], [2, 3]]) 433 | 434 | def test_list_as_tuple(self): 435 | self._check(b"le", ()) 436 | self._check(b"li1ei2ei3ee", (1, 2, 3)) 437 | self._check(b"ll5:Alice3:Bobeli2ei3eee", ((b"Alice", b"Bob"), (2, 3))) 438 | 439 | def 
test_list_deep_nested(self): 440 | if self.id().endswith("(C)"): 441 | self.skipTest("no limit recursion in Rust code") 442 | 443 | top = [] 444 | lst = top 445 | for unused_i in range(1000): 446 | lst.append([]) 447 | lst = lst[0] 448 | with RecursionLimit(): 449 | self.assertRaises(RuntimeError, self.module.bencode, top) 450 | 451 | def test_dict(self): 452 | self._check(b"de", {}) 453 | self._check(b"d3:agei25e4:eyes4:bluee", {b"age": 25, b"eyes": b"blue"}) 454 | self._check( 455 | b"d8:spam.mp3d6:author5:Alice6:lengthi100000eee", 456 | {b"spam.mp3": {b"author": b"Alice", b"length": 100000}}, 457 | ) 458 | 459 | def test_dict_deep_nested(self): 460 | if self.id().endswith("(C)"): 461 | self.skipTest("no limit of recursion in Rust code") 462 | 463 | d = top = {} 464 | for i in range(1000): 465 | d[b""] = {} 466 | d = d[b""] 467 | with RecursionLimit(): 468 | self.assertRaises(RuntimeError, self.module.bencode, top) 469 | 470 | def test_bencached(self): 471 | self._check(b"i3e", self.module.Bencached(self.module.bencode(3))) 472 | 473 | def test_invalid_dict(self): 474 | self.assertRaises(TypeError, self.module.bencode, {1: b"foo"}) 475 | 476 | def test_bool(self): 477 | self._check(b"i1e", True) 478 | self._check(b"i0e", False) 479 | 480 | 481 | class TestBencodeEncodeUtf8(TestCase): 482 | module = None 483 | 484 | def _check(self, expected, source): 485 | self.assertEqual(expected, self.module.bencode_utf8(source)) 486 | 487 | def test_string(self): 488 | self._check(b"0:", "") 489 | self._check(b"3:abc", "abc") 490 | self._check(b"10:1234567890", "1234567890") 491 | 492 | def test_list(self): 493 | self._check(b"le", []) 494 | self._check(b"li1ei2ei3ee", [1, 2, 3]) 495 | self._check(b"ll5:Alice3:Bobeli2ei3eee", [["Alice", "Bob"], [2, 3]]) 496 | 497 | def test_list_as_tuple(self): 498 | self._check(b"le", ()) 499 | self._check(b"li1ei2ei3ee", (1, 2, 3)) 500 | self._check(b"ll5:Alice3:Bobeli2ei3eee", (("Alice", "Bob"), (2, 3))) 501 | 502 | def test_dict(self): 
503 | self._check(b"de", {}) 504 | self._check(b"d3:agei25e4:eyes4:bluee", {b"age": 25, b"eyes": "blue"}) 505 | self._check( 506 | b"d8:spam.mp3d6:author5:Alice6:lengthi100000eee", 507 | {b"spam.mp3": {b"author": b"Alice", b"length": 100000}}, 508 | ) 509 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ 3 | "setuptools>=61.2", 4 | "setuptools-rust>=1.0.0", 5 | ] 6 | build-backend = "setuptools.build_meta" 7 | 8 | [project] 9 | name = "fastbencode" 10 | description = "Implementation of bencode with optional fast Rust extensions" 11 | maintainers = [{name = "Breezy Developers", email = "breezy-core@googlegroups.com"}] 12 | readme = "README.md" 13 | license = "GPL-2.0-or-later" 14 | classifiers = [ 15 | "Programming Language :: Python :: 3.9", 16 | "Programming Language :: Python :: 3.10", 17 | "Programming Language :: Python :: 3.11", 18 | "Programming Language :: Python :: 3.12", 19 | "Programming Language :: Python :: 3.13", 20 | "Programming Language :: Python :: Implementation :: CPython", 21 | "Programming Language :: Python :: Implementation :: PyPy", 22 | "Operating System :: POSIX", 23 | "Operating System :: Microsoft :: Windows", 24 | ] 25 | requires-python = ">=3.9" 26 | dynamic = ["version"] 27 | dependencies = [] 28 | 29 | [project.urls] 30 | Homepage = "https://github.com/breezy-team/fastbencode" 31 | GitHub = "https://github.com/breezy-team/fastbencode" 32 | 33 | [project.optional-dependencies] 34 | rust = ["setuptools-rust>=1.0.0"] 35 | dev = [ 36 | "ruff==0.11.12" 37 | ] 38 | 39 | [tool.setuptools] 40 | packages = ["fastbencode"] 41 | include-package-data = false 42 | 43 | [tool.setuptools.dynamic] 44 | version = {attr = "fastbencode.__version__"} 45 | 46 | [tool.ruff] 47 | target-version = "py37" 48 | line-length = 79 49 | 50 | [tool.ruff.lint] 51 | select = [ 52 | "ANN", 53 | "D", 54 
| "E", 55 | "F", 56 | "I", 57 | "UP", 58 | ] 59 | ignore = [ 60 | "ANN001", 61 | "ANN002", 62 | "ANN201", 63 | "ANN202", 64 | "ANN204", 65 | "D100", 66 | "D101", 67 | "D102", 68 | "D103", 69 | "D105", 70 | "D107", 71 | ] 72 | 73 | [tool.ruff.lint.pydocstyle] 74 | convention = "google" 75 | 76 | [tool.cibuildwheel] 77 | environment = {PATH="$HOME/.cargo/bin:$PATH"} 78 | before-build = "pip install -U setuptools-rust && curl https://sh.rustup.rs -sSf | sh -s -- --profile=minimal -y && rustup show" 79 | 80 | [tool.cibuildwheel.linux] 81 | skip = "*-musllinux_*" 82 | archs = ["auto", "aarch64"] 83 | before-build = "pip install -U setuptools-rust && yum -y install libatomic && curl https://sh.rustup.rs -sSf | sh -s -- --profile=minimal -y && rustup show" 84 | 85 | [tool.cibuildwheel.macos] 86 | archs = ["auto", "universal2", "x86_64", "arm64"] 87 | before-all = "rustup target add x86_64-apple-darwin aarch64-apple-darwin" 88 | skip = """\ 89 | cp39-macosx_x86_64 cp39-macosx_universal2 \ 90 | cp310-macosx_x86_64 cp310-macosx_universal2 \ 91 | cp311-macosx_x86_64 cp311-macosx_universal2 \ 92 | cp312-macosx_x86_64 cp312-macosx_universal2 \ 93 | cp313-macosx_x86_64 cp313-macosx_universal2 \ 94 | """ 95 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | import sys 4 | 5 | from setuptools import setup 6 | from setuptools_rust import Binding, RustExtension 7 | 8 | setup( 9 | rust_extensions=[ 10 | RustExtension( 11 | "fastbencode._bencode_rs", 12 | binding=Binding.PyO3, 13 | py_limited_api=False, 14 | optional=sys.platform == "win32", 15 | ) 16 | ], 17 | ) 18 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | #![allow(non_snake_case)] 2 | use pyo3::exceptions::{PyTypeError, PyValueError}; 3 | use 
pyo3::prelude::*; 4 | use pyo3::types::{PyBytes, PyDict, PyInt, PyList, PyString, PyTuple}; 5 | 6 | #[pyclass] 7 | struct Bencached { 8 | #[pyo3(get)] 9 | bencoded: Py, 10 | } 11 | 12 | #[pymethods] 13 | impl Bencached { 14 | #[new] 15 | fn new(s: Py) -> Self { 16 | Bencached { bencoded: s } 17 | } 18 | 19 | fn as_bytes(&self, py: Python) -> PyResult<&[u8]> { 20 | Ok(self.bencoded.as_bytes(py)) 21 | } 22 | } 23 | 24 | #[pyclass] 25 | struct Decoder { 26 | data: Vec, 27 | position: usize, 28 | yield_tuples: bool, 29 | bytestring_encoding: Option, 30 | } 31 | 32 | #[pymethods] 33 | impl Decoder { 34 | #[new] 35 | fn new( 36 | s: &Bound, 37 | yield_tuples: Option, 38 | bytestring_encoding: Option, 39 | ) -> Self { 40 | Decoder { 41 | data: s.as_bytes().to_vec(), 42 | position: 0, 43 | yield_tuples: yield_tuples.unwrap_or(false), 44 | bytestring_encoding, 45 | } 46 | } 47 | 48 | fn decode<'py>(&mut self, py: Python<'py>) -> PyResult> { 49 | let result = self.decode_object(py)?; 50 | if self.position < self.data.len() { 51 | return Err(PyValueError::new_err("junk in stream")); 52 | } 53 | Ok(result) 54 | } 55 | 56 | fn decode_object<'py>(&mut self, py: Python<'py>) -> PyResult> { 57 | if self.position >= self.data.len() { 58 | return Err(PyValueError::new_err("stream underflow")); 59 | } 60 | 61 | // Check for recursion - in a real implementation we would track recursion depth 62 | let next_byte = self.data[self.position]; 63 | 64 | match next_byte { 65 | b'0'..=b'9' => Ok(self.decode_bytes(py)?.into_any()), 66 | b'l' => { 67 | self.position += 1; 68 | Ok(self.decode_list(py)?.into_any()) 69 | } 70 | b'i' => { 71 | self.position += 1; 72 | Ok(self.decode_int(py)?.into_any()) 73 | } 74 | b'd' => { 75 | self.position += 1; 76 | Ok(self.decode_dict(py)?.into_any()) 77 | } 78 | _ => Err(PyValueError::new_err(format!( 79 | "unknown object type identifier {:?}", 80 | next_byte as char 81 | ))), 82 | } 83 | } 84 | 85 | fn read_digits(&mut self, stop_char: u8) -> PyResult { 86 
| let start = self.position; 87 | while self.position < self.data.len() { 88 | let b = self.data[self.position]; 89 | if b == stop_char { 90 | break; 91 | } 92 | if (b < b'0' || b > b'9') && b != b'-' { 93 | return Err(PyValueError::new_err(format!( 94 | "Stop character {} not found: {}", 95 | stop_char as char, b as char 96 | ))); 97 | } 98 | self.position += 1; 99 | } 100 | 101 | if self.position >= self.data.len() || self.data[self.position] != stop_char { 102 | return Err(PyValueError::new_err(format!( 103 | "Stop character {} not found", 104 | stop_char as char 105 | ))); 106 | } 107 | 108 | // Check for leading zeros 109 | if self.data[start] == b'0' && self.position - start > 1 { 110 | return Err(PyValueError::new_err("leading zeros are not allowed")); 111 | } else if self.data[start] == b'-' 112 | && self.data[start + 1] == b'0' 113 | && self.position - start > 2 114 | { 115 | return Err(PyValueError::new_err("leading zeros are not allowed")); 116 | } 117 | 118 | Ok(String::from_utf8_lossy(&self.data[start..self.position]).to_string()) 119 | } 120 | 121 | fn decode_int<'py>(&mut self, py: Python<'py>) -> PyResult> { 122 | let digits = self.read_digits(b'e')?; 123 | 124 | // Move past the 'e' 125 | self.position += 1; 126 | 127 | // Check for negative zero 128 | if digits == "-0" { 129 | return Err(PyValueError::new_err("negative zero not allowed")); 130 | } 131 | 132 | // Parse the integer directly 133 | let parsed_int = match digits.parse::() { 134 | Ok(n) => n.to_object(py).into_bound(py), 135 | Err(_) => { 136 | // For very large integers, fallback to Python's conversion 137 | let py_str = PyString::new(py, &digits); 138 | 139 | let int_type = py.get_type::(); 140 | int_type.call1((py_str,))? 
141 | } 142 | }; 143 | 144 | Ok(parsed_int.into_any()) 145 | } 146 | 147 | fn decode_bytes<'py>(&mut self, py: Python<'py>) -> PyResult> { 148 | let len_end_pos = self.data[self.position..].iter().position(|&b| b == b':'); 149 | if len_end_pos.is_none() { 150 | return Err(PyValueError::new_err("string len not terminated by \":\"")); 151 | } 152 | 153 | let len_end_pos = len_end_pos.unwrap() + self.position; 154 | let len_str = std::str::from_utf8(&self.data[self.position..len_end_pos]) 155 | .map_err(|_| PyValueError::new_err("invalid length string"))?; 156 | 157 | // Check for leading zeros in the length 158 | if len_str.starts_with('0') && len_str.len() > 1 { 159 | return Err(PyValueError::new_err("leading zeros are not allowed")); 160 | } 161 | 162 | let length: usize = len_str 163 | .parse() 164 | .map_err(|_| PyValueError::new_err("invalid length value"))?; 165 | 166 | // Skip past the ':' character 167 | self.position = len_end_pos + 1; 168 | 169 | if length > self.data.len() - self.position { 170 | return Err(PyValueError::new_err("stream underflow")); 171 | } 172 | 173 | let bytes_slice = &self.data[self.position..self.position + length]; 174 | self.position += length; 175 | 176 | let bytes_obj = PyBytes::new(py, bytes_slice).into_any(); 177 | 178 | // Return as bytes or decode depending on bytestring_encoding 179 | if let Some(encoding) = &self.bytestring_encoding { 180 | Ok(PyString::from_object(&bytes_obj, encoding, "strict")?.into_any()) 181 | } else { 182 | Ok(bytes_obj) 183 | } 184 | } 185 | 186 | fn decode_list<'py>(&mut self, py: Python<'py>) -> PyResult> { 187 | let mut result = Vec::new(); 188 | 189 | while self.position < self.data.len() && self.data[self.position] != b'e' { 190 | let item = self.decode_object(py)?; 191 | result.push(item); 192 | } 193 | 194 | if self.position >= self.data.len() { 195 | return Err(PyValueError::new_err("malformed list")); 196 | } 197 | 198 | // Skip the 'e' 199 | self.position += 1; 200 | 201 | if 
self.yield_tuples { 202 | let tuple = PyTuple::new(py, &result)?; 203 | Ok(tuple.into_any()) 204 | } else { 205 | let list = PyList::new(py, &result)?; 206 | Ok(list.into_any()) 207 | } 208 | } 209 | 210 | fn decode_dict<'py>(&mut self, py: Python<'py>) -> PyResult> { 211 | let dict = PyDict::new(py); 212 | let mut last_key: Option> = None; 213 | 214 | while self.position < self.data.len() && self.data[self.position] != b'e' { 215 | // Keys should be strings only 216 | if self.data[self.position] < b'0' || self.data[self.position] > b'9' { 217 | return Err(PyValueError::new_err("key was not a simple string")); 218 | } 219 | 220 | // Decode key as bytes 221 | let key_obj = self.decode_bytes(py)?; 222 | 223 | // Get bytes representation for comparison 224 | let key_bytes = if let Some(encoding) = &self.bytestring_encoding { 225 | if encoding == "utf-8" { 226 | let key_str = key_obj.extract::<&str>()?; 227 | key_str.as_bytes().to_vec() 228 | } else { 229 | let key_bytes = key_obj.extract::>()?; 230 | key_bytes.as_bytes().to_vec() 231 | } 232 | } else { 233 | let key_bytes = key_obj.extract::>()?; 234 | key_bytes.as_bytes().to_vec() 235 | }; 236 | 237 | // Check key ordering 238 | if let Some(ref last) = last_key { 239 | if last >= &key_bytes { 240 | return Err(PyValueError::new_err("dict keys disordered")); 241 | } 242 | } 243 | 244 | last_key = Some(key_bytes); 245 | 246 | // Decode value 247 | let value = self.decode_object(py)?; 248 | 249 | // Insert into dictionary 250 | dict.set_item(key_obj, value)?; 251 | } 252 | 253 | if self.position >= self.data.len() { 254 | return Err(PyValueError::new_err("malformed dict")); 255 | } 256 | 257 | // Skip the 'e' 258 | self.position += 1; 259 | 260 | Ok(dict) 261 | } 262 | } 263 | 264 | #[pyclass] 265 | struct Encoder { 266 | buffer: Vec, 267 | bytestring_encoding: Option, 268 | } 269 | 270 | #[pymethods] 271 | impl Encoder { 272 | #[new] 273 | fn new(_maxsize: Option, bytestring_encoding: Option) -> Self { 274 | Encoder { 
275 | buffer: Vec::new(), 276 | bytestring_encoding, 277 | } 278 | } 279 | 280 | fn to_bytes<'py>(&self, py: Python<'py>) -> Bound<'py, PyBytes> { 281 | PyBytes::new(py, &self.buffer) 282 | } 283 | 284 | fn process(&mut self, py: Python, x: Bound) -> PyResult<()> { 285 | if let Ok(s) = x.extract::>() { 286 | self.encode_bytes(s)?; 287 | } else if let Ok(n) = x.extract::() { 288 | self.encode_int(n)?; 289 | } else if let Ok(n) = x.extract::>() { 290 | self.encode_long(n)?; 291 | } else if x.is_instance_of::() { 292 | self.encode_list(py, x)?; 293 | } else if x.is_instance_of::() { 294 | self.encode_list(py, x)?; 295 | } else if let Ok(d) = x.extract::>() { 296 | self.encode_dict(py, d)?; 297 | } else if let Ok(b) = x.extract::() { 298 | self.encode_int(if b { 1 } else { 0 })?; 299 | } else if let Ok(obj) = x.extract::>() { 300 | self.append_bytes(obj.as_bytes(py)?)?; 301 | } else if let Ok(s) = x.extract::<&str>() { 302 | self.encode_string(s)?; 303 | } else { 304 | return Err(PyTypeError::new_err(format!("unsupported type: {:?}", x))); 305 | } 306 | Ok(()) 307 | } 308 | 309 | fn encode_int(&mut self, x: i64) -> PyResult<()> { 310 | let s = format!("i{}e", x); 311 | self.buffer.extend(s.as_bytes()); 312 | Ok(()) 313 | } 314 | 315 | fn encode_long(&mut self, x: Bound) -> PyResult<()> { 316 | let s = format!("i{}e", x.str()?); 317 | self.buffer.extend(s.as_bytes()); 318 | Ok(()) 319 | } 320 | 321 | fn append_bytes(&mut self, bytes: &[u8]) -> PyResult<()> { 322 | self.buffer.extend(bytes); 323 | Ok(()) 324 | } 325 | 326 | fn encode_bytes(&mut self, bytes: Bound) -> PyResult<()> { 327 | let len_str = format!("{}:", bytes.len()?); 328 | self.buffer.extend(len_str.as_bytes()); 329 | self.buffer.extend(bytes.as_bytes()); 330 | Ok(()) 331 | } 332 | 333 | fn encode_string(&mut self, x: &str) -> PyResult<()> { 334 | if let Some(encoding) = &self.bytestring_encoding { 335 | if encoding == "utf-8" { 336 | let len_str = format!("{}:", x.len()); 337 | 
self.buffer.extend(len_str.as_bytes()); 338 | self.buffer.extend(x.as_bytes()); 339 | Ok(()) 340 | } else { 341 | Err(PyTypeError::new_err( 342 | "Only utf-8 encoding is supported for string encoding", 343 | )) 344 | } 345 | } else { 346 | Err(PyTypeError::new_err( 347 | "string found but no encoding specified. Use bencode_utf8 rather bencode?", 348 | )) 349 | } 350 | } 351 | 352 | fn encode_list(&mut self, py: Python, sequence: Bound) -> PyResult<()> { 353 | self.buffer.push(b'l'); 354 | 355 | for item in sequence.try_iter()? { 356 | self.process(py, item?.into())?; 357 | } 358 | 359 | self.buffer.push(b'e'); 360 | Ok(()) 361 | } 362 | 363 | fn encode_dict(&mut self, py: Python, dict: Bound) -> PyResult<()> { 364 | self.buffer.push(b'd'); 365 | 366 | // Get all keys and sort them 367 | let mut keys: Vec> = dict 368 | .keys() 369 | .iter() 370 | .map(|key| key.extract::>()) 371 | .collect::>>()?; 372 | keys.sort_by(|a, b| { 373 | let a_str = a.extract::<&[u8]>().unwrap(); 374 | let b_str = b.extract::<&[u8]>().unwrap(); 375 | a_str.cmp(b_str) 376 | }); 377 | 378 | for key in keys { 379 | if let Ok(bytes) = key.extract::>() { 380 | self.encode_bytes(bytes)?; 381 | } else { 382 | return Err(PyTypeError::new_err("key in dict should be string")); 383 | } 384 | 385 | if let Some(value) = dict.get_item(key)? 
{ 386 | self.process(py, value.into())?; 387 | } 388 | } 389 | 390 | self.buffer.push(b'e'); 391 | Ok(()) 392 | } 393 | } 394 | 395 | #[pyfunction] 396 | fn bdecode<'py>(py: Python<'py>, s: &Bound) -> PyResult> { 397 | let mut decoder = Decoder::new(s, None, None); 398 | decoder.decode(py) 399 | } 400 | 401 | #[pyfunction] 402 | fn bdecode_as_tuple<'py>(py: Python<'py>, s: &Bound) -> PyResult> { 403 | let mut decoder = Decoder::new(s, Some(true), None); 404 | decoder.decode(py) 405 | } 406 | 407 | #[pyfunction] 408 | fn bdecode_utf8<'py>(py: Python<'py>, s: &Bound) -> PyResult> { 409 | let mut decoder = Decoder::new(s, None, Some("utf-8".to_string())); 410 | decoder.decode(py) 411 | } 412 | 413 | #[pyfunction] 414 | fn bencode(py: Python, x: Bound) -> PyResult { 415 | let mut encoder = Encoder::new(None, None); 416 | encoder.process(py, x)?; 417 | Ok(encoder.to_bytes(py).into()) 418 | } 419 | 420 | #[pyfunction] 421 | fn bencode_utf8(py: Python, x: Bound) -> PyResult { 422 | let mut encoder = Encoder::new(None, Some("utf-8".to_string())); 423 | encoder.process(py, x)?; 424 | Ok(encoder.to_bytes(py).into()) 425 | } 426 | 427 | #[pymodule] 428 | fn _bencode_rs(m: &Bound) -> PyResult<()> { 429 | m.add_class::()?; 430 | m.add_class::()?; 431 | m.add_class::()?; 432 | m.add_function(wrap_pyfunction!(bdecode, m)?)?; 433 | m.add_function(wrap_pyfunction!(bdecode_as_tuple, m)?)?; 434 | m.add_function(wrap_pyfunction!(bdecode_utf8, m)?)?; 435 | m.add_function(wrap_pyfunction!(bencode, m)?)?; 436 | m.add_function(wrap_pyfunction!(bencode_utf8, m)?)?; 437 | Ok(()) 438 | } 439 | --------------------------------------------------------------------------------