├── .github └── workflows │ └── build.yaml ├── .gitignore ├── .scalafmt.conf ├── LICENSE ├── README.md ├── bleep.yaml ├── native ├── Cargo.lock ├── Cargo.toml ├── build.rs └── src │ ├── jvm_unwrapper.rs │ ├── lib.rs │ └── main.rs ├── scripts └── src │ └── scala │ └── tokenizers │ ├── package.scala │ └── scripts │ ├── GenJniLibrary.scala │ ├── Javah.scala │ ├── Publish.scala │ └── PublishLocal.scala ├── tests └── src │ └── scala │ └── io │ └── brunk │ └── tokenizers │ └── TokenizerSuite.scala └── tokenizers └── src ├── java └── io │ └── brunk │ └── tokenizers │ ├── LoadNativeTokenizers.java │ └── NativeLoader.java └── scala └── io └── brunk └── tokenizers ├── Encoding.scala ├── NativeCleaner.scala ├── Offset.scala └── Tokenizer.scala /.github/workflows/build.yaml: -------------------------------------------------------------------------------- 1 | name: Build 2 | on: 3 | push: 4 | tags: [ 'v*' ] 5 | branches: [ 'main' ] 6 | pull_request: 7 | branches: [ 'main' ] 8 | 9 | jobs: 10 | build: 11 | name: Compile, test, check formatting 12 | timeout-minutes: 15 13 | runs-on: ubuntu-latest 14 | if: "!contains(github.event.head_commit.message, 'ci skip')" 15 | steps: 16 | - uses: actions/checkout@v3 17 | - uses: bleep-build/bleep-setup-action@0.0.1 18 | - uses: coursier/cache-action@v6 19 | with: 20 | extraFiles: bleep.yaml 21 | 22 | - name: Scalafmt Check 23 | run: bleep fmt --check 24 | 25 | - name: Run tests 26 | run: | 27 | bleep compile 28 | bleep test 29 | 30 | build-native: 31 | name: Build JNI library on ${{ matrix.os }} 32 | runs-on: ${{ matrix.os }} 33 | timeout-minutes: 30 34 | strategy: 35 | fail-fast: false 36 | matrix: 37 | include: 38 | - os: ubuntu-20.04 39 | jni-folder: .bleep/generated-resources/native/tokenizers.scripts.GenJniLibrary 40 | - os: macos-latest 41 | jni-folder: .bleep/generated-resources/native/tokenizers.scripts.GenJniLibrary 42 | - os: windows-latest 43 | jni-folder: .bleep\generated-resources\native\tokenizers.scripts.GenJniLibrary 44 | 
steps: 45 | - uses: actions/checkout@v3 46 | - uses: bleep-build/bleep-setup-action@0.0.1 47 | - uses: coursier/cache-action@v6 48 | with: 49 | extraFiles: bleep.yaml 50 | - name: Set up cargo cache 51 | uses: actions/cache@v3 52 | continue-on-error: false 53 | with: 54 | path: | 55 | ~/.cargo/bin/ 56 | ~/.cargo/registry/index/ 57 | ~/.cargo/registry/cache/ 58 | ~/.cargo/git/db/ 59 | target/ 60 | key: v1-${{ runner.arch }}-${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} 61 | restore-keys: ${{ runner.arch }}-${{ runner.os }}-cargo- 62 | 63 | - name: Build jni library 64 | run: bleep compile native 65 | if: runner.os != 'Windows' 66 | 67 | - name: Build jni library (windows) 68 | run: bleep compile native 69 | shell: cmd 70 | if: runner.os == 'Windows' 71 | 72 | - name: Temporarily save package 73 | uses: actions/upload-artifact@v3 74 | with: 75 | name: tokenizers.scripts.GenJniLibrary 76 | path: ${{ matrix.jni-folder }} 77 | retention-days: 1 78 | 79 | publish: 80 | timeout-minutes: 15 81 | runs-on: ubuntu-latest 82 | needs: [ build, build-native ] 83 | if: "startsWith(github.ref, 'refs/tags/v')" 84 | steps: 85 | - uses: actions/checkout@v3 86 | - uses: bleep-build/bleep-setup-action@0.0.1 87 | - id: get_version 88 | uses: battila7/get-version-action@v2 89 | - name: Download artifacts 90 | uses: actions/download-artifact@v3 91 | with: 92 | path: .bleep/generated-resources/native/ 93 | - name: Display structure of downloaded files 94 | run: find .bleep/generated-resources/native 95 | # next two tasks are optimization to avoid compiling rust code again 96 | - name: load build 97 | run: bleep projects 98 | - name: touch all downloaded files (for newer timestamp) 99 | run: find .bleep/generated-resources/native | xargs touch 100 | - name: Release 101 | run: bleep publish 102 | env: 103 | PGP_SECRET: ${{ secrets.PGP_SECRET }} 104 | SONATYPE_PASSWORD: ${{ secrets.SONATYPE_PASSWORD }} 105 | SONATYPE_USERNAME: ${{ secrets.SONATYPE_USERNAME }} 106 | - name: 
Upload-to-release 107 | uses: softprops/action-gh-release@v1 108 | with: 109 | name: "${{ steps.get_version.outputs.version-without-v }}" 110 | prerelease: false 111 | generate_release_notes: true 112 | files: | 113 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .bleep/ 2 | .bloop/ 3 | .bsp/ 4 | .metals/ 5 | target/ 6 | *.worksheet.sc 7 | -------------------------------------------------------------------------------- /.scalafmt.conf: -------------------------------------------------------------------------------- 1 | version = "3.6.1" 2 | runner.dialect = scala213source3 3 | 4 | maxColumn = 100 5 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 
25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. 
You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. 
(Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # tokenizers-scala 2 | 3 | [![Maven Central](https://img.shields.io/maven-central/v/io.brunk.tokenizers/tokenizers_3)](https://central.sonatype.com/artifact/io.brunk.tokenizers/tokenizers_3/) 4 | 5 | Scala bindings for the Hugging Face [Tokenizers](https://huggingface.co/docs/tokenizers) library, written in Rust. 6 | 7 | ## Usage 8 | 9 | ```scala 10 | import io.brunk.tokenizers.Tokenizer 11 | 12 | val tokenizer = Tokenizer.fromPretrained("bert-base-cased") 13 | val encoding = tokenizer.encode("Hello, y'all! 
How are you 😁 ?", addSpecialTokens=true) 14 | println(encoding.length) 15 | // 13 16 | println(encoding.ids) 17 | // ArraySeq(101, 8667, 117, 194, 112, 1155, 106, 1731, 1132, 1128, 100, 136, 102) 18 | println(encoding.tokens) 19 | // ArraySeq([CLS], Hello, ,, y, ', all, !, How, are, you, [UNK], ?, [SEP]) 20 | ``` 21 | 22 | ## Installation 23 | 24 | ### sbt 25 | ```scala 26 | libraryDependencies += "io.brunk.tokenizers" %% "tokenizers" % "<version>" 27 | ``` 28 | 29 | ### Scala CLI 30 | 31 | ```scala 32 | //> using lib "io.brunk.tokenizers::tokenizers:<version>" 33 | ``` 34 | 35 | ### Others 36 | 37 | Copy coordinates from Maven Central for [Scala 2.13](https://central.sonatype.com/artifact/io.brunk.tokenizers/tokenizers_2.13/) or [Scala 3](https://central.sonatype.com/artifact/io.brunk.tokenizers/tokenizers_3/). 38 | 39 | ## Status 40 | 41 | Currently, we can only load and run pre-trained tokenizers. Training is not yet possible. 42 | 43 | 44 | ## How to build the project 45 | 46 | 1. Install [bleep](https://bleep.build/docs/installing/) 47 | 2. Install [Rust and Cargo](https://www.rust-lang.org/learn/get-started) 48 | 3. 
```bash 49 | bleep compile 50 | bleep test 51 | ``` 52 | -------------------------------------------------------------------------------- /bleep.yaml: -------------------------------------------------------------------------------- 1 | $schema: https://raw.githubusercontent.com/oyvindberg/bleep/master/schema.json 2 | $version: 0.0.1-M27 3 | jvm: 4 | name: graalvm-java17:22.3.1 5 | projects: 6 | scripts: 7 | dependencies: 8 | - build.bleep::bleep-core:${BLEEP_VERSION} 9 | - build.bleep::bleep-plugin-jni:${BLEEP_VERSION} 10 | - build.bleep::bleep-plugin-ci-release:${BLEEP_VERSION} 11 | - com.lihaoyi::os-lib:0.9.1 12 | extends: 13 | - template-scala-3 14 | - template-scala-common 15 | tests: 16 | dependencies: org.scalameta::munit:0.7.29 17 | dependsOn: tokenizers 18 | extends: template-cross-scala 19 | isTestProject: true 20 | tokenizers: 21 | extends: template-cross-scala 22 | dependsOn: native 23 | native: 24 | sourcegen: 25 | - main: tokenizers.scripts.GenJniLibrary 26 | project: scripts 27 | scripts: 28 | javah: 29 | main: tokenizers.scripts.Javah 30 | project: scripts 31 | tokenizers-publish-local: 32 | main: tokenizers.scripts.PublishLocal 33 | project: scripts 34 | publish: 35 | main: tokenizers.scripts.Publish 36 | project: scripts 37 | templates: 38 | template-cross-scala: 39 | cross: 40 | jvm213: 41 | extends: template-scala-2 42 | jvm3: 43 | extends: template-scala-3 44 | extends: template-scala-common 45 | template-scala-2: 46 | scala: 47 | version: 2.13.10 48 | options: -encoding utf8 -feature -unchecked -Xsource:3 49 | template-scala-3: 50 | scala: 51 | version: 3.2.2 52 | options: -encoding utf8 -feature -unchecked 53 | template-scala-common: 54 | platform: 55 | name: jvm 56 | scala: 57 | strict: true -------------------------------------------------------------------------------- /native/Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 
2 | # It is not intended for manual editing. 3 | version = 3 4 | 5 | [[package]] 6 | name = "adler" 7 | version = "1.0.2" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" 10 | 11 | [[package]] 12 | name = "aes" 13 | version = "0.7.5" 14 | source = "registry+https://github.com/rust-lang/crates.io-index" 15 | checksum = "9e8b47f52ea9bae42228d07ec09eb676433d7c4ed1ebdf0f1d1c29ed446f1ab8" 16 | dependencies = [ 17 | "cfg-if", 18 | "cipher", 19 | "cpufeatures", 20 | "opaque-debug", 21 | ] 22 | 23 | [[package]] 24 | name = "aho-corasick" 25 | version = "0.7.20" 26 | source = "registry+https://github.com/rust-lang/crates.io-index" 27 | checksum = "cc936419f96fa211c1b9166887b38e5e40b19958e5b895be7c1f93adec7071ac" 28 | dependencies = [ 29 | "memchr", 30 | ] 31 | 32 | [[package]] 33 | name = "anstream" 34 | version = "0.3.0" 35 | source = "registry+https://github.com/rust-lang/crates.io-index" 36 | checksum = "9e579a7752471abc2a8268df8b20005e3eadd975f585398f17efcfd8d4927371" 37 | dependencies = [ 38 | "anstyle", 39 | "anstyle-parse", 40 | "anstyle-query", 41 | "anstyle-wincon", 42 | "colorchoice", 43 | "is-terminal", 44 | "utf8parse", 45 | ] 46 | 47 | [[package]] 48 | name = "anstyle" 49 | version = "1.0.0" 50 | source = "registry+https://github.com/rust-lang/crates.io-index" 51 | checksum = "41ed9a86bf92ae6580e0a31281f65a1b1d867c0cc68d5346e2ae128dddfa6a7d" 52 | 53 | [[package]] 54 | name = "anstyle-parse" 55 | version = "0.2.0" 56 | source = "registry+https://github.com/rust-lang/crates.io-index" 57 | checksum = "e765fd216e48e067936442276d1d57399e37bce53c264d6fefbe298080cb57ee" 58 | dependencies = [ 59 | "utf8parse", 60 | ] 61 | 62 | [[package]] 63 | name = "anstyle-query" 64 | version = "1.0.0" 65 | source = "registry+https://github.com/rust-lang/crates.io-index" 66 | checksum = "5ca11d4be1bab0c8bc8734a9aa7bf4ee8316d462a08c6ac5052f888fef5b494b" 67 | dependencies = [ 68 | 
"windows-sys 0.48.0", 69 | ] 70 | 71 | [[package]] 72 | name = "anstyle-wincon" 73 | version = "1.0.0" 74 | source = "registry+https://github.com/rust-lang/crates.io-index" 75 | checksum = "4bcd8291a340dd8ac70e18878bc4501dd7b4ff970cfa21c207d36ece51ea88fd" 76 | dependencies = [ 77 | "anstyle", 78 | "windows-sys 0.48.0", 79 | ] 80 | 81 | [[package]] 82 | name = "atty" 83 | version = "0.2.14" 84 | source = "registry+https://github.com/rust-lang/crates.io-index" 85 | checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" 86 | dependencies = [ 87 | "hermit-abi 0.1.19", 88 | "libc", 89 | "winapi", 90 | ] 91 | 92 | [[package]] 93 | name = "autocfg" 94 | version = "1.1.0" 95 | source = "registry+https://github.com/rust-lang/crates.io-index" 96 | checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" 97 | 98 | [[package]] 99 | name = "base64" 100 | version = "0.13.1" 101 | source = "registry+https://github.com/rust-lang/crates.io-index" 102 | checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" 103 | 104 | [[package]] 105 | name = "base64" 106 | version = "0.21.0" 107 | source = "registry+https://github.com/rust-lang/crates.io-index" 108 | checksum = "a4a4ddaa51a5bc52a6948f74c06d20aaaddb71924eab79b8c97a8c556e942d6a" 109 | 110 | [[package]] 111 | name = "base64ct" 112 | version = "1.6.0" 113 | source = "registry+https://github.com/rust-lang/crates.io-index" 114 | checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b" 115 | 116 | [[package]] 117 | name = "bitflags" 118 | version = "1.3.2" 119 | source = "registry+https://github.com/rust-lang/crates.io-index" 120 | checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" 121 | 122 | [[package]] 123 | name = "block-buffer" 124 | version = "0.10.4" 125 | source = "registry+https://github.com/rust-lang/crates.io-index" 126 | checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" 127 | 
dependencies = [ 128 | "generic-array", 129 | ] 130 | 131 | [[package]] 132 | name = "bumpalo" 133 | version = "3.12.0" 134 | source = "registry+https://github.com/rust-lang/crates.io-index" 135 | checksum = "0d261e256854913907f67ed06efbc3338dfe6179796deefc1ff763fc1aee5535" 136 | 137 | [[package]] 138 | name = "byteorder" 139 | version = "1.4.3" 140 | source = "registry+https://github.com/rust-lang/crates.io-index" 141 | checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" 142 | 143 | [[package]] 144 | name = "bytes" 145 | version = "1.4.0" 146 | source = "registry+https://github.com/rust-lang/crates.io-index" 147 | checksum = "89b2fd2a0dcf38d7971e2194b6b6eebab45ae01067456a7fd93d5547a61b70be" 148 | 149 | [[package]] 150 | name = "bzip2" 151 | version = "0.4.4" 152 | source = "registry+https://github.com/rust-lang/crates.io-index" 153 | checksum = "bdb116a6ef3f6c3698828873ad02c3014b3c85cadb88496095628e3ef1e347f8" 154 | dependencies = [ 155 | "bzip2-sys", 156 | "libc", 157 | ] 158 | 159 | [[package]] 160 | name = "bzip2-sys" 161 | version = "0.1.11+1.0.8" 162 | source = "registry+https://github.com/rust-lang/crates.io-index" 163 | checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc" 164 | dependencies = [ 165 | "cc", 166 | "libc", 167 | "pkg-config", 168 | ] 169 | 170 | [[package]] 171 | name = "cached-path" 172 | version = "0.6.1" 173 | source = "registry+https://github.com/rust-lang/crates.io-index" 174 | checksum = "097968e38f1319207f057d0f4d76452e4f4f847a5de61c5215379f297fa034f3" 175 | dependencies = [ 176 | "flate2", 177 | "fs2", 178 | "glob", 179 | "indicatif 0.16.2", 180 | "log", 181 | "rand", 182 | "reqwest", 183 | "serde", 184 | "serde_json", 185 | "sha2", 186 | "tar", 187 | "tempfile", 188 | "thiserror", 189 | "zip", 190 | ] 191 | 192 | [[package]] 193 | name = "cbindgen" 194 | version = "0.24.3" 195 | source = "registry+https://github.com/rust-lang/crates.io-index" 196 | checksum = 
"a6358dedf60f4d9b8db43ad187391afe959746101346fe51bb978126bec61dfb" 197 | dependencies = [ 198 | "clap 3.2.23", 199 | "heck", 200 | "indexmap", 201 | "log", 202 | "proc-macro2", 203 | "quote", 204 | "serde", 205 | "serde_json", 206 | "syn 1.0.109", 207 | "tempfile", 208 | "toml", 209 | ] 210 | 211 | [[package]] 212 | name = "cc" 213 | version = "1.0.79" 214 | source = "registry+https://github.com/rust-lang/crates.io-index" 215 | checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f" 216 | dependencies = [ 217 | "jobserver", 218 | ] 219 | 220 | [[package]] 221 | name = "cesu8" 222 | version = "1.1.0" 223 | source = "registry+https://github.com/rust-lang/crates.io-index" 224 | checksum = "6d43a04d8753f35258c91f8ec639f792891f748a1edbd759cf1dcea3382ad83c" 225 | 226 | [[package]] 227 | name = "cfg-if" 228 | version = "1.0.0" 229 | source = "registry+https://github.com/rust-lang/crates.io-index" 230 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" 231 | 232 | [[package]] 233 | name = "cipher" 234 | version = "0.3.0" 235 | source = "registry+https://github.com/rust-lang/crates.io-index" 236 | checksum = "7ee52072ec15386f770805afd189a01c8841be8696bed250fa2f13c4c0d6dfb7" 237 | dependencies = [ 238 | "generic-array", 239 | ] 240 | 241 | [[package]] 242 | name = "clap" 243 | version = "3.2.23" 244 | source = "registry+https://github.com/rust-lang/crates.io-index" 245 | checksum = "71655c45cb9845d3270c9d6df84ebe72b4dad3c2ba3f7023ad47c144e4e473a5" 246 | dependencies = [ 247 | "atty", 248 | "bitflags", 249 | "clap_lex 0.2.4", 250 | "indexmap", 251 | "strsim", 252 | "termcolor", 253 | "textwrap", 254 | ] 255 | 256 | [[package]] 257 | name = "clap" 258 | version = "4.2.2" 259 | source = "registry+https://github.com/rust-lang/crates.io-index" 260 | checksum = "9b802d85aaf3a1cdb02b224ba472ebdea62014fccfcb269b95a4d76443b5ee5a" 261 | dependencies = [ 262 | "clap_builder", 263 | "clap_derive", 264 | "once_cell", 265 | ] 266 | 267 | 
[[package]] 268 | name = "clap_builder" 269 | version = "4.2.2" 270 | source = "registry+https://github.com/rust-lang/crates.io-index" 271 | checksum = "14a1a858f532119338887a4b8e1af9c60de8249cd7bafd68036a489e261e37b6" 272 | dependencies = [ 273 | "anstream", 274 | "anstyle", 275 | "bitflags", 276 | "clap_lex 0.4.1", 277 | "strsim", 278 | ] 279 | 280 | [[package]] 281 | name = "clap_derive" 282 | version = "4.2.0" 283 | source = "registry+https://github.com/rust-lang/crates.io-index" 284 | checksum = "3f9644cd56d6b87dbe899ef8b053e331c0637664e9e21a33dfcdc36093f5c5c4" 285 | dependencies = [ 286 | "heck", 287 | "proc-macro2", 288 | "quote", 289 | "syn 2.0.15", 290 | ] 291 | 292 | [[package]] 293 | name = "clap_lex" 294 | version = "0.2.4" 295 | source = "registry+https://github.com/rust-lang/crates.io-index" 296 | checksum = "2850f2f5a82cbf437dd5af4d49848fbdfc27c157c3d010345776f952765261c5" 297 | dependencies = [ 298 | "os_str_bytes", 299 | ] 300 | 301 | [[package]] 302 | name = "clap_lex" 303 | version = "0.4.1" 304 | source = "registry+https://github.com/rust-lang/crates.io-index" 305 | checksum = "8a2dd5a6fe8c6e3502f568a6353e5273bbb15193ad9a89e457b9970798efbea1" 306 | 307 | [[package]] 308 | name = "colorchoice" 309 | version = "1.0.0" 310 | source = "registry+https://github.com/rust-lang/crates.io-index" 311 | checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" 312 | 313 | [[package]] 314 | name = "combine" 315 | version = "4.6.6" 316 | source = "registry+https://github.com/rust-lang/crates.io-index" 317 | checksum = "35ed6e9d84f0b51a7f52daf1c7d71dd136fd7a3f41a8462b8cdb8c78d920fad4" 318 | dependencies = [ 319 | "bytes", 320 | "memchr", 321 | ] 322 | 323 | [[package]] 324 | name = "console" 325 | version = "0.15.5" 326 | source = "registry+https://github.com/rust-lang/crates.io-index" 327 | checksum = "c3d79fbe8970a77e3e34151cc13d3b3e248aa0faaecb9f6091fa07ebefe5ad60" 328 | dependencies = [ 329 | "encode_unicode", 330 | "lazy_static", 331 | 
"libc", 332 | "unicode-width", 333 | "windows-sys 0.42.0", 334 | ] 335 | 336 | [[package]] 337 | name = "constant_time_eq" 338 | version = "0.1.5" 339 | source = "registry+https://github.com/rust-lang/crates.io-index" 340 | checksum = "245097e9a4535ee1e3e3931fcfcd55a796a44c643e8596ff6566d68f09b87bbc" 341 | 342 | [[package]] 343 | name = "core-foundation" 344 | version = "0.9.3" 345 | source = "registry+https://github.com/rust-lang/crates.io-index" 346 | checksum = "194a7a9e6de53fa55116934067c844d9d749312f75c6f6d0980e8c252f8c2146" 347 | dependencies = [ 348 | "core-foundation-sys", 349 | "libc", 350 | ] 351 | 352 | [[package]] 353 | name = "core-foundation-sys" 354 | version = "0.8.4" 355 | source = "registry+https://github.com/rust-lang/crates.io-index" 356 | checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa" 357 | 358 | [[package]] 359 | name = "cpufeatures" 360 | version = "0.2.6" 361 | source = "registry+https://github.com/rust-lang/crates.io-index" 362 | checksum = "280a9f2d8b3a38871a3c8a46fb80db65e5e5ed97da80c4d08bf27fb63e35e181" 363 | dependencies = [ 364 | "libc", 365 | ] 366 | 367 | [[package]] 368 | name = "crc32fast" 369 | version = "1.3.2" 370 | source = "registry+https://github.com/rust-lang/crates.io-index" 371 | checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" 372 | dependencies = [ 373 | "cfg-if", 374 | ] 375 | 376 | [[package]] 377 | name = "crossbeam-channel" 378 | version = "0.5.8" 379 | source = "registry+https://github.com/rust-lang/crates.io-index" 380 | checksum = "a33c2bf77f2df06183c3aa30d1e96c0695a313d4f9c453cc3762a6db39f99200" 381 | dependencies = [ 382 | "cfg-if", 383 | "crossbeam-utils", 384 | ] 385 | 386 | [[package]] 387 | name = "crossbeam-deque" 388 | version = "0.8.3" 389 | source = "registry+https://github.com/rust-lang/crates.io-index" 390 | checksum = "ce6fd6f855243022dcecf8702fef0c297d4338e226845fe067f6341ad9fa0cef" 391 | dependencies = [ 392 | "cfg-if", 393 | 
"crossbeam-epoch", 394 | "crossbeam-utils", 395 | ] 396 | 397 | [[package]] 398 | name = "crossbeam-epoch" 399 | version = "0.9.14" 400 | source = "registry+https://github.com/rust-lang/crates.io-index" 401 | checksum = "46bd5f3f85273295a9d14aedfb86f6aadbff6d8f5295c4a9edb08e819dcf5695" 402 | dependencies = [ 403 | "autocfg", 404 | "cfg-if", 405 | "crossbeam-utils", 406 | "memoffset", 407 | "scopeguard", 408 | ] 409 | 410 | [[package]] 411 | name = "crossbeam-utils" 412 | version = "0.8.15" 413 | source = "registry+https://github.com/rust-lang/crates.io-index" 414 | checksum = "3c063cd8cc95f5c377ed0d4b49a4b21f632396ff690e8470c29b3359b346984b" 415 | dependencies = [ 416 | "cfg-if", 417 | ] 418 | 419 | [[package]] 420 | name = "crypto-common" 421 | version = "0.1.6" 422 | source = "registry+https://github.com/rust-lang/crates.io-index" 423 | checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" 424 | dependencies = [ 425 | "generic-array", 426 | "typenum", 427 | ] 428 | 429 | [[package]] 430 | name = "darling" 431 | version = "0.14.4" 432 | source = "registry+https://github.com/rust-lang/crates.io-index" 433 | checksum = "7b750cb3417fd1b327431a470f388520309479ab0bf5e323505daf0290cd3850" 434 | dependencies = [ 435 | "darling_core", 436 | "darling_macro", 437 | ] 438 | 439 | [[package]] 440 | name = "darling_core" 441 | version = "0.14.4" 442 | source = "registry+https://github.com/rust-lang/crates.io-index" 443 | checksum = "109c1ca6e6b7f82cc233a97004ea8ed7ca123a9af07a8230878fcfda9b158bf0" 444 | dependencies = [ 445 | "fnv", 446 | "ident_case", 447 | "proc-macro2", 448 | "quote", 449 | "strsim", 450 | "syn 1.0.109", 451 | ] 452 | 453 | [[package]] 454 | name = "darling_macro" 455 | version = "0.14.4" 456 | source = "registry+https://github.com/rust-lang/crates.io-index" 457 | checksum = "a4aab4dbc9f7611d8b55048a3a16d2d010c2c8334e46304b40ac1cc14bf3b48e" 458 | dependencies = [ 459 | "darling_core", 460 | "quote", 461 | "syn 1.0.109", 462 | ] 463 | 
464 | [[package]] 465 | name = "derive_builder" 466 | version = "0.12.0" 467 | source = "registry+https://github.com/rust-lang/crates.io-index" 468 | checksum = "8d67778784b508018359cbc8696edb3db78160bab2c2a28ba7f56ef6932997f8" 469 | dependencies = [ 470 | "derive_builder_macro", 471 | ] 472 | 473 | [[package]] 474 | name = "derive_builder_core" 475 | version = "0.12.0" 476 | source = "registry+https://github.com/rust-lang/crates.io-index" 477 | checksum = "c11bdc11a0c47bc7d37d582b5285da6849c96681023680b906673c5707af7b0f" 478 | dependencies = [ 479 | "darling", 480 | "proc-macro2", 481 | "quote", 482 | "syn 1.0.109", 483 | ] 484 | 485 | [[package]] 486 | name = "derive_builder_macro" 487 | version = "0.12.0" 488 | source = "registry+https://github.com/rust-lang/crates.io-index" 489 | checksum = "ebcda35c7a396850a55ffeac740804b40ffec779b98fffbb1738f4033f0ee79e" 490 | dependencies = [ 491 | "derive_builder_core", 492 | "syn 1.0.109", 493 | ] 494 | 495 | [[package]] 496 | name = "digest" 497 | version = "0.10.6" 498 | source = "registry+https://github.com/rust-lang/crates.io-index" 499 | checksum = "8168378f4e5023e7218c89c891c0fd8ecdb5e5e4f18cb78f38cf245dd021e76f" 500 | dependencies = [ 501 | "block-buffer", 502 | "crypto-common", 503 | "subtle", 504 | ] 505 | 506 | [[package]] 507 | name = "dirs" 508 | version = "4.0.0" 509 | source = "registry+https://github.com/rust-lang/crates.io-index" 510 | checksum = "ca3aa72a6f96ea37bbc5aa912f6788242832f75369bdfdadcb0e38423f100059" 511 | dependencies = [ 512 | "dirs-sys", 513 | ] 514 | 515 | [[package]] 516 | name = "dirs-sys" 517 | version = "0.3.7" 518 | source = "registry+https://github.com/rust-lang/crates.io-index" 519 | checksum = "1b1d1d91c932ef41c0f2663aa8b0ca0342d444d842c06914aa0a7e352d0bada6" 520 | dependencies = [ 521 | "libc", 522 | "redox_users", 523 | "winapi", 524 | ] 525 | 526 | [[package]] 527 | name = "either" 528 | version = "1.8.1" 529 | source = "registry+https://github.com/rust-lang/crates.io-index" 530 | 
checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91" 531 | 532 | [[package]] 533 | name = "encode_unicode" 534 | version = "0.3.6" 535 | source = "registry+https://github.com/rust-lang/crates.io-index" 536 | checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" 537 | 538 | [[package]] 539 | name = "encoding_rs" 540 | version = "0.8.32" 541 | source = "registry+https://github.com/rust-lang/crates.io-index" 542 | checksum = "071a31f4ee85403370b58aca746f01041ede6f0da2730960ad001edc2b71b394" 543 | dependencies = [ 544 | "cfg-if", 545 | ] 546 | 547 | [[package]] 548 | name = "errno" 549 | version = "0.3.1" 550 | source = "registry+https://github.com/rust-lang/crates.io-index" 551 | checksum = "4bcfec3a70f97c962c307b2d2c56e358cf1d00b558d74262b5f929ee8cc7e73a" 552 | dependencies = [ 553 | "errno-dragonfly", 554 | "libc", 555 | "windows-sys 0.48.0", 556 | ] 557 | 558 | [[package]] 559 | name = "errno-dragonfly" 560 | version = "0.1.2" 561 | source = "registry+https://github.com/rust-lang/crates.io-index" 562 | checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf" 563 | dependencies = [ 564 | "cc", 565 | "libc", 566 | ] 567 | 568 | [[package]] 569 | name = "esaxx-rs" 570 | version = "0.1.8" 571 | source = "registry+https://github.com/rust-lang/crates.io-index" 572 | checksum = "1f748b253ceca9fed5f42f8b5ceb3851e93102199bc25b64b65369f76e5c0a35" 573 | dependencies = [ 574 | "cc", 575 | ] 576 | 577 | [[package]] 578 | name = "fastrand" 579 | version = "1.9.0" 580 | source = "registry+https://github.com/rust-lang/crates.io-index" 581 | checksum = "e51093e27b0797c359783294ca4f0a911c270184cb10f85783b118614a1501be" 582 | dependencies = [ 583 | "instant", 584 | ] 585 | 586 | [[package]] 587 | name = "filetime" 588 | version = "0.2.21" 589 | source = "registry+https://github.com/rust-lang/crates.io-index" 590 | checksum = "5cbc844cecaee9d4443931972e1289c8ff485cb4cc2767cb03ca139ed6885153" 591 | dependencies = [ 
592 | "cfg-if", 593 | "libc", 594 | "redox_syscall 0.2.16", 595 | "windows-sys 0.48.0", 596 | ] 597 | 598 | [[package]] 599 | name = "flate2" 600 | version = "1.0.25" 601 | source = "registry+https://github.com/rust-lang/crates.io-index" 602 | checksum = "a8a2db397cb1c8772f31494cb8917e48cd1e64f0fa7efac59fbd741a0a8ce841" 603 | dependencies = [ 604 | "crc32fast", 605 | "miniz_oxide", 606 | ] 607 | 608 | [[package]] 609 | name = "fnv" 610 | version = "1.0.7" 611 | source = "registry+https://github.com/rust-lang/crates.io-index" 612 | checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" 613 | 614 | [[package]] 615 | name = "foreign-types" 616 | version = "0.3.2" 617 | source = "registry+https://github.com/rust-lang/crates.io-index" 618 | checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" 619 | dependencies = [ 620 | "foreign-types-shared", 621 | ] 622 | 623 | [[package]] 624 | name = "foreign-types-shared" 625 | version = "0.1.1" 626 | source = "registry+https://github.com/rust-lang/crates.io-index" 627 | checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" 628 | 629 | [[package]] 630 | name = "form_urlencoded" 631 | version = "1.1.0" 632 | source = "registry+https://github.com/rust-lang/crates.io-index" 633 | checksum = "a9c384f161156f5260c24a097c56119f9be8c798586aecc13afbcbe7b7e26bf8" 634 | dependencies = [ 635 | "percent-encoding", 636 | ] 637 | 638 | [[package]] 639 | name = "fs2" 640 | version = "0.4.3" 641 | source = "registry+https://github.com/rust-lang/crates.io-index" 642 | checksum = "9564fc758e15025b46aa6643b1b77d047d1a56a1aea6e01002ac0c7026876213" 643 | dependencies = [ 644 | "libc", 645 | "winapi", 646 | ] 647 | 648 | [[package]] 649 | name = "futures-channel" 650 | version = "0.3.28" 651 | source = "registry+https://github.com/rust-lang/crates.io-index" 652 | checksum = "955518d47e09b25bbebc7a18df10b81f0c766eaf4c4f1cccef2fca5f2a4fb5f2" 653 | dependencies = [ 654 | 
"futures-core", 655 | ] 656 | 657 | [[package]] 658 | name = "futures-core" 659 | version = "0.3.28" 660 | source = "registry+https://github.com/rust-lang/crates.io-index" 661 | checksum = "4bca583b7e26f571124fe5b7561d49cb2868d79116cfa0eefce955557c6fee8c" 662 | 663 | [[package]] 664 | name = "futures-io" 665 | version = "0.3.28" 666 | source = "registry+https://github.com/rust-lang/crates.io-index" 667 | checksum = "4fff74096e71ed47f8e023204cfd0aa1289cd54ae5430a9523be060cdb849964" 668 | 669 | [[package]] 670 | name = "futures-sink" 671 | version = "0.3.28" 672 | source = "registry+https://github.com/rust-lang/crates.io-index" 673 | checksum = "f43be4fe21a13b9781a69afa4985b0f6ee0e1afab2c6f454a8cf30e2b2237b6e" 674 | 675 | [[package]] 676 | name = "futures-task" 677 | version = "0.3.28" 678 | source = "registry+https://github.com/rust-lang/crates.io-index" 679 | checksum = "76d3d132be6c0e6aa1534069c705a74a5997a356c0dc2f86a47765e5617c5b65" 680 | 681 | [[package]] 682 | name = "futures-util" 683 | version = "0.3.28" 684 | source = "registry+https://github.com/rust-lang/crates.io-index" 685 | checksum = "26b01e40b772d54cf6c6d721c1d1abd0647a0106a12ecaa1c186273392a69533" 686 | dependencies = [ 687 | "futures-core", 688 | "futures-io", 689 | "futures-task", 690 | "memchr", 691 | "pin-project-lite", 692 | "pin-utils", 693 | "slab", 694 | ] 695 | 696 | [[package]] 697 | name = "generic-array" 698 | version = "0.14.7" 699 | source = "registry+https://github.com/rust-lang/crates.io-index" 700 | checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" 701 | dependencies = [ 702 | "typenum", 703 | "version_check", 704 | ] 705 | 706 | [[package]] 707 | name = "getrandom" 708 | version = "0.2.9" 709 | source = "registry+https://github.com/rust-lang/crates.io-index" 710 | checksum = "c85e1d9ab2eadba7e5040d4e09cbd6d072b76a557ad64e797c2cb9d4da21d7e4" 711 | dependencies = [ 712 | "cfg-if", 713 | "libc", 714 | "wasi", 715 | ] 716 | 717 | [[package]] 718 | name = 
"glob" 719 | version = "0.3.1" 720 | source = "registry+https://github.com/rust-lang/crates.io-index" 721 | checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" 722 | 723 | [[package]] 724 | name = "h2" 725 | version = "0.3.18" 726 | source = "registry+https://github.com/rust-lang/crates.io-index" 727 | checksum = "17f8a914c2987b688368b5138aa05321db91f4090cf26118185672ad588bce21" 728 | dependencies = [ 729 | "bytes", 730 | "fnv", 731 | "futures-core", 732 | "futures-sink", 733 | "futures-util", 734 | "http", 735 | "indexmap", 736 | "slab", 737 | "tokio", 738 | "tokio-util", 739 | "tracing", 740 | ] 741 | 742 | [[package]] 743 | name = "hashbrown" 744 | version = "0.12.3" 745 | source = "registry+https://github.com/rust-lang/crates.io-index" 746 | checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" 747 | 748 | [[package]] 749 | name = "heck" 750 | version = "0.4.1" 751 | source = "registry+https://github.com/rust-lang/crates.io-index" 752 | checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" 753 | 754 | [[package]] 755 | name = "hermit-abi" 756 | version = "0.1.19" 757 | source = "registry+https://github.com/rust-lang/crates.io-index" 758 | checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" 759 | dependencies = [ 760 | "libc", 761 | ] 762 | 763 | [[package]] 764 | name = "hermit-abi" 765 | version = "0.2.6" 766 | source = "registry+https://github.com/rust-lang/crates.io-index" 767 | checksum = "ee512640fe35acbfb4bb779db6f0d80704c2cacfa2e39b601ef3e3f47d1ae4c7" 768 | dependencies = [ 769 | "libc", 770 | ] 771 | 772 | [[package]] 773 | name = "hermit-abi" 774 | version = "0.3.1" 775 | source = "registry+https://github.com/rust-lang/crates.io-index" 776 | checksum = "fed44880c466736ef9a5c5b5facefb5ed0785676d0c02d612db14e54f0d84286" 777 | 778 | [[package]] 779 | name = "hmac" 780 | version = "0.12.1" 781 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 782 | checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e" 783 | dependencies = [ 784 | "digest", 785 | ] 786 | 787 | [[package]] 788 | name = "http" 789 | version = "0.2.9" 790 | source = "registry+https://github.com/rust-lang/crates.io-index" 791 | checksum = "bd6effc99afb63425aff9b05836f029929e345a6148a14b7ecd5ab67af944482" 792 | dependencies = [ 793 | "bytes", 794 | "fnv", 795 | "itoa", 796 | ] 797 | 798 | [[package]] 799 | name = "http-body" 800 | version = "0.4.5" 801 | source = "registry+https://github.com/rust-lang/crates.io-index" 802 | checksum = "d5f38f16d184e36f2408a55281cd658ecbd3ca05cce6d6510a176eca393e26d1" 803 | dependencies = [ 804 | "bytes", 805 | "http", 806 | "pin-project-lite", 807 | ] 808 | 809 | [[package]] 810 | name = "httparse" 811 | version = "1.8.0" 812 | source = "registry+https://github.com/rust-lang/crates.io-index" 813 | checksum = "d897f394bad6a705d5f4104762e116a75639e470d80901eed05a860a95cb1904" 814 | 815 | [[package]] 816 | name = "httpdate" 817 | version = "1.0.2" 818 | source = "registry+https://github.com/rust-lang/crates.io-index" 819 | checksum = "c4a1e36c821dbe04574f602848a19f742f4fb3c98d40449f11bcad18d6b17421" 820 | 821 | [[package]] 822 | name = "hyper" 823 | version = "0.14.26" 824 | source = "registry+https://github.com/rust-lang/crates.io-index" 825 | checksum = "ab302d72a6f11a3b910431ff93aae7e773078c769f0a3ef15fb9ec692ed147d4" 826 | dependencies = [ 827 | "bytes", 828 | "futures-channel", 829 | "futures-core", 830 | "futures-util", 831 | "h2", 832 | "http", 833 | "http-body", 834 | "httparse", 835 | "httpdate", 836 | "itoa", 837 | "pin-project-lite", 838 | "socket2", 839 | "tokio", 840 | "tower-service", 841 | "tracing", 842 | "want", 843 | ] 844 | 845 | [[package]] 846 | name = "hyper-tls" 847 | version = "0.5.0" 848 | source = "registry+https://github.com/rust-lang/crates.io-index" 849 | checksum = 
"d6183ddfa99b85da61a140bea0efc93fdf56ceaa041b37d553518030827f9905" 850 | dependencies = [ 851 | "bytes", 852 | "hyper", 853 | "native-tls", 854 | "tokio", 855 | "tokio-native-tls", 856 | ] 857 | 858 | [[package]] 859 | name = "ident_case" 860 | version = "1.0.1" 861 | source = "registry+https://github.com/rust-lang/crates.io-index" 862 | checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" 863 | 864 | [[package]] 865 | name = "idna" 866 | version = "0.3.0" 867 | source = "registry+https://github.com/rust-lang/crates.io-index" 868 | checksum = "e14ddfc70884202db2244c223200c204c2bda1bc6e0998d11b5e024d657209e6" 869 | dependencies = [ 870 | "unicode-bidi", 871 | "unicode-normalization", 872 | ] 873 | 874 | [[package]] 875 | name = "indexmap" 876 | version = "1.9.3" 877 | source = "registry+https://github.com/rust-lang/crates.io-index" 878 | checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" 879 | dependencies = [ 880 | "autocfg", 881 | "hashbrown", 882 | ] 883 | 884 | [[package]] 885 | name = "indicatif" 886 | version = "0.15.0" 887 | source = "registry+https://github.com/rust-lang/crates.io-index" 888 | checksum = "7baab56125e25686df467fe470785512329883aab42696d661247aca2a2896e4" 889 | dependencies = [ 890 | "console", 891 | "lazy_static", 892 | "number_prefix 0.3.0", 893 | "regex", 894 | ] 895 | 896 | [[package]] 897 | name = "indicatif" 898 | version = "0.16.2" 899 | source = "registry+https://github.com/rust-lang/crates.io-index" 900 | checksum = "2d207dc617c7a380ab07ff572a6e52fa202a2a8f355860ac9c38e23f8196be1b" 901 | dependencies = [ 902 | "console", 903 | "lazy_static", 904 | "number_prefix 0.4.0", 905 | "regex", 906 | ] 907 | 908 | [[package]] 909 | name = "instant" 910 | version = "0.1.12" 911 | source = "registry+https://github.com/rust-lang/crates.io-index" 912 | checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" 913 | dependencies = [ 914 | "cfg-if", 915 | ] 916 | 917 | 
[[package]] 918 | name = "io-lifetimes" 919 | version = "1.0.10" 920 | source = "registry+https://github.com/rust-lang/crates.io-index" 921 | checksum = "9c66c74d2ae7e79a5a8f7ac924adbe38ee42a859c6539ad869eb51f0b52dc220" 922 | dependencies = [ 923 | "hermit-abi 0.3.1", 924 | "libc", 925 | "windows-sys 0.48.0", 926 | ] 927 | 928 | [[package]] 929 | name = "ipnet" 930 | version = "2.7.2" 931 | source = "registry+https://github.com/rust-lang/crates.io-index" 932 | checksum = "12b6ee2129af8d4fb011108c73d99a1b83a85977f23b82460c0ae2e25bb4b57f" 933 | 934 | [[package]] 935 | name = "is-terminal" 936 | version = "0.4.7" 937 | source = "registry+https://github.com/rust-lang/crates.io-index" 938 | checksum = "adcf93614601c8129ddf72e2d5633df827ba6551541c6d8c59520a371475be1f" 939 | dependencies = [ 940 | "hermit-abi 0.3.1", 941 | "io-lifetimes", 942 | "rustix", 943 | "windows-sys 0.48.0", 944 | ] 945 | 946 | [[package]] 947 | name = "itertools" 948 | version = "0.8.2" 949 | source = "registry+https://github.com/rust-lang/crates.io-index" 950 | checksum = "f56a2d0bc861f9165be4eb3442afd3c236d8a98afd426f65d92324ae1091a484" 951 | dependencies = [ 952 | "either", 953 | ] 954 | 955 | [[package]] 956 | name = "itertools" 957 | version = "0.9.0" 958 | source = "registry+https://github.com/rust-lang/crates.io-index" 959 | checksum = "284f18f85651fe11e8a991b2adb42cb078325c996ed026d994719efcfca1d54b" 960 | dependencies = [ 961 | "either", 962 | ] 963 | 964 | [[package]] 965 | name = "itoa" 966 | version = "1.0.6" 967 | source = "registry+https://github.com/rust-lang/crates.io-index" 968 | checksum = "453ad9f582a441959e5f0d088b02ce04cfe8d51a8eaf077f12ac6d3e94164ca6" 969 | 970 | [[package]] 971 | name = "jni" 972 | version = "0.21.1" 973 | source = "registry+https://github.com/rust-lang/crates.io-index" 974 | checksum = "1a87aa2bb7d2af34197c04845522473242e1aa17c12f4935d5856491a7fb8c97" 975 | dependencies = [ 976 | "cesu8", 977 | "cfg-if", 978 | "combine", 979 | "jni-sys", 980 | "log", 981 | 
"thiserror", 982 | "walkdir", 983 | "windows-sys 0.45.0", 984 | ] 985 | 986 | [[package]] 987 | name = "jni-sys" 988 | version = "0.3.0" 989 | source = "registry+https://github.com/rust-lang/crates.io-index" 990 | checksum = "8eaf4bc02d17cbdd7ff4c7438cafcdf7fb9a4613313ad11b4f8fefe7d3fa0130" 991 | 992 | [[package]] 993 | name = "jobserver" 994 | version = "0.1.26" 995 | source = "registry+https://github.com/rust-lang/crates.io-index" 996 | checksum = "936cfd212a0155903bcbc060e316fb6cc7cbf2e1907329391ebadc1fe0ce77c2" 997 | dependencies = [ 998 | "libc", 999 | ] 1000 | 1001 | [[package]] 1002 | name = "js-sys" 1003 | version = "0.3.61" 1004 | source = "registry+https://github.com/rust-lang/crates.io-index" 1005 | checksum = "445dde2150c55e483f3d8416706b97ec8e8237c307e5b7b4b8dd15e6af2a0730" 1006 | dependencies = [ 1007 | "wasm-bindgen", 1008 | ] 1009 | 1010 | [[package]] 1011 | name = "lazy_static" 1012 | version = "1.4.0" 1013 | source = "registry+https://github.com/rust-lang/crates.io-index" 1014 | checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" 1015 | 1016 | [[package]] 1017 | name = "libc" 1018 | version = "0.2.141" 1019 | source = "registry+https://github.com/rust-lang/crates.io-index" 1020 | checksum = "3304a64d199bb964be99741b7a14d26972741915b3649639149b2479bb46f4b5" 1021 | 1022 | [[package]] 1023 | name = "linux-raw-sys" 1024 | version = "0.3.2" 1025 | source = "registry+https://github.com/rust-lang/crates.io-index" 1026 | checksum = "3f508063cc7bb32987c71511216bd5a32be15bccb6a80b52df8b9d7f01fc3aa2" 1027 | 1028 | [[package]] 1029 | name = "log" 1030 | version = "0.4.17" 1031 | source = "registry+https://github.com/rust-lang/crates.io-index" 1032 | checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" 1033 | dependencies = [ 1034 | "cfg-if", 1035 | ] 1036 | 1037 | [[package]] 1038 | name = "macro_rules_attribute" 1039 | version = "0.1.3" 1040 | source = "registry+https://github.com/rust-lang/crates.io-index" 
1041 | checksum = "cf0c9b980bf4f3a37fd7b1c066941dd1b1d0152ce6ee6e8fe8c49b9f6810d862" 1042 | dependencies = [ 1043 | "macro_rules_attribute-proc_macro", 1044 | "paste", 1045 | ] 1046 | 1047 | [[package]] 1048 | name = "macro_rules_attribute-proc_macro" 1049 | version = "0.1.3" 1050 | source = "registry+https://github.com/rust-lang/crates.io-index" 1051 | checksum = "58093314a45e00c77d5c508f76e77c3396afbbc0d01506e7fae47b018bac2b1d" 1052 | 1053 | [[package]] 1054 | name = "memchr" 1055 | version = "2.5.0" 1056 | source = "registry+https://github.com/rust-lang/crates.io-index" 1057 | checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" 1058 | 1059 | [[package]] 1060 | name = "memoffset" 1061 | version = "0.8.0" 1062 | source = "registry+https://github.com/rust-lang/crates.io-index" 1063 | checksum = "d61c719bcfbcf5d62b3a09efa6088de8c54bc0bfcd3ea7ae39fcc186108b8de1" 1064 | dependencies = [ 1065 | "autocfg", 1066 | ] 1067 | 1068 | [[package]] 1069 | name = "mime" 1070 | version = "0.3.17" 1071 | source = "registry+https://github.com/rust-lang/crates.io-index" 1072 | checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" 1073 | 1074 | [[package]] 1075 | name = "minimal-lexical" 1076 | version = "0.2.1" 1077 | source = "registry+https://github.com/rust-lang/crates.io-index" 1078 | checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" 1079 | 1080 | [[package]] 1081 | name = "miniz_oxide" 1082 | version = "0.6.2" 1083 | source = "registry+https://github.com/rust-lang/crates.io-index" 1084 | checksum = "b275950c28b37e794e8c55d88aeb5e139d0ce23fdbbeda68f8d7174abdf9e8fa" 1085 | dependencies = [ 1086 | "adler", 1087 | ] 1088 | 1089 | [[package]] 1090 | name = "mio" 1091 | version = "0.8.6" 1092 | source = "registry+https://github.com/rust-lang/crates.io-index" 1093 | checksum = "5b9d9a46eff5b4ff64b45a9e316a6d1e0bc719ef429cbec4dc630684212bfdf9" 1094 | dependencies = [ 1095 | "libc", 1096 | "log", 1097 | 
"wasi", 1098 | "windows-sys 0.45.0", 1099 | ] 1100 | 1101 | [[package]] 1102 | name = "monostate" 1103 | version = "0.1.6" 1104 | source = "registry+https://github.com/rust-lang/crates.io-index" 1105 | checksum = "0230b703f1ac35df1e24f6d0d2255472bcccaf657ecdfa4f1fcbcad1ad5bb98a" 1106 | dependencies = [ 1107 | "monostate-impl", 1108 | "serde", 1109 | ] 1110 | 1111 | [[package]] 1112 | name = "monostate-impl" 1113 | version = "0.1.6" 1114 | source = "registry+https://github.com/rust-lang/crates.io-index" 1115 | checksum = "8795add3e14028f11f8e848bd3294898a8294767b3776b6f733560d33bd2530b" 1116 | dependencies = [ 1117 | "proc-macro2", 1118 | "quote", 1119 | "syn 2.0.15", 1120 | ] 1121 | 1122 | [[package]] 1123 | name = "native-tls" 1124 | version = "0.2.11" 1125 | source = "registry+https://github.com/rust-lang/crates.io-index" 1126 | checksum = "07226173c32f2926027b63cce4bcd8076c3552846cbe7925f3aaffeac0a3b92e" 1127 | dependencies = [ 1128 | "lazy_static", 1129 | "libc", 1130 | "log", 1131 | "openssl", 1132 | "openssl-probe", 1133 | "openssl-sys", 1134 | "schannel", 1135 | "security-framework", 1136 | "security-framework-sys", 1137 | "tempfile", 1138 | ] 1139 | 1140 | [[package]] 1141 | name = "nom" 1142 | version = "7.1.3" 1143 | source = "registry+https://github.com/rust-lang/crates.io-index" 1144 | checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" 1145 | dependencies = [ 1146 | "memchr", 1147 | "minimal-lexical", 1148 | ] 1149 | 1150 | [[package]] 1151 | name = "num_cpus" 1152 | version = "1.15.0" 1153 | source = "registry+https://github.com/rust-lang/crates.io-index" 1154 | checksum = "0fac9e2da13b5eb447a6ce3d392f23a29d8694bff781bf03a16cd9ac8697593b" 1155 | dependencies = [ 1156 | "hermit-abi 0.2.6", 1157 | "libc", 1158 | ] 1159 | 1160 | [[package]] 1161 | name = "number_prefix" 1162 | version = "0.3.0" 1163 | source = "registry+https://github.com/rust-lang/crates.io-index" 1164 | checksum = 
"17b02fc0ff9a9e4b35b3342880f48e896ebf69f2967921fe8646bf5b7125956a" 1165 | 1166 | [[package]] 1167 | name = "number_prefix" 1168 | version = "0.4.0" 1169 | source = "registry+https://github.com/rust-lang/crates.io-index" 1170 | checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" 1171 | 1172 | [[package]] 1173 | name = "once_cell" 1174 | version = "1.17.1" 1175 | source = "registry+https://github.com/rust-lang/crates.io-index" 1176 | checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3" 1177 | 1178 | [[package]] 1179 | name = "onig" 1180 | version = "6.4.0" 1181 | source = "registry+https://github.com/rust-lang/crates.io-index" 1182 | checksum = "8c4b31c8722ad9171c6d77d3557db078cab2bd50afcc9d09c8b315c59df8ca4f" 1183 | dependencies = [ 1184 | "bitflags", 1185 | "libc", 1186 | "once_cell", 1187 | "onig_sys", 1188 | ] 1189 | 1190 | [[package]] 1191 | name = "onig_sys" 1192 | version = "69.8.1" 1193 | source = "registry+https://github.com/rust-lang/crates.io-index" 1194 | checksum = "7b829e3d7e9cc74c7e315ee8edb185bf4190da5acde74afd7fc59c35b1f086e7" 1195 | dependencies = [ 1196 | "cc", 1197 | "pkg-config", 1198 | ] 1199 | 1200 | [[package]] 1201 | name = "opaque-debug" 1202 | version = "0.3.0" 1203 | source = "registry+https://github.com/rust-lang/crates.io-index" 1204 | checksum = "624a8340c38c1b80fd549087862da4ba43e08858af025b236e509b6649fc13d5" 1205 | 1206 | [[package]] 1207 | name = "openssl" 1208 | version = "0.10.50" 1209 | source = "registry+https://github.com/rust-lang/crates.io-index" 1210 | checksum = "7e30d8bc91859781f0a943411186324d580f2bbeb71b452fe91ae344806af3f1" 1211 | dependencies = [ 1212 | "bitflags", 1213 | "cfg-if", 1214 | "foreign-types", 1215 | "libc", 1216 | "once_cell", 1217 | "openssl-macros", 1218 | "openssl-sys", 1219 | ] 1220 | 1221 | [[package]] 1222 | name = "openssl-macros" 1223 | version = "0.1.1" 1224 | source = "registry+https://github.com/rust-lang/crates.io-index" 1225 | checksum = 
"a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" 1226 | dependencies = [ 1227 | "proc-macro2", 1228 | "quote", 1229 | "syn 2.0.15", 1230 | ] 1231 | 1232 | [[package]] 1233 | name = "openssl-probe" 1234 | version = "0.1.5" 1235 | source = "registry+https://github.com/rust-lang/crates.io-index" 1236 | checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" 1237 | 1238 | [[package]] 1239 | name = "openssl-sys" 1240 | version = "0.9.85" 1241 | source = "registry+https://github.com/rust-lang/crates.io-index" 1242 | checksum = "0d3d193fb1488ad46ffe3aaabc912cc931d02ee8518fe2959aea8ef52718b0c0" 1243 | dependencies = [ 1244 | "cc", 1245 | "libc", 1246 | "pkg-config", 1247 | "vcpkg", 1248 | ] 1249 | 1250 | [[package]] 1251 | name = "os_str_bytes" 1252 | version = "6.5.0" 1253 | source = "registry+https://github.com/rust-lang/crates.io-index" 1254 | checksum = "ceedf44fb00f2d1984b0bc98102627ce622e083e49a5bacdb3e514fa4238e267" 1255 | 1256 | [[package]] 1257 | name = "password-hash" 1258 | version = "0.4.2" 1259 | source = "registry+https://github.com/rust-lang/crates.io-index" 1260 | checksum = "7676374caaee8a325c9e7a2ae557f216c5563a171d6997b0ef8a65af35147700" 1261 | dependencies = [ 1262 | "base64ct", 1263 | "rand_core", 1264 | "subtle", 1265 | ] 1266 | 1267 | [[package]] 1268 | name = "paste" 1269 | version = "1.0.12" 1270 | source = "registry+https://github.com/rust-lang/crates.io-index" 1271 | checksum = "9f746c4065a8fa3fe23974dd82f15431cc8d40779821001404d10d2e79ca7d79" 1272 | 1273 | [[package]] 1274 | name = "pbkdf2" 1275 | version = "0.11.0" 1276 | source = "registry+https://github.com/rust-lang/crates.io-index" 1277 | checksum = "83a0692ec44e4cf1ef28ca317f14f8f07da2d95ec3fa01f86e4467b725e60917" 1278 | dependencies = [ 1279 | "digest", 1280 | "hmac", 1281 | "password-hash", 1282 | "sha2", 1283 | ] 1284 | 1285 | [[package]] 1286 | name = "percent-encoding" 1287 | version = "2.2.0" 1288 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 1289 | checksum = "478c572c3d73181ff3c2539045f6eb99e5491218eae919370993b890cdbdd98e" 1290 | 1291 | [[package]] 1292 | name = "pin-project-lite" 1293 | version = "0.2.9" 1294 | source = "registry+https://github.com/rust-lang/crates.io-index" 1295 | checksum = "e0a7ae3ac2f1173085d398531c705756c94a4c56843785df85a60c1a0afac116" 1296 | 1297 | [[package]] 1298 | name = "pin-utils" 1299 | version = "0.1.0" 1300 | source = "registry+https://github.com/rust-lang/crates.io-index" 1301 | checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" 1302 | 1303 | [[package]] 1304 | name = "pkg-config" 1305 | version = "0.3.26" 1306 | source = "registry+https://github.com/rust-lang/crates.io-index" 1307 | checksum = "6ac9a59f73473f1b8d852421e59e64809f025994837ef743615c6d0c5b305160" 1308 | 1309 | [[package]] 1310 | name = "ppv-lite86" 1311 | version = "0.2.17" 1312 | source = "registry+https://github.com/rust-lang/crates.io-index" 1313 | checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" 1314 | 1315 | [[package]] 1316 | name = "proc-macro2" 1317 | version = "1.0.56" 1318 | source = "registry+https://github.com/rust-lang/crates.io-index" 1319 | checksum = "2b63bdb0cd06f1f4dedf69b254734f9b45af66e4a031e42a7480257d9898b435" 1320 | dependencies = [ 1321 | "unicode-ident", 1322 | ] 1323 | 1324 | [[package]] 1325 | name = "quote" 1326 | version = "1.0.26" 1327 | source = "registry+https://github.com/rust-lang/crates.io-index" 1328 | checksum = "4424af4bf778aae2051a77b60283332f386554255d722233d09fbfc7e30da2fc" 1329 | dependencies = [ 1330 | "proc-macro2", 1331 | ] 1332 | 1333 | [[package]] 1334 | name = "rand" 1335 | version = "0.8.5" 1336 | source = "registry+https://github.com/rust-lang/crates.io-index" 1337 | checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" 1338 | dependencies = [ 1339 | "libc", 1340 | "rand_chacha", 1341 | "rand_core", 1342 | ] 1343 | 1344 
| [[package]] 1345 | name = "rand_chacha" 1346 | version = "0.3.1" 1347 | source = "registry+https://github.com/rust-lang/crates.io-index" 1348 | checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" 1349 | dependencies = [ 1350 | "ppv-lite86", 1351 | "rand_core", 1352 | ] 1353 | 1354 | [[package]] 1355 | name = "rand_core" 1356 | version = "0.6.4" 1357 | source = "registry+https://github.com/rust-lang/crates.io-index" 1358 | checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" 1359 | dependencies = [ 1360 | "getrandom", 1361 | ] 1362 | 1363 | [[package]] 1364 | name = "rayon" 1365 | version = "1.7.0" 1366 | source = "registry+https://github.com/rust-lang/crates.io-index" 1367 | checksum = "1d2df5196e37bcc87abebc0053e20787d73847bb33134a69841207dd0a47f03b" 1368 | dependencies = [ 1369 | "either", 1370 | "rayon-core", 1371 | ] 1372 | 1373 | [[package]] 1374 | name = "rayon-cond" 1375 | version = "0.1.0" 1376 | source = "registry+https://github.com/rust-lang/crates.io-index" 1377 | checksum = "fd1259362c9065e5ea39a789ef40b1e3fd934c94beb7b5ab3ac6629d3b5e7cb7" 1378 | dependencies = [ 1379 | "either", 1380 | "itertools 0.8.2", 1381 | "rayon", 1382 | ] 1383 | 1384 | [[package]] 1385 | name = "rayon-core" 1386 | version = "1.11.0" 1387 | source = "registry+https://github.com/rust-lang/crates.io-index" 1388 | checksum = "4b8f95bd6966f5c87776639160a66bd8ab9895d9d4ab01ddba9fc60661aebe8d" 1389 | dependencies = [ 1390 | "crossbeam-channel", 1391 | "crossbeam-deque", 1392 | "crossbeam-utils", 1393 | "num_cpus", 1394 | ] 1395 | 1396 | [[package]] 1397 | name = "redox_syscall" 1398 | version = "0.2.16" 1399 | source = "registry+https://github.com/rust-lang/crates.io-index" 1400 | checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" 1401 | dependencies = [ 1402 | "bitflags", 1403 | ] 1404 | 1405 | [[package]] 1406 | name = "redox_syscall" 1407 | version = "0.3.5" 1408 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 1409 | checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29" 1410 | dependencies = [ 1411 | "bitflags", 1412 | ] 1413 | 1414 | [[package]] 1415 | name = "redox_users" 1416 | version = "0.4.3" 1417 | source = "registry+https://github.com/rust-lang/crates.io-index" 1418 | checksum = "b033d837a7cf162d7993aded9304e30a83213c648b6e389db233191f891e5c2b" 1419 | dependencies = [ 1420 | "getrandom", 1421 | "redox_syscall 0.2.16", 1422 | "thiserror", 1423 | ] 1424 | 1425 | [[package]] 1426 | name = "regex" 1427 | version = "1.7.3" 1428 | source = "registry+https://github.com/rust-lang/crates.io-index" 1429 | checksum = "8b1f693b24f6ac912f4893ef08244d70b6067480d2f1a46e950c9691e6749d1d" 1430 | dependencies = [ 1431 | "aho-corasick", 1432 | "memchr", 1433 | "regex-syntax", 1434 | ] 1435 | 1436 | [[package]] 1437 | name = "regex-syntax" 1438 | version = "0.6.29" 1439 | source = "registry+https://github.com/rust-lang/crates.io-index" 1440 | checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" 1441 | 1442 | [[package]] 1443 | name = "reqwest" 1444 | version = "0.11.16" 1445 | source = "registry+https://github.com/rust-lang/crates.io-index" 1446 | checksum = "27b71749df584b7f4cac2c426c127a7c785a5106cc98f7a8feb044115f0fa254" 1447 | dependencies = [ 1448 | "base64 0.21.0", 1449 | "bytes", 1450 | "encoding_rs", 1451 | "futures-core", 1452 | "futures-util", 1453 | "h2", 1454 | "http", 1455 | "http-body", 1456 | "hyper", 1457 | "hyper-tls", 1458 | "ipnet", 1459 | "js-sys", 1460 | "log", 1461 | "mime", 1462 | "native-tls", 1463 | "once_cell", 1464 | "percent-encoding", 1465 | "pin-project-lite", 1466 | "serde", 1467 | "serde_json", 1468 | "serde_urlencoded", 1469 | "tokio", 1470 | "tokio-native-tls", 1471 | "tower-service", 1472 | "url", 1473 | "wasm-bindgen", 1474 | "wasm-bindgen-futures", 1475 | "web-sys", 1476 | "winreg", 1477 | ] 1478 | 1479 | [[package]] 1480 | name = "rustix" 
1481 | version = "0.37.12" 1482 | source = "registry+https://github.com/rust-lang/crates.io-index" 1483 | checksum = "722529a737f5a942fdbac3a46cee213053196737c5eaa3386d52e85b786f2659" 1484 | dependencies = [ 1485 | "bitflags", 1486 | "errno", 1487 | "io-lifetimes", 1488 | "libc", 1489 | "linux-raw-sys", 1490 | "windows-sys 0.48.0", 1491 | ] 1492 | 1493 | [[package]] 1494 | name = "ryu" 1495 | version = "1.0.13" 1496 | source = "registry+https://github.com/rust-lang/crates.io-index" 1497 | checksum = "f91339c0467de62360649f8d3e185ca8de4224ff281f66000de5eb2a77a79041" 1498 | 1499 | [[package]] 1500 | name = "same-file" 1501 | version = "1.0.6" 1502 | source = "registry+https://github.com/rust-lang/crates.io-index" 1503 | checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" 1504 | dependencies = [ 1505 | "winapi-util", 1506 | ] 1507 | 1508 | [[package]] 1509 | name = "schannel" 1510 | version = "0.1.21" 1511 | source = "registry+https://github.com/rust-lang/crates.io-index" 1512 | checksum = "713cfb06c7059f3588fb8044c0fad1d09e3c01d225e25b9220dbfdcf16dbb1b3" 1513 | dependencies = [ 1514 | "windows-sys 0.42.0", 1515 | ] 1516 | 1517 | [[package]] 1518 | name = "scopeguard" 1519 | version = "1.1.0" 1520 | source = "registry+https://github.com/rust-lang/crates.io-index" 1521 | checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" 1522 | 1523 | [[package]] 1524 | name = "security-framework" 1525 | version = "2.8.2" 1526 | source = "registry+https://github.com/rust-lang/crates.io-index" 1527 | checksum = "a332be01508d814fed64bf28f798a146d73792121129962fdf335bb3c49a4254" 1528 | dependencies = [ 1529 | "bitflags", 1530 | "core-foundation", 1531 | "core-foundation-sys", 1532 | "libc", 1533 | "security-framework-sys", 1534 | ] 1535 | 1536 | [[package]] 1537 | name = "security-framework-sys" 1538 | version = "2.8.0" 1539 | source = "registry+https://github.com/rust-lang/crates.io-index" 1540 | checksum = 
"31c9bb296072e961fcbd8853511dd39c2d8be2deb1e17c6860b1d30732b323b4" 1541 | dependencies = [ 1542 | "core-foundation-sys", 1543 | "libc", 1544 | ] 1545 | 1546 | [[package]] 1547 | name = "serde" 1548 | version = "1.0.160" 1549 | source = "registry+https://github.com/rust-lang/crates.io-index" 1550 | checksum = "bb2f3770c8bce3bcda7e149193a069a0f4365bda1fa5cd88e03bca26afc1216c" 1551 | dependencies = [ 1552 | "serde_derive", 1553 | ] 1554 | 1555 | [[package]] 1556 | name = "serde_derive" 1557 | version = "1.0.160" 1558 | source = "registry+https://github.com/rust-lang/crates.io-index" 1559 | checksum = "291a097c63d8497e00160b166a967a4a79c64f3facdd01cbd7502231688d77df" 1560 | dependencies = [ 1561 | "proc-macro2", 1562 | "quote", 1563 | "syn 2.0.15", 1564 | ] 1565 | 1566 | [[package]] 1567 | name = "serde_json" 1568 | version = "1.0.96" 1569 | source = "registry+https://github.com/rust-lang/crates.io-index" 1570 | checksum = "057d394a50403bcac12672b2b18fb387ab6d289d957dab67dd201875391e52f1" 1571 | dependencies = [ 1572 | "itoa", 1573 | "ryu", 1574 | "serde", 1575 | ] 1576 | 1577 | [[package]] 1578 | name = "serde_urlencoded" 1579 | version = "0.7.1" 1580 | source = "registry+https://github.com/rust-lang/crates.io-index" 1581 | checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" 1582 | dependencies = [ 1583 | "form_urlencoded", 1584 | "itoa", 1585 | "ryu", 1586 | "serde", 1587 | ] 1588 | 1589 | [[package]] 1590 | name = "sha1" 1591 | version = "0.10.5" 1592 | source = "registry+https://github.com/rust-lang/crates.io-index" 1593 | checksum = "f04293dc80c3993519f2d7f6f511707ee7094fe0c6d3406feb330cdb3540eba3" 1594 | dependencies = [ 1595 | "cfg-if", 1596 | "cpufeatures", 1597 | "digest", 1598 | ] 1599 | 1600 | [[package]] 1601 | name = "sha2" 1602 | version = "0.10.6" 1603 | source = "registry+https://github.com/rust-lang/crates.io-index" 1604 | checksum = "82e6b795fe2e3b1e845bafcb27aa35405c4d47cdfc92af5fc8d3002f76cebdc0" 1605 | dependencies = [ 
1606 | "cfg-if", 1607 | "cpufeatures", 1608 | "digest", 1609 | ] 1610 | 1611 | [[package]] 1612 | name = "slab" 1613 | version = "0.4.8" 1614 | source = "registry+https://github.com/rust-lang/crates.io-index" 1615 | checksum = "6528351c9bc8ab22353f9d776db39a20288e8d6c37ef8cfe3317cf875eecfc2d" 1616 | dependencies = [ 1617 | "autocfg", 1618 | ] 1619 | 1620 | [[package]] 1621 | name = "smallvec" 1622 | version = "1.10.0" 1623 | source = "registry+https://github.com/rust-lang/crates.io-index" 1624 | checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0" 1625 | 1626 | [[package]] 1627 | name = "socket2" 1628 | version = "0.4.9" 1629 | source = "registry+https://github.com/rust-lang/crates.io-index" 1630 | checksum = "64a4a911eed85daf18834cfaa86a79b7d266ff93ff5ba14005426219480ed662" 1631 | dependencies = [ 1632 | "libc", 1633 | "winapi", 1634 | ] 1635 | 1636 | [[package]] 1637 | name = "spm_precompiled" 1638 | version = "0.1.4" 1639 | source = "registry+https://github.com/rust-lang/crates.io-index" 1640 | checksum = "5851699c4033c63636f7ea4cf7b7c1f1bf06d0cc03cfb42e711de5a5c46cf326" 1641 | dependencies = [ 1642 | "base64 0.13.1", 1643 | "nom", 1644 | "serde", 1645 | "unicode-segmentation", 1646 | ] 1647 | 1648 | [[package]] 1649 | name = "strsim" 1650 | version = "0.10.0" 1651 | source = "registry+https://github.com/rust-lang/crates.io-index" 1652 | checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" 1653 | 1654 | [[package]] 1655 | name = "subtle" 1656 | version = "2.4.1" 1657 | source = "registry+https://github.com/rust-lang/crates.io-index" 1658 | checksum = "6bdef32e8150c2a081110b42772ffe7d7c9032b606bc226c8260fd97e0976601" 1659 | 1660 | [[package]] 1661 | name = "syn" 1662 | version = "1.0.109" 1663 | source = "registry+https://github.com/rust-lang/crates.io-index" 1664 | checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" 1665 | dependencies = [ 1666 | "proc-macro2", 1667 | "quote", 1668 | 
"unicode-ident", 1669 | ] 1670 | 1671 | [[package]] 1672 | name = "syn" 1673 | version = "2.0.15" 1674 | source = "registry+https://github.com/rust-lang/crates.io-index" 1675 | checksum = "a34fcf3e8b60f57e6a14301a2e916d323af98b0ea63c599441eec8558660c822" 1676 | dependencies = [ 1677 | "proc-macro2", 1678 | "quote", 1679 | "unicode-ident", 1680 | ] 1681 | 1682 | [[package]] 1683 | name = "tar" 1684 | version = "0.4.38" 1685 | source = "registry+https://github.com/rust-lang/crates.io-index" 1686 | checksum = "4b55807c0344e1e6c04d7c965f5289c39a8d94ae23ed5c0b57aabac549f871c6" 1687 | dependencies = [ 1688 | "filetime", 1689 | "libc", 1690 | "xattr", 1691 | ] 1692 | 1693 | [[package]] 1694 | name = "tempfile" 1695 | version = "3.5.0" 1696 | source = "registry+https://github.com/rust-lang/crates.io-index" 1697 | checksum = "b9fbec84f381d5795b08656e4912bec604d162bff9291d6189a78f4c8ab87998" 1698 | dependencies = [ 1699 | "cfg-if", 1700 | "fastrand", 1701 | "redox_syscall 0.3.5", 1702 | "rustix", 1703 | "windows-sys 0.45.0", 1704 | ] 1705 | 1706 | [[package]] 1707 | name = "termcolor" 1708 | version = "1.2.0" 1709 | source = "registry+https://github.com/rust-lang/crates.io-index" 1710 | checksum = "be55cf8942feac5c765c2c993422806843c9a9a45d4d5c407ad6dd2ea95eb9b6" 1711 | dependencies = [ 1712 | "winapi-util", 1713 | ] 1714 | 1715 | [[package]] 1716 | name = "textwrap" 1717 | version = "0.16.0" 1718 | source = "registry+https://github.com/rust-lang/crates.io-index" 1719 | checksum = "222a222a5bfe1bba4a77b45ec488a741b3cb8872e5e499451fd7d0129c9c7c3d" 1720 | 1721 | [[package]] 1722 | name = "thiserror" 1723 | version = "1.0.40" 1724 | source = "registry+https://github.com/rust-lang/crates.io-index" 1725 | checksum = "978c9a314bd8dc99be594bc3c175faaa9794be04a5a5e153caba6915336cebac" 1726 | dependencies = [ 1727 | "thiserror-impl", 1728 | ] 1729 | 1730 | [[package]] 1731 | name = "thiserror-impl" 1732 | version = "1.0.40" 1733 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 1734 | checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f" 1735 | dependencies = [ 1736 | "proc-macro2", 1737 | "quote", 1738 | "syn 2.0.15", 1739 | ] 1740 | 1741 | [[package]] 1742 | name = "time" 1743 | version = "0.3.20" 1744 | source = "registry+https://github.com/rust-lang/crates.io-index" 1745 | checksum = "cd0cbfecb4d19b5ea75bb31ad904eb5b9fa13f21079c3b92017ebdf4999a5890" 1746 | dependencies = [ 1747 | "serde", 1748 | "time-core", 1749 | ] 1750 | 1751 | [[package]] 1752 | name = "time-core" 1753 | version = "0.1.0" 1754 | source = "registry+https://github.com/rust-lang/crates.io-index" 1755 | checksum = "2e153e1f1acaef8acc537e68b44906d2db6436e2b35ac2c6b42640fff91f00fd" 1756 | 1757 | [[package]] 1758 | name = "tinyvec" 1759 | version = "1.6.0" 1760 | source = "registry+https://github.com/rust-lang/crates.io-index" 1761 | checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" 1762 | dependencies = [ 1763 | "tinyvec_macros", 1764 | ] 1765 | 1766 | [[package]] 1767 | name = "tinyvec_macros" 1768 | version = "0.1.1" 1769 | source = "registry+https://github.com/rust-lang/crates.io-index" 1770 | checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" 1771 | 1772 | [[package]] 1773 | name = "tokenizers" 1774 | version = "0.13.3" 1775 | source = "registry+https://github.com/rust-lang/crates.io-index" 1776 | checksum = "5cf49017523bf0bc01c9966f172c5f120bbb7b96cccd1708772dd42e767fb9f5" 1777 | dependencies = [ 1778 | "aho-corasick", 1779 | "cached-path", 1780 | "clap 4.2.2", 1781 | "derive_builder", 1782 | "dirs", 1783 | "esaxx-rs", 1784 | "getrandom", 1785 | "indicatif 0.15.0", 1786 | "itertools 0.9.0", 1787 | "lazy_static", 1788 | "log", 1789 | "macro_rules_attribute", 1790 | "monostate", 1791 | "onig", 1792 | "paste", 1793 | "rand", 1794 | "rayon", 1795 | "rayon-cond", 1796 | "regex", 1797 | "regex-syntax", 1798 | "reqwest", 1799 | "serde", 
1800 | "serde_json", 1801 | "spm_precompiled", 1802 | "thiserror", 1803 | "unicode-normalization-alignments", 1804 | "unicode-segmentation", 1805 | "unicode_categories", 1806 | ] 1807 | 1808 | [[package]] 1809 | name = "tokenizers-scala" 1810 | version = "0.1.0" 1811 | dependencies = [ 1812 | "cbindgen", 1813 | "jni", 1814 | "tokenizers", 1815 | ] 1816 | 1817 | [[package]] 1818 | name = "tokio" 1819 | version = "1.27.0" 1820 | source = "registry+https://github.com/rust-lang/crates.io-index" 1821 | checksum = "d0de47a4eecbe11f498978a9b29d792f0d2692d1dd003650c24c76510e3bc001" 1822 | dependencies = [ 1823 | "autocfg", 1824 | "bytes", 1825 | "libc", 1826 | "mio", 1827 | "num_cpus", 1828 | "pin-project-lite", 1829 | "socket2", 1830 | "windows-sys 0.45.0", 1831 | ] 1832 | 1833 | [[package]] 1834 | name = "tokio-native-tls" 1835 | version = "0.3.1" 1836 | source = "registry+https://github.com/rust-lang/crates.io-index" 1837 | checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2" 1838 | dependencies = [ 1839 | "native-tls", 1840 | "tokio", 1841 | ] 1842 | 1843 | [[package]] 1844 | name = "tokio-util" 1845 | version = "0.7.7" 1846 | source = "registry+https://github.com/rust-lang/crates.io-index" 1847 | checksum = "5427d89453009325de0d8f342c9490009f76e999cb7672d77e46267448f7e6b2" 1848 | dependencies = [ 1849 | "bytes", 1850 | "futures-core", 1851 | "futures-sink", 1852 | "pin-project-lite", 1853 | "tokio", 1854 | "tracing", 1855 | ] 1856 | 1857 | [[package]] 1858 | name = "toml" 1859 | version = "0.5.11" 1860 | source = "registry+https://github.com/rust-lang/crates.io-index" 1861 | checksum = "f4f7f0dd8d50a853a531c426359045b1998f04219d88799810762cd4ad314234" 1862 | dependencies = [ 1863 | "serde", 1864 | ] 1865 | 1866 | [[package]] 1867 | name = "tower-service" 1868 | version = "0.3.2" 1869 | source = "registry+https://github.com/rust-lang/crates.io-index" 1870 | checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52" 1871 | 
1872 | [[package]] 1873 | name = "tracing" 1874 | version = "0.1.37" 1875 | source = "registry+https://github.com/rust-lang/crates.io-index" 1876 | checksum = "8ce8c33a8d48bd45d624a6e523445fd21ec13d3653cd51f681abf67418f54eb8" 1877 | dependencies = [ 1878 | "cfg-if", 1879 | "pin-project-lite", 1880 | "tracing-core", 1881 | ] 1882 | 1883 | [[package]] 1884 | name = "tracing-core" 1885 | version = "0.1.30" 1886 | source = "registry+https://github.com/rust-lang/crates.io-index" 1887 | checksum = "24eb03ba0eab1fd845050058ce5e616558e8f8d8fca633e6b163fe25c797213a" 1888 | dependencies = [ 1889 | "once_cell", 1890 | ] 1891 | 1892 | [[package]] 1893 | name = "try-lock" 1894 | version = "0.2.4" 1895 | source = "registry+https://github.com/rust-lang/crates.io-index" 1896 | checksum = "3528ecfd12c466c6f163363caf2d02a71161dd5e1cc6ae7b34207ea2d42d81ed" 1897 | 1898 | [[package]] 1899 | name = "typenum" 1900 | version = "1.16.0" 1901 | source = "registry+https://github.com/rust-lang/crates.io-index" 1902 | checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba" 1903 | 1904 | [[package]] 1905 | name = "unicode-bidi" 1906 | version = "0.3.13" 1907 | source = "registry+https://github.com/rust-lang/crates.io-index" 1908 | checksum = "92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460" 1909 | 1910 | [[package]] 1911 | name = "unicode-ident" 1912 | version = "1.0.8" 1913 | source = "registry+https://github.com/rust-lang/crates.io-index" 1914 | checksum = "e5464a87b239f13a63a501f2701565754bae92d243d4bb7eb12f6d57d2269bf4" 1915 | 1916 | [[package]] 1917 | name = "unicode-normalization" 1918 | version = "0.1.22" 1919 | source = "registry+https://github.com/rust-lang/crates.io-index" 1920 | checksum = "5c5713f0fc4b5db668a2ac63cdb7bb4469d8c9fed047b1d0292cc7b0ce2ba921" 1921 | dependencies = [ 1922 | "tinyvec", 1923 | ] 1924 | 1925 | [[package]] 1926 | name = "unicode-normalization-alignments" 1927 | version = "0.1.12" 1928 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 1929 | checksum = "43f613e4fa046e69818dd287fdc4bc78175ff20331479dab6e1b0f98d57062de" 1930 | dependencies = [ 1931 | "smallvec", 1932 | ] 1933 | 1934 | [[package]] 1935 | name = "unicode-segmentation" 1936 | version = "1.10.1" 1937 | source = "registry+https://github.com/rust-lang/crates.io-index" 1938 | checksum = "1dd624098567895118886609431a7c3b8f516e41d30e0643f03d94592a147e36" 1939 | 1940 | [[package]] 1941 | name = "unicode-width" 1942 | version = "0.1.10" 1943 | source = "registry+https://github.com/rust-lang/crates.io-index" 1944 | checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" 1945 | 1946 | [[package]] 1947 | name = "unicode_categories" 1948 | version = "0.1.1" 1949 | source = "registry+https://github.com/rust-lang/crates.io-index" 1950 | checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e" 1951 | 1952 | [[package]] 1953 | name = "url" 1954 | version = "2.3.1" 1955 | source = "registry+https://github.com/rust-lang/crates.io-index" 1956 | checksum = "0d68c799ae75762b8c3fe375feb6600ef5602c883c5d21eb51c09f22b83c4643" 1957 | dependencies = [ 1958 | "form_urlencoded", 1959 | "idna", 1960 | "percent-encoding", 1961 | ] 1962 | 1963 | [[package]] 1964 | name = "utf8parse" 1965 | version = "0.2.1" 1966 | source = "registry+https://github.com/rust-lang/crates.io-index" 1967 | checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" 1968 | 1969 | [[package]] 1970 | name = "vcpkg" 1971 | version = "0.2.15" 1972 | source = "registry+https://github.com/rust-lang/crates.io-index" 1973 | checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" 1974 | 1975 | [[package]] 1976 | name = "version_check" 1977 | version = "0.9.4" 1978 | source = "registry+https://github.com/rust-lang/crates.io-index" 1979 | checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" 1980 | 1981 | [[package]] 1982 | name = 
"walkdir" 1983 | version = "2.3.3" 1984 | source = "registry+https://github.com/rust-lang/crates.io-index" 1985 | checksum = "36df944cda56c7d8d8b7496af378e6b16de9284591917d307c9b4d313c44e698" 1986 | dependencies = [ 1987 | "same-file", 1988 | "winapi-util", 1989 | ] 1990 | 1991 | [[package]] 1992 | name = "want" 1993 | version = "0.3.0" 1994 | source = "registry+https://github.com/rust-lang/crates.io-index" 1995 | checksum = "1ce8a968cb1cd110d136ff8b819a556d6fb6d919363c61534f6860c7eb172ba0" 1996 | dependencies = [ 1997 | "log", 1998 | "try-lock", 1999 | ] 2000 | 2001 | [[package]] 2002 | name = "wasi" 2003 | version = "0.11.0+wasi-snapshot-preview1" 2004 | source = "registry+https://github.com/rust-lang/crates.io-index" 2005 | checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" 2006 | 2007 | [[package]] 2008 | name = "wasm-bindgen" 2009 | version = "0.2.84" 2010 | source = "registry+https://github.com/rust-lang/crates.io-index" 2011 | checksum = "31f8dcbc21f30d9b8f2ea926ecb58f6b91192c17e9d33594b3df58b2007ca53b" 2012 | dependencies = [ 2013 | "cfg-if", 2014 | "wasm-bindgen-macro", 2015 | ] 2016 | 2017 | [[package]] 2018 | name = "wasm-bindgen-backend" 2019 | version = "0.2.84" 2020 | source = "registry+https://github.com/rust-lang/crates.io-index" 2021 | checksum = "95ce90fd5bcc06af55a641a86428ee4229e44e07033963a2290a8e241607ccb9" 2022 | dependencies = [ 2023 | "bumpalo", 2024 | "log", 2025 | "once_cell", 2026 | "proc-macro2", 2027 | "quote", 2028 | "syn 1.0.109", 2029 | "wasm-bindgen-shared", 2030 | ] 2031 | 2032 | [[package]] 2033 | name = "wasm-bindgen-futures" 2034 | version = "0.4.34" 2035 | source = "registry+https://github.com/rust-lang/crates.io-index" 2036 | checksum = "f219e0d211ba40266969f6dbdd90636da12f75bee4fc9d6c23d1260dadb51454" 2037 | dependencies = [ 2038 | "cfg-if", 2039 | "js-sys", 2040 | "wasm-bindgen", 2041 | "web-sys", 2042 | ] 2043 | 2044 | [[package]] 2045 | name = "wasm-bindgen-macro" 2046 | version = "0.2.84" 2047 
| source = "registry+https://github.com/rust-lang/crates.io-index" 2048 | checksum = "4c21f77c0bedc37fd5dc21f897894a5ca01e7bb159884559461862ae90c0b4c5" 2049 | dependencies = [ 2050 | "quote", 2051 | "wasm-bindgen-macro-support", 2052 | ] 2053 | 2054 | [[package]] 2055 | name = "wasm-bindgen-macro-support" 2056 | version = "0.2.84" 2057 | source = "registry+https://github.com/rust-lang/crates.io-index" 2058 | checksum = "2aff81306fcac3c7515ad4e177f521b5c9a15f2b08f4e32d823066102f35a5f6" 2059 | dependencies = [ 2060 | "proc-macro2", 2061 | "quote", 2062 | "syn 1.0.109", 2063 | "wasm-bindgen-backend", 2064 | "wasm-bindgen-shared", 2065 | ] 2066 | 2067 | [[package]] 2068 | name = "wasm-bindgen-shared" 2069 | version = "0.2.84" 2070 | source = "registry+https://github.com/rust-lang/crates.io-index" 2071 | checksum = "0046fef7e28c3804e5e38bfa31ea2a0f73905319b677e57ebe37e49358989b5d" 2072 | 2073 | [[package]] 2074 | name = "web-sys" 2075 | version = "0.3.61" 2076 | source = "registry+https://github.com/rust-lang/crates.io-index" 2077 | checksum = "e33b99f4b23ba3eec1a53ac264e35a755f00e966e0065077d6027c0f575b0b97" 2078 | dependencies = [ 2079 | "js-sys", 2080 | "wasm-bindgen", 2081 | ] 2082 | 2083 | [[package]] 2084 | name = "winapi" 2085 | version = "0.3.9" 2086 | source = "registry+https://github.com/rust-lang/crates.io-index" 2087 | checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" 2088 | dependencies = [ 2089 | "winapi-i686-pc-windows-gnu", 2090 | "winapi-x86_64-pc-windows-gnu", 2091 | ] 2092 | 2093 | [[package]] 2094 | name = "winapi-i686-pc-windows-gnu" 2095 | version = "0.4.0" 2096 | source = "registry+https://github.com/rust-lang/crates.io-index" 2097 | checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" 2098 | 2099 | [[package]] 2100 | name = "winapi-util" 2101 | version = "0.1.5" 2102 | source = "registry+https://github.com/rust-lang/crates.io-index" 2103 | checksum = 
"70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" 2104 | dependencies = [ 2105 | "winapi", 2106 | ] 2107 | 2108 | [[package]] 2109 | name = "winapi-x86_64-pc-windows-gnu" 2110 | version = "0.4.0" 2111 | source = "registry+https://github.com/rust-lang/crates.io-index" 2112 | checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" 2113 | 2114 | [[package]] 2115 | name = "windows-sys" 2116 | version = "0.42.0" 2117 | source = "registry+https://github.com/rust-lang/crates.io-index" 2118 | checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7" 2119 | dependencies = [ 2120 | "windows_aarch64_gnullvm 0.42.2", 2121 | "windows_aarch64_msvc 0.42.2", 2122 | "windows_i686_gnu 0.42.2", 2123 | "windows_i686_msvc 0.42.2", 2124 | "windows_x86_64_gnu 0.42.2", 2125 | "windows_x86_64_gnullvm 0.42.2", 2126 | "windows_x86_64_msvc 0.42.2", 2127 | ] 2128 | 2129 | [[package]] 2130 | name = "windows-sys" 2131 | version = "0.45.0" 2132 | source = "registry+https://github.com/rust-lang/crates.io-index" 2133 | checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" 2134 | dependencies = [ 2135 | "windows-targets 0.42.2", 2136 | ] 2137 | 2138 | [[package]] 2139 | name = "windows-sys" 2140 | version = "0.48.0" 2141 | source = "registry+https://github.com/rust-lang/crates.io-index" 2142 | checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" 2143 | dependencies = [ 2144 | "windows-targets 0.48.0", 2145 | ] 2146 | 2147 | [[package]] 2148 | name = "windows-targets" 2149 | version = "0.42.2" 2150 | source = "registry+https://github.com/rust-lang/crates.io-index" 2151 | checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" 2152 | dependencies = [ 2153 | "windows_aarch64_gnullvm 0.42.2", 2154 | "windows_aarch64_msvc 0.42.2", 2155 | "windows_i686_gnu 0.42.2", 2156 | "windows_i686_msvc 0.42.2", 2157 | "windows_x86_64_gnu 0.42.2", 2158 | "windows_x86_64_gnullvm 0.42.2", 
2159 | "windows_x86_64_msvc 0.42.2", 2160 | ] 2161 | 2162 | [[package]] 2163 | name = "windows-targets" 2164 | version = "0.48.0" 2165 | source = "registry+https://github.com/rust-lang/crates.io-index" 2166 | checksum = "7b1eb6f0cd7c80c79759c929114ef071b87354ce476d9d94271031c0497adfd5" 2167 | dependencies = [ 2168 | "windows_aarch64_gnullvm 0.48.0", 2169 | "windows_aarch64_msvc 0.48.0", 2170 | "windows_i686_gnu 0.48.0", 2171 | "windows_i686_msvc 0.48.0", 2172 | "windows_x86_64_gnu 0.48.0", 2173 | "windows_x86_64_gnullvm 0.48.0", 2174 | "windows_x86_64_msvc 0.48.0", 2175 | ] 2176 | 2177 | [[package]] 2178 | name = "windows_aarch64_gnullvm" 2179 | version = "0.42.2" 2180 | source = "registry+https://github.com/rust-lang/crates.io-index" 2181 | checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" 2182 | 2183 | [[package]] 2184 | name = "windows_aarch64_gnullvm" 2185 | version = "0.48.0" 2186 | source = "registry+https://github.com/rust-lang/crates.io-index" 2187 | checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc" 2188 | 2189 | [[package]] 2190 | name = "windows_aarch64_msvc" 2191 | version = "0.42.2" 2192 | source = "registry+https://github.com/rust-lang/crates.io-index" 2193 | checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" 2194 | 2195 | [[package]] 2196 | name = "windows_aarch64_msvc" 2197 | version = "0.48.0" 2198 | source = "registry+https://github.com/rust-lang/crates.io-index" 2199 | checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3" 2200 | 2201 | [[package]] 2202 | name = "windows_i686_gnu" 2203 | version = "0.42.2" 2204 | source = "registry+https://github.com/rust-lang/crates.io-index" 2205 | checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" 2206 | 2207 | [[package]] 2208 | name = "windows_i686_gnu" 2209 | version = "0.48.0" 2210 | source = "registry+https://github.com/rust-lang/crates.io-index" 2211 | checksum = 
"622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241" 2212 | 2213 | [[package]] 2214 | name = "windows_i686_msvc" 2215 | version = "0.42.2" 2216 | source = "registry+https://github.com/rust-lang/crates.io-index" 2217 | checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" 2218 | 2219 | [[package]] 2220 | name = "windows_i686_msvc" 2221 | version = "0.48.0" 2222 | source = "registry+https://github.com/rust-lang/crates.io-index" 2223 | checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00" 2224 | 2225 | [[package]] 2226 | name = "windows_x86_64_gnu" 2227 | version = "0.42.2" 2228 | source = "registry+https://github.com/rust-lang/crates.io-index" 2229 | checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" 2230 | 2231 | [[package]] 2232 | name = "windows_x86_64_gnu" 2233 | version = "0.48.0" 2234 | source = "registry+https://github.com/rust-lang/crates.io-index" 2235 | checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1" 2236 | 2237 | [[package]] 2238 | name = "windows_x86_64_gnullvm" 2239 | version = "0.42.2" 2240 | source = "registry+https://github.com/rust-lang/crates.io-index" 2241 | checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" 2242 | 2243 | [[package]] 2244 | name = "windows_x86_64_gnullvm" 2245 | version = "0.48.0" 2246 | source = "registry+https://github.com/rust-lang/crates.io-index" 2247 | checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953" 2248 | 2249 | [[package]] 2250 | name = "windows_x86_64_msvc" 2251 | version = "0.42.2" 2252 | source = "registry+https://github.com/rust-lang/crates.io-index" 2253 | checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" 2254 | 2255 | [[package]] 2256 | name = "windows_x86_64_msvc" 2257 | version = "0.48.0" 2258 | source = "registry+https://github.com/rust-lang/crates.io-index" 2259 | checksum = 
"1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a" 2260 | 2261 | [[package]] 2262 | name = "winreg" 2263 | version = "0.10.1" 2264 | source = "registry+https://github.com/rust-lang/crates.io-index" 2265 | checksum = "80d0f4e272c85def139476380b12f9ac60926689dd2e01d4923222f40580869d" 2266 | dependencies = [ 2267 | "winapi", 2268 | ] 2269 | 2270 | [[package]] 2271 | name = "xattr" 2272 | version = "0.2.3" 2273 | source = "registry+https://github.com/rust-lang/crates.io-index" 2274 | checksum = "6d1526bbe5aaeb5eb06885f4d987bcdfa5e23187055de9b83fe00156a821fabc" 2275 | dependencies = [ 2276 | "libc", 2277 | ] 2278 | 2279 | [[package]] 2280 | name = "zip" 2281 | version = "0.6.4" 2282 | source = "registry+https://github.com/rust-lang/crates.io-index" 2283 | checksum = "0445d0fbc924bb93539b4316c11afb121ea39296f99a3c4c9edad09e3658cdef" 2284 | dependencies = [ 2285 | "aes", 2286 | "byteorder", 2287 | "bzip2", 2288 | "constant_time_eq", 2289 | "crc32fast", 2290 | "crossbeam-utils", 2291 | "flate2", 2292 | "hmac", 2293 | "pbkdf2", 2294 | "sha1", 2295 | "time", 2296 | "zstd", 2297 | ] 2298 | 2299 | [[package]] 2300 | name = "zstd" 2301 | version = "0.11.2+zstd.1.5.2" 2302 | source = "registry+https://github.com/rust-lang/crates.io-index" 2303 | checksum = "20cc960326ece64f010d2d2107537f26dc589a6573a316bd5b1dba685fa5fde4" 2304 | dependencies = [ 2305 | "zstd-safe", 2306 | ] 2307 | 2308 | [[package]] 2309 | name = "zstd-safe" 2310 | version = "5.0.2+zstd.1.5.2" 2311 | source = "registry+https://github.com/rust-lang/crates.io-index" 2312 | checksum = "1d2a5585e04f9eea4b2a3d1eca508c4dee9592a89ef6f450c11719da0726f4db" 2313 | dependencies = [ 2314 | "libc", 2315 | "zstd-sys", 2316 | ] 2317 | 2318 | [[package]] 2319 | name = "zstd-sys" 2320 | version = "2.0.8+zstd.1.5.5" 2321 | source = "registry+https://github.com/rust-lang/crates.io-index" 2322 | checksum = "5556e6ee25d32df2586c098bbfa278803692a20d0ab9565e049480d52707ec8c" 2323 | dependencies = [ 2324 | "cc", 2325 | 
"libc", 2326 | "pkg-config", 2327 | ] 2328 | -------------------------------------------------------------------------------- /native/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "tokenizers-scala" 3 | version = "0.1.0" 4 | authors = ["Sören Brunk "] 5 | edition = "2021" 6 | 7 | [dependencies] 8 | tokenizers = "0.13.3" 9 | jni = "0.21.1" 10 | 11 | [lib] 12 | crate_type = ["cdylib"] 13 | 14 | [build-dependencies] 15 | cbindgen = "0.24.0" -------------------------------------------------------------------------------- /native/build.rs: -------------------------------------------------------------------------------- 1 | extern crate cbindgen; 2 | 3 | use std::env; 4 | 5 | fn main() { 6 | let crate_dir = env::var("CARGO_MANIFEST_DIR").unwrap(); 7 | 8 | cbindgen::Builder::new() 9 | .with_crate(crate_dir) 10 | .with_language(cbindgen::Language::C) 11 | .with_include("types.h") 12 | .generate() 13 | .expect("Unable to generate bindings") 14 | .write_to_file("target/headers-gen/lib.h"); 15 | } 16 | -------------------------------------------------------------------------------- /native/src/jvm_unwrapper.rs: -------------------------------------------------------------------------------- 1 | use std::error::Error; 2 | 3 | use jni::JNIEnv; 4 | 5 | /// Convenience function to allow using error handling via Result and ? inside our JNI functions. 
6 | /// JNI functions have to return a value, even if we throw a Java exception 7 | pub fn wrap_errors(mut f: F) -> tokenizers::Result 8 | where 9 | F: FnMut() -> tokenizers::Result, 10 | { 11 | f() 12 | } 13 | 14 | // ensure that we always throw a JVM exception instead of `panic`ing 15 | pub trait JvmUnwrapper { 16 | fn jvm_unwrap(self, env: &mut JNIEnv, default: T) -> T; 17 | } 18 | 19 | fn throw(e: Box, env: &mut JNIEnv) { 20 | let description = e.to_string(); 21 | // don't `unwrap` `throw_new`, another JVM exception might have already been thrown, in which case the `Result` is `Err` 22 | let _ = env.throw_new("java/lang/RuntimeException", description); 23 | } 24 | 25 | impl JvmUnwrapper for Result> { 26 | fn jvm_unwrap(self, env: &mut JNIEnv, default: T) -> T { 27 | self.unwrap_or_else(|e| { 28 | throw(e, env); 29 | default 30 | }) 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /native/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! Expose the tokenizers API to the JVM via J4RS/JNI 2 | //! 
3 | 4 | pub mod jvm_unwrapper; 5 | 6 | use jni::objects::{JLongArray, JObject, JObjectArray, JString, JValue}; 7 | use jni::sys::{jboolean, jint, jlong, jlongArray, jobject, jobjectArray}; 8 | use jni::JNIEnv; 9 | use jvm_unwrapper::{wrap_errors, JvmUnwrapper}; 10 | use tokenizers::{Encoding, Tokenizer}; 11 | 12 | #[no_mangle] 13 | pub extern "system" fn Java_io_brunk_tokenizers_Tokenizer_00024_fromPretrainedNative( 14 | mut env: JNIEnv, 15 | _object: JObject, 16 | identifier: JString, 17 | ) -> jlong { 18 | wrap_errors(|| { 19 | let identifier: String = env.get_string(&identifier)?.into(); 20 | Tokenizer::from_pretrained(identifier, None).map(to_boxed_ptr) 21 | }) 22 | .jvm_unwrap(&mut env, -1) 23 | } 24 | 25 | #[no_mangle] 26 | pub extern "system" fn Java_io_brunk_tokenizers_Tokenizer_00024_fromFile( 27 | mut env: JNIEnv, 28 | _object: JObject, 29 | path: JString, 30 | ) -> jlong { 31 | wrap_errors(|| { 32 | let identifier: String = env.get_string(&path)?.into(); 33 | Tokenizer::from_file(identifier).map(to_boxed_ptr) 34 | }) 35 | .jvm_unwrap(&mut env, -1) 36 | } 37 | 38 | #[no_mangle] 39 | pub extern "system" fn Java_io_brunk_tokenizers_Tokenizer_encode( 40 | mut env: JNIEnv, 41 | _object: JObject, 42 | tokenizer_ptr: jlong, 43 | input: JString, 44 | add_special_tokens: jboolean, 45 | ) -> jlong { 46 | wrap_errors(|| { 47 | let tokenizer = from_boxed_ptr::(tokenizer_ptr); 48 | let input: String = env.get_string(&input)?.into(); 49 | let encoding = tokenizer.encode_char_offsets(input, add_special_tokens != 0); 50 | encoding.map(to_boxed_ptr) 51 | }) 52 | .jvm_unwrap(&mut env, -1) 53 | } 54 | 55 | #[no_mangle] 56 | pub extern "system" fn Java_io_brunk_tokenizers_Tokenizer_decode<'a>( 57 | mut env: JNIEnv<'a>, 58 | _object: JObject, 59 | tokenizer_ptr: jlong, 60 | ids: JLongArray, 61 | skip_special_tokens: jboolean, 62 | ) -> JString<'a> { 63 | wrap_errors(|| { 64 | let tokenizer = from_boxed_ptr::(tokenizer_ptr); 65 | // All elements can be initialized to the same 
value. 66 | let len = env.get_array_length(&ids)? as usize; 67 | let mut buf: Vec = vec![0; len]; 68 | env.get_long_array_region(&ids, 0, &mut buf)?; 69 | let decoded = tokenizer.decode( 70 | buf.iter().map(|&e| e as u32).collect(), 71 | skip_special_tokens != 0, 72 | )?; 73 | env.new_string(decoded).map_err(|e| e.into()) 74 | }) 75 | .jvm_unwrap(&mut env, JString::default()) 76 | } 77 | 78 | #[no_mangle] 79 | pub extern "system" fn Java_io_brunk_tokenizers_Tokenizer_encodeBatch<'local>( 80 | mut env: JNIEnv<'local>, 81 | _object: JObject<'local>, 82 | tokenizer_ptr: jlong, 83 | inputs: JObjectArray<'local>, 84 | add_special_tokens: jboolean, 85 | ) -> JLongArray<'local> { 86 | let tokenizer = from_boxed_ptr::(tokenizer_ptr); 87 | wrap_errors(|| { 88 | let inputs = (0..env.get_array_length(&inputs)?) 89 | .map(|i| { 90 | let input = env.get_object_array_element(&inputs, i)?; 91 | let identifier: Result = 92 | unsafe { env.get_string_unchecked((&input).into()).map(|i| i.into()) }; 93 | identifier 94 | }) 95 | .collect::, _>>()?; 96 | let len = inputs.len(); 97 | let encodings: Vec<_> = tokenizer 98 | .encode_batch_char_offsets(inputs, add_special_tokens != 0)? 
99 | .into_iter() // it is important to move the encodings out of the vector or they will be deallocated 100 | .map(|encoding| Box::into_raw(Box::new(encoding)) as jlong) 101 | .collect(); 102 | let encodings_java = env.new_long_array(len as i32)?; 103 | env.set_long_array_region(&encodings_java, 0, &encodings)?; 104 | Ok(encodings_java) 105 | }) 106 | .jvm_unwrap(&mut env, JLongArray::default()) 107 | } 108 | 109 | #[no_mangle] 110 | pub extern "system" fn Java_io_brunk_tokenizers_Tokenizer_00024_free( 111 | _env: JNIEnv, 112 | _object: JObject, 113 | tokenizer_ptr: jlong, 114 | ) { 115 | free::(tokenizer_ptr) 116 | } 117 | 118 | #[no_mangle] 119 | pub extern "system" fn Java_io_brunk_tokenizers_Encoding_length( 120 | _env: JNIEnv, 121 | _object: JObject, 122 | encoding_ptr: jlong, 123 | ) -> jint { 124 | let encoding = from_boxed_ptr::(encoding_ptr); 125 | encoding.len() as i32 126 | } 127 | 128 | #[no_mangle] 129 | pub extern "system" fn Java_io_brunk_tokenizers_Encoding_nSequences( 130 | _env: JNIEnv, 131 | _object: JObject, 132 | encoding_ptr: jlong, 133 | ) -> jint { 134 | let encoding = from_boxed_ptr::(encoding_ptr); 135 | encoding.n_sequences() as i32 136 | } 137 | 138 | /// helper to convert int arrays from an encoding 139 | fn vector_to_java(mut env: JNIEnv, encoding_ptr: jlong, extractor: F) -> jlongArray 140 | where 141 | F: Fn(&Encoding) -> Vec, 142 | { 143 | wrap_errors(|| { 144 | let encoding = from_boxed_ptr::(encoding_ptr); 145 | let ids: Vec = extractor(encoding).iter().map(|&e| e as i64).collect(); 146 | let ids_java = env.new_long_array(ids.len() as i32)?; 147 | env.set_long_array_region(&ids_java, 0, &ids)?; 148 | Ok(ids_java.into_raw()) 149 | }) 150 | .jvm_unwrap(&mut env, JObject::null().into_raw()) 151 | } 152 | 153 | #[no_mangle] 154 | pub extern "system" fn Java_io_brunk_tokenizers_Encoding_ids( 155 | env: JNIEnv, 156 | _object: JObject, 157 | encoding_ptr: jlong, 158 | ) -> jlongArray { 159 | vector_to_java(env, encoding_ptr, |e| 
e.get_ids().to_vec()) 160 | } 161 | 162 | #[no_mangle] 163 | pub extern "system" fn Java_io_brunk_tokenizers_Encoding_typeIds( 164 | env: JNIEnv, 165 | _object: JObject, 166 | encoding_ptr: jlong, 167 | ) -> jlongArray { 168 | vector_to_java(env, encoding_ptr, |e| e.get_type_ids().to_vec()) 169 | } 170 | 171 | #[no_mangle] 172 | pub extern "system" fn Java_io_brunk_tokenizers_Encoding_attentionMask( 173 | env: JNIEnv, 174 | _object: JObject, 175 | encoding_ptr: jlong, 176 | ) -> jlongArray { 177 | vector_to_java(env, encoding_ptr, |e| e.get_attention_mask().to_vec()) 178 | } 179 | 180 | #[no_mangle] 181 | pub extern "system" fn Java_io_brunk_tokenizers_Encoding_specialTokensMask( 182 | env: JNIEnv, 183 | _object: JObject, 184 | encoding_ptr: jlong, 185 | ) -> jlongArray { 186 | vector_to_java(env, encoding_ptr, |e| e.get_special_tokens_mask().to_vec()) 187 | } 188 | 189 | #[no_mangle] 190 | pub extern "system" fn Java_io_brunk_tokenizers_Encoding_tokens( 191 | mut env: JNIEnv, 192 | _object: JObject, 193 | encoding_ptr: jlong, 194 | ) -> jobjectArray { 195 | wrap_errors(|| { 196 | let encoding = from_boxed_ptr::(encoding_ptr); 197 | let tokens = encoding.get_tokens().to_vec(); 198 | let string_class = env.find_class("java/lang/String")?; 199 | let empty_string = env.new_string("")?; 200 | let tokens_java = env.new_object_array(tokens.len() as i32, string_class, empty_string)?; 201 | for (i, type_id) in tokens.iter().enumerate() { 202 | env.set_object_array_element(&tokens_java, i as i32, env.new_string(type_id)?)?; 203 | } 204 | Ok(tokens_java.into_raw()) 205 | }) 206 | .jvm_unwrap(&mut env, JObject::null().into_raw()) 207 | } 208 | 209 | #[no_mangle] 210 | pub extern "system" fn Java_io_brunk_tokenizers_Encoding_wordIds( 211 | mut env: JNIEnv, 212 | _object: JObject, 213 | encoding_ptr: jlong, 214 | ) -> jlongArray { 215 | wrap_errors(|| { 216 | let encoding = from_boxed_ptr::(encoding_ptr); 217 | let ids: Vec<_> = encoding 218 | .get_word_ids() 219 | .iter() 220 
| .map(|e| e.map(|e| e as i64).unwrap_or(-1)) 221 | .collect(); 222 | let ids_java = env.new_long_array(ids.len() as i32)?; 223 | env.set_long_array_region(&ids_java, 0, &ids)?; 224 | Ok(ids_java.into_raw()) 225 | }) 226 | .jvm_unwrap(&mut env, JObject::null().into_raw()) 227 | } 228 | 229 | #[no_mangle] 230 | pub extern "system" fn Java_io_brunk_tokenizers_Encoding_offsets( 231 | mut env: JNIEnv, 232 | _object: JObject, 233 | encoding_ptr: jlong, 234 | ) -> jobject { 235 | let encoding = from_boxed_ptr::(encoding_ptr); 236 | let offsets = encoding.get_offsets(); 237 | let start_offsets: Vec<_> = offsets.iter().map(|o| o.0 as i64).collect(); 238 | let end_offsets: Vec<_> = offsets.iter().map(|o| o.1 as i64).collect(); 239 | wrap_errors(|| { 240 | let start_offsets_java = env.new_long_array(start_offsets.len() as i32)?; 241 | env.set_long_array_region(&start_offsets_java, 0, &start_offsets)?; 242 | let end_offsets_java = env.new_long_array(end_offsets.len() as i32)?; 243 | env.set_long_array_region(&end_offsets_java, 0, &end_offsets)?; 244 | let tokenizer_java = env.new_object( 245 | "io/brunk/tokenizers/NativeOffsets", 246 | "([J[J)V", 247 | &[ 248 | JValue::Object(&start_offsets_java), 249 | JValue::Object(&end_offsets_java), 250 | ], 251 | )?; 252 | Ok(tokenizer_java.into_raw()) 253 | }) 254 | .jvm_unwrap(&mut env, JObject::null().into_raw()) 255 | } 256 | 257 | #[no_mangle] 258 | pub extern "system" fn Java_io_brunk_tokenizers_Encoding_00024_free( 259 | _env: JNIEnv, 260 | _object: JObject, 261 | encoding_ptr: jlong, 262 | ) { 263 | free::(encoding_ptr); 264 | } 265 | 266 | fn free(ptr: i64) { 267 | drop(unsafe { Box::from_raw(ptr as *mut T) }) 268 | } 269 | 270 | fn to_boxed_ptr(value: T) -> i64 { 271 | Box::into_raw(Box::new(value)) as jlong 272 | } 273 | 274 | fn from_boxed_ptr(ptr: i64) -> &'static mut T { 275 | unsafe { &mut *(ptr as *mut T) } 276 | } 277 | -------------------------------------------------------------------------------- /native/src/main.rs: 
-------------------------------------------------------------------------------- 1 | fn main() { 2 | println!("Hello world!"); 3 | } -------------------------------------------------------------------------------- /scripts/src/scala/tokenizers/package.scala: -------------------------------------------------------------------------------- 1 | package tokenizers 2 | 3 | import bleep.model 4 | import bleep.model.CrossProjectName 5 | 6 | package object scripts { 7 | val nativeProject: model.CrossProjectName = 8 | model.CrossProjectName(model.ProjectName("native"), None) 9 | val mainProject: model.CrossProjectName = 10 | model.CrossProjectName(model.ProjectName("tokenizers"), None) 11 | 12 | // will publish these with dependencies 13 | def projectsToPublish(crossName: model.CrossProjectName): Boolean = 14 | crossName.name.value match { 15 | case "native" | "tokenizers" => true 16 | case _ => false 17 | } 18 | 19 | val groupId = "io.brunk.tokenizers" 20 | } 21 | -------------------------------------------------------------------------------- /scripts/src/scala/tokenizers/scripts/GenJniLibrary.scala: -------------------------------------------------------------------------------- 1 | // derived from https://github.com/oyvindberg/tui-scala/blob/9f6b67db089ac10a183fe9f992a6bc81df125bc9/scripts/src/scala/tui/scripts/GenJniLibrary.scala 2 | 3 | package tokenizers 4 | package scripts 5 | 6 | import bleep._ 7 | import bleep.plugin.jni.{Cargo, JniNative, JniPackage} 8 | 9 | import java.nio.file.Path 10 | 11 | /** Build the native library with cargo and add it to classpath resources */ 12 | object GenJniLibrary extends bleep.BleepCodegenScript("GenJniLibrary") { 13 | 14 | def tokenizersJniNativeLib(started: Started): JniNative = 15 | new JniNative( 16 | logger = started.logger, 17 | nativeCompileSourceDirectory = started.projectPaths(nativeProject).dir, 18 | nativeTargetDirectory = started.buildPaths.dotBleepDir, 19 | nativeBuildTool = new Cargo(release = true), 20 | libName = 
"tokenizers", 21 | env = sys.env.toList 22 | ) { 23 | override lazy val nativePlatform: String = 24 | OsArch.current match { 25 | case OsArch.LinuxAmd64 => "x86_64-linux" 26 | case OsArch.WindowsAmd64 => "x86_64-windows" 27 | case OsArch.MacosAmd64 => "x86_64-darwin" 28 | case OsArch.MacosArm64(_) => "arm64-darwin" 29 | case other: OsArch.Other => sys.error(s"not implemented: $other") 30 | } 31 | } 32 | 33 | override def run( 34 | started: Started, 35 | commands: Commands, 36 | targets: List[GenJniLibrary.Target], 37 | args: List[String] 38 | ): Unit = { 39 | val jniPackage = new JniPackage(started.buildPaths.buildDir, tokenizersJniNativeLib(started)) { 40 | // override naming standard to match `NativeLoader.java` 41 | override lazy val managedNativeLibraries: Seq[(Path, RelPath)] = { 42 | val library: Path = jniNative.nativeCompile() 43 | val name = System.mapLibraryName(s"native-${jniNative.nativePlatform}-${jniNative.libName}") 44 | Seq(library -> new RelPath(List(name))) 45 | } 46 | } 47 | 48 | targets.foreach { target => 49 | // copy into place in resources directories 50 | val writtenPaths = jniPackage.copyTo(target.resources) 51 | writtenPaths.foreach(path => started.logger.withContext(path).info("wrote")) 52 | } 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /scripts/src/scala/tokenizers/scripts/Javah.scala: -------------------------------------------------------------------------------- 1 | // derived from https://github.com/oyvindberg/tui-scala/blob/9f6b67db089ac10a183fe9f992a6bc81df125bc9/scripts/src/scala/tui/scripts/GenJniLibrary.scala 2 | 3 | package tokenizers 4 | package scripts 5 | 6 | import bleep._ 7 | import bleep.plugin.jni.{Cargo, JniNative, JniPackage} 8 | 9 | import java.nio.file.Path 10 | import bleep.plugin.jni.JniJavah 11 | 12 | /** Generate JNI C headers for native functions. We still need to convert them to Rust, but we can 13 | * use the signatures. 
14 | */ 15 | object Javah extends bleep.BleepScript("Javah") { 16 | 17 | override def run(started: Started, commands: Commands, args: List[String]): Unit = 18 | JniJavah( 19 | logger = started.logger, 20 | projectPaths = started.projectPaths(mainProject), 21 | bloopProject = started.bloopProject(mainProject) 22 | ).javah() 23 | 24 | } 25 | -------------------------------------------------------------------------------- /scripts/src/scala/tokenizers/scripts/Publish.scala: -------------------------------------------------------------------------------- 1 | package tokenizers 2 | package scripts 3 | 4 | import bleep._ 5 | import bleep.packaging.{packageLibraries, CoordinatesFor, PackagedLibrary, PublishLayout} 6 | import bleep.plugin.cirelease.CiReleasePlugin 7 | import bleep.plugin.dynver.DynVerPlugin 8 | import bleep.plugin.nosbt.InteractionService 9 | import bleep.plugin.pgp.PgpPlugin 10 | import bleep.plugin.sonatype.Sonatype 11 | import coursier.Info 12 | 13 | import scala.collection.immutable.SortedMap 14 | 15 | object Publish extends BleepScript("Publish") { 16 | 17 | def run(started: Started, commands: Commands, args: List[String]): Unit = { 18 | commands.compile(started.build.explodedProjects.keys.filter(projectsToPublish).toList) 19 | 20 | val dynVer = new DynVerPlugin( 21 | baseDirectory = started.buildPaths.buildDir.toFile, 22 | dynverSonatypeSnapshots = true 23 | ) 24 | val pgp = new PgpPlugin( 25 | logger = started.logger, 26 | maybeCredentials = None, 27 | interactionService = InteractionService.DoesNotMaskYourPasswordExclamationOneOne 28 | ) 29 | val sonatype = new Sonatype( 30 | logger = started.logger, 31 | sonatypeBundleDirectory = started.buildPaths.dotBleepDir / "sonatype-bundle", 32 | sonatypeProfileName = "io.brunk", 33 | bundleName = "tokenizers", 34 | version = dynVer.version, 35 | sonatypeCredentialHost = Sonatype.sonatype01 36 | ) 37 | val ciRelease = new CiReleasePlugin(started.logger, sonatype, dynVer, pgp) 38 | 39 | 
started.logger.info(dynVer.version) 40 | 41 | val info = Info( 42 | description = "Scala bindings for Huggingface Tokenizers", 43 | homePage = "https://github.com/sbrunk/tokenizers-scala/", 44 | developers = List( 45 | Info.Developer( 46 | "sbrunk", 47 | "Sören Brunk", 48 | "https://github.com/sbrunk" 49 | ) 50 | ), 51 | publication = None, 52 | scm = CiReleasePlugin.inferScmInfo, 53 | licenseInfo = List( 54 | Info.License( 55 | "Apache", 56 | Some("https://opensource.org/license/apache-2-0/"), 57 | distribution = Some("repo"), 58 | comments = None 59 | ) 60 | ) 61 | ) 62 | 63 | val packagedLibraries: SortedMap[model.CrossProjectName, PackagedLibrary] = 64 | packageLibraries( 65 | started, 66 | coordinatesFor = CoordinatesFor.Default(groupId = groupId, version = dynVer.version), 67 | shouldInclude = projectsToPublish, 68 | publishLayout = PublishLayout.Maven(info) 69 | ) 70 | 71 | val files: Map[RelPath, Array[Byte]] = 72 | packagedLibraries.flatMap { case (_, PackagedLibrary(_, files)) => files.all } 73 | 74 | files.foreach { case (path, bytes) => 75 | started.logger.withContext(path)(_.asString).withContext(bytes.length).debug("will publish") 76 | } 77 | ciRelease.ciRelease(files) 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /scripts/src/scala/tokenizers/scripts/PublishLocal.scala: -------------------------------------------------------------------------------- 1 | package tokenizers 2 | package scripts 3 | 4 | import bleep._ 5 | import bleep.plugin.dynver.DynVerPlugin 6 | 7 | object PublishLocal extends BleepScript("PublishLocal") { 8 | def run(started: Started, commands: Commands, args: List[String]): Unit = { 9 | val dynVer = new DynVerPlugin( 10 | baseDirectory = started.buildPaths.buildDir.toFile, 11 | dynverSonatypeSnapshots = true 12 | ) 13 | val projects = started.build.explodedProjects.keys.toArray.filter(projectsToPublish) 14 | 15 | commands.publishLocal( 16 | bleep.commands.PublishLocal.Options( 17 
| groupId = groupId, 18 | version = dynVer.version, 19 | publishTarget = bleep.commands.PublishLocal.LocalIvy, 20 | projects = projects 21 | ) 22 | ) 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /tests/src/scala/io/brunk/tokenizers/TokenizerSuite.scala: -------------------------------------------------------------------------------- 1 | package io.brunk.tokenizers 2 | 3 | class TokenizerSuite extends munit.FunSuite { 4 | 5 | // assert encoding for 6 | // Hello, y'all! How are you 😁 ? 7 | def assertEncoding(encoding: Encoding) = { 8 | assertEquals(encoding.length, 13) 9 | 10 | assertEquals( 11 | encoding.ids, 12 | Seq[Long](101, 8667, 117, 194, 112, 1155, 106, 1731, 1132, 1128, 100, 136, 102) 13 | ) 14 | 15 | assert(encoding.typeIds.forall(_ == 0)) 16 | 17 | assert(encoding.attentionMask.forall(_ == 1)) 18 | 19 | assertEquals(encoding.specialTokensMask, 1L +: Seq.fill(11)(0L) :+ 1L) 20 | 21 | val expectedTokens = 22 | Seq("[CLS]", "Hello", ",", "y", "'", "all", "!", "How", "are", "you", "[UNK]", "?", "[SEP]") 23 | assertEquals(encoding.tokens, expectedTokens) 24 | 25 | val expectedWordIds = None +: (0 to 10).map(id => Some(id.toLong)) :+ None 26 | assertEquals(encoding.wordIds, expectedWordIds) 27 | 28 | assertEquals(encoding.nSequences, 1) 29 | 30 | assertEquals( 31 | encoding.offsets, 32 | Seq( 33 | (0L, 0L), 34 | (0L, 5L), 35 | (5L, 6L), 36 | (7L, 8L), 37 | (8L, 9L), 38 | (9L, 12L), 39 | (12L, 13L), 40 | (14L, 17L), 41 | (18L, 21L), 42 | (22L, 25L), 43 | (26L, 27L), 44 | (28L, 29L), 45 | (0L, 0L) 46 | ) 47 | ) 48 | } 49 | 50 | test("pretrained-tokenizer-encode") { 51 | val tokenizer = Tokenizer.fromPretrained("bert-base-cased") 52 | val encoding = tokenizer.encode("Hello, y'all! How are you 😁 ?") 53 | 54 | assertEncoding(encoding) 55 | } 56 | 57 | test("pretrained-tokenizer-encode-decode") { 58 | val tokenizer = Tokenizer.fromPretrained("bert-base-cased") 59 | val encoding = tokenizer.encode("Hello, y'all! 
How are you 😁 ?") 60 | val decoded = tokenizer.decode(encoding.ids) 61 | 62 | assertEquals(decoded, "Hello, y ' all! How are you?") 63 | } 64 | 65 | test("pretrained-tokenizer-encode-batch") { 66 | val tokenizer = Tokenizer.fromPretrained("bert-base-cased") 67 | val encodings = tokenizer.encodeBatch(Seq("Hi all", "Hello, y'all! How are you 😁 ?")) 68 | 69 | assertEquals(encodings.length, 2) 70 | 71 | assertEncoding(encodings(1)) 72 | } 73 | 74 | test("pretrained-tokenizer-fail-on-invalid") { 75 | interceptMessage[java.lang.RuntimeException]( 76 | """Model "invalid-tokenizer-123" on the Hub doesn't have a tokenizer""" 77 | ) { 78 | Tokenizer.fromPretrained("invalid-tokenizer-123") 79 | } 80 | } 81 | 82 | } 83 | -------------------------------------------------------------------------------- /tokenizers/src/java/io/brunk/tokenizers/LoadNativeTokenizers.java: -------------------------------------------------------------------------------- 1 | package io.brunk.tokenizers; 2 | 3 | public class LoadNativeTokenizers { 4 | static { 5 | try { 6 | NativeLoader.load("tokenizers"); 7 | } catch (RuntimeException e) { 8 | throw e; 9 | } catch (Exception e) { 10 | throw new RuntimeException(e); 11 | } 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /tokenizers/src/java/io/brunk/tokenizers/NativeLoader.java: -------------------------------------------------------------------------------- 1 | // Copied from https://github.com/oyvindberg/tui-scala/blob/9f6b67db089ac10a183fe9f992a6bc81df125bc9/crossterm/src/java/tui/crossterm/NativeLoader.java 2 | package io.brunk.tokenizers; 3 | 4 | import java.nio.file.Files; 5 | import java.nio.file.Path; 6 | 7 | class NativeLoader { 8 | public static void load(String nativeLibrary) throws Exception { 9 | try { 10 | System.loadLibrary(nativeLibrary); 11 | } catch (UnsatisfiedLinkError e) { 12 | loadPackaged(nativeLibrary); 13 | } 14 | } 15 | 16 | static void loadPackaged(String nativeLibrary) 
throws Exception { 17 | String lib = System.mapLibraryName("native-" + getPlatform() + "-" + nativeLibrary); 18 | var resourcePath = "/" + lib; 19 | var resourceStream = NativeLoader.class.getResourceAsStream(resourcePath); 20 | if (resourceStream == null) { 21 | throw new UnsatisfiedLinkError( 22 | "Native library " + lib + " (" + resourcePath + ") cannot be found on the classpath." 23 | ); 24 | } 25 | 26 | Path tmp = Files.createTempDirectory("jni-"); 27 | Path extractedPath = tmp.resolve(lib); 28 | 29 | try { 30 | Files.copy(resourceStream, extractedPath); 31 | } catch (Exception ex) { 32 | throw new UnsatisfiedLinkError("Error while extracting native library: " + ex.getMessage()); 33 | } 34 | 35 | System.load(extractedPath.toAbsolutePath().toString()); 36 | } 37 | 38 | private static String getPlatform() { 39 | if (System.getenv().containsKey("TOKENIZERS_SCALA_PLATFORM")) { 40 | return System.getenv().get("TOKZENIZERS_SCALA_PLATFORM"); 41 | } 42 | String arch = System.getProperty("os.arch"); 43 | String name = System.getProperty("os.name"); 44 | String nameLower = name.toLowerCase(); 45 | boolean isAmd64 = arch.equals("x86_64") || arch.equals("amd64"); 46 | boolean isArm64 = arch.equals("aarch64") || arch.equals("arm64"); 47 | 48 | if (isAmd64 && nameLower.contains("win")) return "x86_64-windows"; 49 | if (isAmd64 && nameLower.contains("lin")) return "x86_64-linux"; 50 | if (isAmd64 && nameLower.contains("mac")) return "x86_64-darwin"; 51 | if (isArm64 && nameLower.contains("mac")) return "arm64-darwin"; 52 | throw new RuntimeException( 53 | "Platform detection does not understand os.name = " + name + " and os.arch = " + arch + ". " + 54 | "You can set environment variable TUI_SCALA_PLATFORM to x86_64-windows, x86_64-linux, x86_64-darwin, arm64-darwin to override. " + 55 | "Open an issue at https://github.com/oyvindberg/tui-scala/issues ." 
56 | ); 57 | } 58 | } -------------------------------------------------------------------------------- /tokenizers/src/scala/io/brunk/tokenizers/Encoding.scala: -------------------------------------------------------------------------------- 1 | package io.brunk.tokenizers 2 | 3 | import scala.collection.immutable.ArraySeq 4 | import io.brunk.tokenizers.Encoding.freeAction 5 | import io.brunk.tokenizers.Encoding.Offset 6 | 7 | /** Represents the output of a [[Tokenizer]]. */ 8 | class Encoding private[tokenizers] (nativePtr: Long) { 9 | 10 | NativeCleaner.cleaner.register(this, freeAction(nativePtr)) 11 | 12 | @native private def length(encodingPtr: Long): Int 13 | 14 | @native private def ids(encodingPtr: Long): Array[Long] 15 | 16 | @native private def typeIds(encodingPtr: Long): Array[Long] 17 | 18 | @native private def attentionMask(encodingPtr: Long): Array[Long] 19 | 20 | @native private def specialTokensMask(encodingPtr: Long): Array[Long] 21 | 22 | @native private def tokens(encodingPtr: Long): Array[String] 23 | 24 | @native private def wordIds(encodingPtr: Long): Array[Long] 25 | 26 | @native private def nSequences(encodingPtr: Long): Int 27 | 28 | @native private def offsets(encodingPtr: Long): NativeOffsets 29 | 30 | /** The total length of this Encoding */ 31 | def length: Int = length(nativePtr) 32 | 33 | /** The number of sequences in this Encoding */ 34 | def nSequences: Int = nSequences(nativePtr) 35 | 36 | // /** IDs produced by the `Tokenizer` */ 37 | def ids: Seq[Long] = ArraySeq.unsafeWrapArray(ids(nativePtr)) 38 | 39 | /** Type of the IDs */ 40 | def typeIds: Seq[Long] = ArraySeq.unsafeWrapArray(typeIds(nativePtr)) 41 | 42 | /** Tokens associated with each ID */ 43 | def tokens: Seq[String] = ArraySeq.unsafeWrapArray(tokens(nativePtr)) 44 | 45 | /** Index of the word associated with each token/ID */ 46 | def wordIds: Seq[Option[Long]] = 47 | ArraySeq.unsafeWrapArray( 48 | wordIds(nativePtr).map(wordId => if (wordId == -1) None else Some(wordId)) 
49 | ) 50 | 51 | /** Offsets of the token/ID from the NormalizedString */ 52 | def offsets: Seq[Offset] = { 53 | val nativeOffsets = offsets(nativePtr) 54 | ArraySeq.unsafeWrapArray(nativeOffsets.starts.zip(nativeOffsets.ends)) 55 | } 56 | 57 | /** Mask identifying padding tokens for the attention mechanism */ 58 | def attentionMask: Seq[Long] = ArraySeq.unsafeWrapArray(attentionMask(nativePtr)) 59 | 60 | // /** Mask identifying special tokens */ 61 | def specialTokensMask: Seq[Long] = ArraySeq.unsafeWrapArray(specialTokensMask(nativePtr)) 62 | 63 | // /** A list of overflowing Encoding generated when we got truncated */ 64 | // def overflowing: Seq[Encoding] = ??? 65 | 66 | // /* Ranges of tokens covered by each sequence. If this is empty we consider 67 | // there is only one sequence in this Encoding, and that it covers the entire range. */ 68 | // // sequenceRanges: HashMap[usize, Range[usize]], 69 | } 70 | 71 | object Encoding { 72 | type Offset = (Long, Long) 73 | 74 | @native 75 | private def free(nativePtr: Long): Unit 76 | 77 | private def freeAction(nativePtr: Long): Runnable = () => free(nativePtr) 78 | } 79 | -------------------------------------------------------------------------------- /tokenizers/src/scala/io/brunk/tokenizers/NativeCleaner.scala: -------------------------------------------------------------------------------- 1 | package io.brunk.tokenizers 2 | 3 | import java.lang.ref.Cleaner 4 | 5 | object NativeCleaner { 6 | val cleaner = Cleaner.create() 7 | } 8 | -------------------------------------------------------------------------------- /tokenizers/src/scala/io/brunk/tokenizers/Offset.scala: -------------------------------------------------------------------------------- 1 | package io.brunk.tokenizers 2 | 3 | private[tokenizers] class NativeOffsets(val starts: Array[Long], val ends: Array[Long]) 4 | -------------------------------------------------------------------------------- /tokenizers/src/scala/io/brunk/tokenizers/Tokenizer.scala: 
-------------------------------------------------------------------------------- 1 | package io.brunk.tokenizers 2 | 3 | import io.brunk.tokenizers.Tokenizer.freeAction 4 | 5 | import scala.collection.immutable.ArraySeq 6 | import java.nio.file.Path 7 | 8 | class Tokenizer private (nativePtr: Long) { 9 | 10 | NativeCleaner.cleaner.register(this, freeAction(nativePtr)) 11 | 12 | @native 13 | private def encode( 14 | tokenizerPtr: Long, 15 | input: String, 16 | addSpecialTokens: Boolean 17 | ): Long 18 | 19 | def encode(input: String, addSpecialTokens: Boolean = true): Encoding = { 20 | val encodingPtr = encode(nativePtr, input, addSpecialTokens) 21 | new Encoding(encodingPtr) 22 | } 23 | 24 | @native 25 | private def encodeBatch( 26 | tokenizerPtr: Long, 27 | input: Array[String], 28 | addSpecialTokens: Boolean 29 | ): Array[Long] 30 | 31 | def encodeBatch(input: Seq[String], addSpecialTokens: Boolean = true): Seq[Encoding] = { 32 | val encodingsPtr = encodeBatch(nativePtr, input.toArray, addSpecialTokens) 33 | ArraySeq.unsafeWrapArray(encodingsPtr.map(ptr => new Encoding(ptr: Long))) 34 | } 35 | 36 | @native 37 | private def decode( 38 | tokenizerPtr: Long, 39 | ids: Array[Long], 40 | skipSpecialTokens: Boolean 41 | ): String 42 | 43 | def decode(ids: Seq[Long], skipSpecialTokens: Boolean = true): String = 44 | decode(nativePtr, ids.toArray, skipSpecialTokens) 45 | } 46 | 47 | object Tokenizer { 48 | 49 | new LoadNativeTokenizers() 50 | 51 | /** Instantiate a new Tokenizer from an existing file on the Hugging Face Hub. 52 | * 53 | * @param identifier 54 | * The identifier of a Model on the Hugging Face Hub, that contains a tokenizer.json file 55 | * @return 56 | * The new tokenizer 57 | * 58 | * TODO revision and auth token 59 | */ 60 | def fromPretrained(identifier: String): Tokenizer = { 61 | val nativePtr = fromPretrainedNative(identifier) 62 | new Tokenizer(nativePtr) 63 | } 64 | 65 | /** Instantiate a new Tokenizer from the file at the given path. 
66 | * 67 | * @param path 68 | * A path to a local JSON file representing a previously serialized [[Tokenizer]] 69 | * @return 70 | * The new tokenizer 71 | */ 72 | def fromFile(path: Path): Tokenizer = new Tokenizer(fromFile(path.toString())) 73 | 74 | @native 75 | private def fromPretrainedNative(identifier: String): Long 76 | 77 | @native 78 | private def fromFile(path: String): Long 79 | 80 | @native 81 | private def free(nativePtr: Long): Unit 82 | 83 | private def freeAction(nativePtr: Long): Runnable = () => free(nativePtr) 84 | 85 | } 86 | --------------------------------------------------------------------------------