├── .github └── workflows │ └── ci.yaml ├── .gitignore ├── BUILD.bazel ├── CONTRIBUTING.md ├── Cargo.Bazel.lock ├── Cargo.lock ├── Cargo.toml ├── LICENSE ├── MODULE.bazel ├── MODULE.bazel.lock ├── Makefile ├── README.md ├── WORKSPACE ├── benches └── decode_benchmark.rs ├── benchmark.py ├── deps.bzl ├── example ├── BUILD.bazel ├── Dockerfile ├── README.md ├── go.mod ├── go.sum └── main.go ├── go.mod ├── go.sum ├── release ├── Dockerfile ├── go.mod └── main.go ├── src └── lib.rs ├── test ├── BUILD.bazel ├── benchmark │ ├── 1b502b65573ea00125eac62fa301c480402be19c.txt │ ├── 217df12c164da67a24fea94c7d9af6332c616f03.txt │ ├── 3188ded27885d1002698a0e25f0b32306c430e88.txt │ ├── 38a9a14c1c56b113461b0c7350c72de949e23cc2.txt │ ├── 7bb47dd52e68ae3349c0461d494921d6a07f7181.txt │ ├── go_results.txt │ └── rust_results.txt └── data │ ├── Meta-Llama-3-8B-Instruct.json │ ├── Meta-Llama-3-8B-Instruct.model │ ├── all-minilm-l6-v2.json │ ├── bert-base-uncased.json │ ├── cohere-tokenizer.json │ ├── long_text.txt │ └── sentence-transformers-labse.json ├── tokenizer.go ├── tokenizer_test.go └── tokenizers.h /.github/workflows/ci.yaml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: {} 7 | merge_group: {} 8 | workflow_dispatch: {} 9 | 10 | jobs: 11 | build: 12 | runs-on: ubuntu-latest 13 | 14 | steps: 15 | - uses: actions/checkout@v4 16 | 17 | - name: Mount bazel cache 18 | uses: actions/cache@v4 19 | with: 20 | path: "/home/runner/.cache/bazel" 21 | key: bazel 22 | 23 | - name: Install bazelisk 24 | run: | 25 | curl -LO "https://github.com/bazelbuild/bazelisk/releases/download/v1.18.0/bazelisk-linux-amd64" 26 | mkdir -p "${GITHUB_WORKSPACE}/bin/" 27 | mv bazelisk-linux-amd64 "${GITHUB_WORKSPACE}/bin/bazel" 28 | chmod +x "${GITHUB_WORKSPACE}/bin/bazel" 29 | 30 | - name: Test 31 | run: | 32 | "${GITHUB_WORKSPACE}/bin/bazel" test --test_output=errors //... 
33 | 34 | - name: Build 35 | run: | 36 | "${GITHUB_WORKSPACE}/bin/bazel" build //... 37 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | libtokenizers.a 2 | /artifacts 3 | 4 | # Rust 5 | target 6 | 7 | # Bazel 8 | /bazel-* 9 | 10 | # Python 11 | .env 12 | -------------------------------------------------------------------------------- /BUILD.bazel: -------------------------------------------------------------------------------- 1 | load("@crate_index//:defs.bzl", "aliases", "all_crate_deps") 2 | load("@gazelle//:def.bzl", "gazelle", "gazelle_binary") 3 | load("@rules_go//go:def.bzl", "go_library", "go_test") 4 | load("@rules_rust//rust:defs.bzl", "rust_static_library") 5 | 6 | # gazelle:exclude release 7 | # gazelle:prefix github.com/daulet/tokenizers 8 | gazelle( 9 | name = "gazelle", 10 | ) 11 | 12 | gazelle( 13 | name = "gazelle-update-repos", 14 | args = [ 15 | "-from_file=go.mod", 16 | "-to_macro=deps.bzl%go_dependencies", 17 | "-prune", 18 | ], 19 | command = "update-repos", 20 | ) 21 | 22 | rust_static_library( 23 | name = "tokenizers_rs", 24 | srcs = glob([ 25 | "src/**/*.rs", 26 | ]), 27 | version = "1.20.2", 28 | aliases = aliases(), 29 | proc_macro_deps = all_crate_deps( 30 | proc_macro = True, 31 | ), 32 | visibility = ["//visibility:public"], 33 | deps = all_crate_deps(), 34 | ) 35 | 36 | go_test( 37 | name = "tokenizers_test", 38 | srcs = ["tokenizer_test.go"], 39 | data = ["//test:data"], 40 | embedsrcs = ["//test:embeddata"], 41 | deps = [ 42 | ":tokenizers", 43 | "@com_github_stretchr_testify//assert", 44 | "@com_github_stretchr_testify//require", 45 | ], 46 | ) 47 | 48 | go_library( 49 | name = "tokenizers", 50 | srcs = [ 51 | "tokenizer.go", 52 | "tokenizers.h", 53 | ], 54 | cdeps = [ 55 | ":tokenizers_rs", 56 | ], 57 | cgo = True, 58 | importpath = "github.com/daulet/tokenizers", 59 | visibility = 
["//visibility:public"], 60 | ) 61 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | 2 | # Capture benchmarks 3 | 4 | Run the following commands to capture benchmarks for your current commit: 5 | ``` 6 | make build 7 | go test . -bench=. -benchmem -count=6 -timeout 30m | tee benchmarks/$(git rev-parse HEAD).txt 8 | ``` 9 | 10 | Then do the same for the previous commit in upstream/main, and publish the benchstat diff along with your PR: 11 | ``` 12 | git checkout . 13 | benchstat benchmarks/$(git rev-parse HEAD^1).txt benchmarks/$(git rev-parse HEAD).txt 14 | ``` 15 | 16 | It should look something like this: 17 | ``` 18 | goos: darwin 19 | goarch: arm64 20 | pkg: github.com/daulet/tokenizers 21 | │ benchmarks/786da4095f5ca3d598db1236c46401b63874f640.txt │ benchmarks/38a9a14c1c56b113461b0c7350c72de949e23cc2.txt │ 22 | │ sec/op │ sec/op vs base │ 23 | EncodeNTimes-10 13.26µ ± 4% 13.11µ ± 1% -1.09% (p=0.041 n=6) 24 | EncodeNChars-10 3.170n ± 530% 2.989n ± 272% ~ (p=0.937 n=6) 25 | DecodeNTimes-10 4.496µ ± 4% 4.535µ ± 2% ~ (p=0.132 n=6) 26 | DecodeNTokens-10 646.8n ± 6% 656.1n ± 3% ~ (p=0.589 n=6) 27 | geomean 591.2n 584.3n -1.17% 28 | 29 | │ benchmarks/786da4095f5ca3d598db1236c46401b63874f640.txt │ benchmarks/38a9a14c1c56b113461b0c7350c72de949e23cc2.txt │ 30 | │ B/op │ B/op vs base │ 31 | EncodeNTimes-10 232.0 ± 0% 232.0 ± 0% ~ (p=1.000 n=6) ¹ 32 | EncodeNChars-10 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=6) ¹ 33 | DecodeNTimes-10 96.00 ± 0% 96.00 ± 0% ~ (p=1.000 n=6) ¹ 34 | DecodeNTokens-10 7.000 ± 0% 7.000 ± 0% ~ (p=1.000 n=6) ¹ 35 | geomean ² +0.00% ² 36 | ¹ all samples are equal 37 | ² summaries must be >0 to compute geomean 38 | 39 | │ benchmarks/786da4095f5ca3d598db1236c46401b63874f640.txt │ benchmarks/38a9a14c1c56b113461b0c7350c72de949e23cc2.txt │ 40 | │ allocs/op │ allocs/op vs base │ 41 | EncodeNTimes-10 12.00 ± 0% 12.00 ± 0% ~ (p=1.000
n=6) ¹ 42 | EncodeNChars-10 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=6) ¹ 43 | DecodeNTimes-10 3.000 ± 0% 3.000 ± 0% ~ (p=1.000 n=6) ¹ 44 | DecodeNTokens-10 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=6) ¹ 45 | geomean ² +0.00% ² 46 | ¹ all samples are equal 47 | ² summaries must be >0 to compute geomean 48 | ``` 49 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 3 | version = 4 4 | 5 | [[package]] 6 | name = "aho-corasick" 7 | version = "1.1.3" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" 10 | dependencies = [ 11 | "memchr", 12 | ] 13 | 14 | [[package]] 15 | name = "anes" 16 | version = "0.1.6" 17 | source = "registry+https://github.com/rust-lang/crates.io-index" 18 | checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" 19 | 20 | [[package]] 21 | name = "anstyle" 22 | version = "1.0.11" 23 | source = "registry+https://github.com/rust-lang/crates.io-index" 24 | checksum = "862ed96ca487e809f1c8e5a8447f6ee2cf102f846893800b20cebdf541fc6bbd" 25 | 26 | [[package]] 27 | name = "autocfg" 28 | version = "1.4.0" 29 | source = "registry+https://github.com/rust-lang/crates.io-index" 30 | checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" 31 | 32 | [[package]] 33 | name = "base64" 34 | version = "0.13.1" 35 | source = "registry+https://github.com/rust-lang/crates.io-index" 36 | checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" 37 | 38 | [[package]] 39 | name = "bitflags" 40 | version = "2.9.1" 41 | source = "registry+https://github.com/rust-lang/crates.io-index" 42 | checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" 43 | 44 | [[package]] 45 | name = 
"bumpalo" 46 | version = "3.18.1" 47 | source = "registry+https://github.com/rust-lang/crates.io-index" 48 | checksum = "793db76d6187cd04dff33004d8e6c9cc4e05cd330500379d2394209271b4aeee" 49 | 50 | [[package]] 51 | name = "cast" 52 | version = "0.3.0" 53 | source = "registry+https://github.com/rust-lang/crates.io-index" 54 | checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" 55 | 56 | [[package]] 57 | name = "cc" 58 | version = "1.2.26" 59 | source = "registry+https://github.com/rust-lang/crates.io-index" 60 | checksum = "956a5e21988b87f372569b66183b78babf23ebc2e744b733e4350a752c4dafac" 61 | dependencies = [ 62 | "shlex", 63 | ] 64 | 65 | [[package]] 66 | name = "cfg-if" 67 | version = "1.0.1" 68 | source = "registry+https://github.com/rust-lang/crates.io-index" 69 | checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268" 70 | 71 | [[package]] 72 | name = "ciborium" 73 | version = "0.2.2" 74 | source = "registry+https://github.com/rust-lang/crates.io-index" 75 | checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" 76 | dependencies = [ 77 | "ciborium-io", 78 | "ciborium-ll", 79 | "serde", 80 | ] 81 | 82 | [[package]] 83 | name = "ciborium-io" 84 | version = "0.2.2" 85 | source = "registry+https://github.com/rust-lang/crates.io-index" 86 | checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" 87 | 88 | [[package]] 89 | name = "ciborium-ll" 90 | version = "0.2.2" 91 | source = "registry+https://github.com/rust-lang/crates.io-index" 92 | checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" 93 | dependencies = [ 94 | "ciborium-io", 95 | "half", 96 | ] 97 | 98 | [[package]] 99 | name = "clap" 100 | version = "4.5.40" 101 | source = "registry+https://github.com/rust-lang/crates.io-index" 102 | checksum = "40b6887a1d8685cebccf115538db5c0efe625ccac9696ad45c409d96566e910f" 103 | dependencies = [ 104 | "clap_builder", 105 | ] 106 | 107 | [[package]] 108 | 
name = "clap_builder" 109 | version = "4.5.40" 110 | source = "registry+https://github.com/rust-lang/crates.io-index" 111 | checksum = "e0c66c08ce9f0c698cbce5c0279d0bb6ac936d8674174fe48f736533b964f59e" 112 | dependencies = [ 113 | "anstyle", 114 | "clap_lex", 115 | ] 116 | 117 | [[package]] 118 | name = "clap_lex" 119 | version = "0.7.5" 120 | source = "registry+https://github.com/rust-lang/crates.io-index" 121 | checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675" 122 | 123 | [[package]] 124 | name = "console" 125 | version = "0.15.11" 126 | source = "registry+https://github.com/rust-lang/crates.io-index" 127 | checksum = "054ccb5b10f9f2cbf51eb355ca1d05c2d279ce1804688d0db74b4733a5aeafd8" 128 | dependencies = [ 129 | "encode_unicode", 130 | "libc", 131 | "once_cell", 132 | "unicode-width", 133 | "windows-sys", 134 | ] 135 | 136 | [[package]] 137 | name = "criterion" 138 | version = "0.5.1" 139 | source = "registry+https://github.com/rust-lang/crates.io-index" 140 | checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" 141 | dependencies = [ 142 | "anes", 143 | "cast", 144 | "ciborium", 145 | "clap", 146 | "criterion-plot", 147 | "is-terminal", 148 | "itertools 0.10.5", 149 | "num-traits", 150 | "once_cell", 151 | "oorandom", 152 | "plotters", 153 | "rayon", 154 | "regex", 155 | "serde", 156 | "serde_derive", 157 | "serde_json", 158 | "tinytemplate", 159 | "walkdir", 160 | ] 161 | 162 | [[package]] 163 | name = "criterion-plot" 164 | version = "0.5.0" 165 | source = "registry+https://github.com/rust-lang/crates.io-index" 166 | checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" 167 | dependencies = [ 168 | "cast", 169 | "itertools 0.10.5", 170 | ] 171 | 172 | [[package]] 173 | name = "crossbeam-deque" 174 | version = "0.8.6" 175 | source = "registry+https://github.com/rust-lang/crates.io-index" 176 | checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" 177 | 
dependencies = [ 178 | "crossbeam-epoch", 179 | "crossbeam-utils", 180 | ] 181 | 182 | [[package]] 183 | name = "crossbeam-epoch" 184 | version = "0.9.18" 185 | source = "registry+https://github.com/rust-lang/crates.io-index" 186 | checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" 187 | dependencies = [ 188 | "crossbeam-utils", 189 | ] 190 | 191 | [[package]] 192 | name = "crossbeam-utils" 193 | version = "0.8.21" 194 | source = "registry+https://github.com/rust-lang/crates.io-index" 195 | checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" 196 | 197 | [[package]] 198 | name = "crunchy" 199 | version = "0.2.3" 200 | source = "registry+https://github.com/rust-lang/crates.io-index" 201 | checksum = "43da5946c66ffcc7745f48db692ffbb10a83bfe0afd96235c5c2a4fb23994929" 202 | 203 | [[package]] 204 | name = "darling" 205 | version = "0.20.11" 206 | source = "registry+https://github.com/rust-lang/crates.io-index" 207 | checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee" 208 | dependencies = [ 209 | "darling_core", 210 | "darling_macro", 211 | ] 212 | 213 | [[package]] 214 | name = "darling_core" 215 | version = "0.20.11" 216 | source = "registry+https://github.com/rust-lang/crates.io-index" 217 | checksum = "0d00b9596d185e565c2207a0b01f8bd1a135483d02d9b7b0a54b11da8d53412e" 218 | dependencies = [ 219 | "fnv", 220 | "ident_case", 221 | "proc-macro2", 222 | "quote", 223 | "strsim", 224 | "syn", 225 | ] 226 | 227 | [[package]] 228 | name = "darling_macro" 229 | version = "0.20.11" 230 | source = "registry+https://github.com/rust-lang/crates.io-index" 231 | checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" 232 | dependencies = [ 233 | "darling_core", 234 | "quote", 235 | "syn", 236 | ] 237 | 238 | [[package]] 239 | name = "derive_builder" 240 | version = "0.20.2" 241 | source = "registry+https://github.com/rust-lang/crates.io-index" 242 | checksum = 
"507dfb09ea8b7fa618fcf76e953f4f5e192547945816d5358edffe39f6f94947" 243 | dependencies = [ 244 | "derive_builder_macro", 245 | ] 246 | 247 | [[package]] 248 | name = "derive_builder_core" 249 | version = "0.20.2" 250 | source = "registry+https://github.com/rust-lang/crates.io-index" 251 | checksum = "2d5bcf7b024d6835cfb3d473887cd966994907effbe9227e8c8219824d06c4e8" 252 | dependencies = [ 253 | "darling", 254 | "proc-macro2", 255 | "quote", 256 | "syn", 257 | ] 258 | 259 | [[package]] 260 | name = "derive_builder_macro" 261 | version = "0.20.2" 262 | source = "registry+https://github.com/rust-lang/crates.io-index" 263 | checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c" 264 | dependencies = [ 265 | "derive_builder_core", 266 | "syn", 267 | ] 268 | 269 | [[package]] 270 | name = "either" 271 | version = "1.15.0" 272 | source = "registry+https://github.com/rust-lang/crates.io-index" 273 | checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" 274 | 275 | [[package]] 276 | name = "encode_unicode" 277 | version = "1.0.0" 278 | source = "registry+https://github.com/rust-lang/crates.io-index" 279 | checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0" 280 | 281 | [[package]] 282 | name = "esaxx-rs" 283 | version = "0.1.10" 284 | source = "registry+https://github.com/rust-lang/crates.io-index" 285 | checksum = "d817e038c30374a4bcb22f94d0a8a0e216958d4c3dcde369b1439fec4bdda6e6" 286 | dependencies = [ 287 | "cc", 288 | ] 289 | 290 | [[package]] 291 | name = "fnv" 292 | version = "1.0.7" 293 | source = "registry+https://github.com/rust-lang/crates.io-index" 294 | checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" 295 | 296 | [[package]] 297 | name = "getrandom" 298 | version = "0.2.16" 299 | source = "registry+https://github.com/rust-lang/crates.io-index" 300 | checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" 301 | dependencies = [ 302 | "cfg-if", 
303 | "libc", 304 | "wasi", 305 | ] 306 | 307 | [[package]] 308 | name = "half" 309 | version = "2.6.0" 310 | source = "registry+https://github.com/rust-lang/crates.io-index" 311 | checksum = "459196ed295495a68f7d7fe1d84f6c4b7ff0e21fe3017b2f283c6fac3ad803c9" 312 | dependencies = [ 313 | "cfg-if", 314 | "crunchy", 315 | ] 316 | 317 | [[package]] 318 | name = "hermit-abi" 319 | version = "0.5.1" 320 | source = "registry+https://github.com/rust-lang/crates.io-index" 321 | checksum = "f154ce46856750ed433c8649605bf7ed2de3bc35fd9d2a9f30cddd873c80cb08" 322 | 323 | [[package]] 324 | name = "ident_case" 325 | version = "1.0.1" 326 | source = "registry+https://github.com/rust-lang/crates.io-index" 327 | checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" 328 | 329 | [[package]] 330 | name = "indicatif" 331 | version = "0.17.11" 332 | source = "registry+https://github.com/rust-lang/crates.io-index" 333 | checksum = "183b3088984b400f4cfac3620d5e076c84da5364016b4f49473de574b2586235" 334 | dependencies = [ 335 | "console", 336 | "number_prefix", 337 | "portable-atomic", 338 | "unicode-width", 339 | "web-time", 340 | ] 341 | 342 | [[package]] 343 | name = "is-terminal" 344 | version = "0.4.16" 345 | source = "registry+https://github.com/rust-lang/crates.io-index" 346 | checksum = "e04d7f318608d35d4b61ddd75cbdaee86b023ebe2bd5a66ee0915f0bf93095a9" 347 | dependencies = [ 348 | "hermit-abi", 349 | "libc", 350 | "windows-sys", 351 | ] 352 | 353 | [[package]] 354 | name = "itertools" 355 | version = "0.10.5" 356 | source = "registry+https://github.com/rust-lang/crates.io-index" 357 | checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" 358 | dependencies = [ 359 | "either", 360 | ] 361 | 362 | [[package]] 363 | name = "itertools" 364 | version = "0.11.0" 365 | source = "registry+https://github.com/rust-lang/crates.io-index" 366 | checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" 367 | dependencies = [ 368 | 
"either", 369 | ] 370 | 371 | [[package]] 372 | name = "itertools" 373 | version = "0.12.1" 374 | source = "registry+https://github.com/rust-lang/crates.io-index" 375 | checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" 376 | dependencies = [ 377 | "either", 378 | ] 379 | 380 | [[package]] 381 | name = "itoa" 382 | version = "1.0.15" 383 | source = "registry+https://github.com/rust-lang/crates.io-index" 384 | checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" 385 | 386 | [[package]] 387 | name = "js-sys" 388 | version = "0.3.77" 389 | source = "registry+https://github.com/rust-lang/crates.io-index" 390 | checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f" 391 | dependencies = [ 392 | "once_cell", 393 | "wasm-bindgen", 394 | ] 395 | 396 | [[package]] 397 | name = "lazy_static" 398 | version = "1.5.0" 399 | source = "registry+https://github.com/rust-lang/crates.io-index" 400 | checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" 401 | 402 | [[package]] 403 | name = "libc" 404 | version = "0.2.172" 405 | source = "registry+https://github.com/rust-lang/crates.io-index" 406 | checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa" 407 | 408 | [[package]] 409 | name = "log" 410 | version = "0.4.27" 411 | source = "registry+https://github.com/rust-lang/crates.io-index" 412 | checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" 413 | 414 | [[package]] 415 | name = "macro_rules_attribute" 416 | version = "0.2.2" 417 | source = "registry+https://github.com/rust-lang/crates.io-index" 418 | checksum = "65049d7923698040cd0b1ddcced9b0eb14dd22c5f86ae59c3740eab64a676520" 419 | dependencies = [ 420 | "macro_rules_attribute-proc_macro", 421 | "paste", 422 | ] 423 | 424 | [[package]] 425 | name = "macro_rules_attribute-proc_macro" 426 | version = "0.2.2" 427 | source = "registry+https://github.com/rust-lang/crates.io-index" 428 | 
checksum = "670fdfda89751bc4a84ac13eaa63e205cf0fd22b4c9a5fbfa085b63c1f1d3a30" 429 | 430 | [[package]] 431 | name = "memchr" 432 | version = "2.7.4" 433 | source = "registry+https://github.com/rust-lang/crates.io-index" 434 | checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" 435 | 436 | [[package]] 437 | name = "minimal-lexical" 438 | version = "0.2.1" 439 | source = "registry+https://github.com/rust-lang/crates.io-index" 440 | checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" 441 | 442 | [[package]] 443 | name = "monostate" 444 | version = "0.1.14" 445 | source = "registry+https://github.com/rust-lang/crates.io-index" 446 | checksum = "aafe1be9d0c75642e3e50fedc7ecadf1ef1cbce6eb66462153fc44245343fbee" 447 | dependencies = [ 448 | "monostate-impl", 449 | "serde", 450 | ] 451 | 452 | [[package]] 453 | name = "monostate-impl" 454 | version = "0.1.14" 455 | source = "registry+https://github.com/rust-lang/crates.io-index" 456 | checksum = "c402a4092d5e204f32c9e155431046831fa712637043c58cb73bc6bc6c9663b5" 457 | dependencies = [ 458 | "proc-macro2", 459 | "quote", 460 | "syn", 461 | ] 462 | 463 | [[package]] 464 | name = "nom" 465 | version = "7.1.3" 466 | source = "registry+https://github.com/rust-lang/crates.io-index" 467 | checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" 468 | dependencies = [ 469 | "memchr", 470 | "minimal-lexical", 471 | ] 472 | 473 | [[package]] 474 | name = "num-traits" 475 | version = "0.2.19" 476 | source = "registry+https://github.com/rust-lang/crates.io-index" 477 | checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" 478 | dependencies = [ 479 | "autocfg", 480 | ] 481 | 482 | [[package]] 483 | name = "number_prefix" 484 | version = "0.4.0" 485 | source = "registry+https://github.com/rust-lang/crates.io-index" 486 | checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" 487 | 488 | [[package]] 489 | name = 
"once_cell" 490 | version = "1.21.3" 491 | source = "registry+https://github.com/rust-lang/crates.io-index" 492 | checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" 493 | 494 | [[package]] 495 | name = "onig" 496 | version = "6.5.1" 497 | source = "registry+https://github.com/rust-lang/crates.io-index" 498 | checksum = "336b9c63443aceef14bea841b899035ae3abe89b7c486aaf4c5bd8aafedac3f0" 499 | dependencies = [ 500 | "bitflags", 501 | "libc", 502 | "once_cell", 503 | "onig_sys", 504 | ] 505 | 506 | [[package]] 507 | name = "onig_sys" 508 | version = "69.9.1" 509 | source = "registry+https://github.com/rust-lang/crates.io-index" 510 | checksum = "c7f86c6eef3d6df15f23bcfb6af487cbd2fed4e5581d58d5bf1f5f8b7f6727dc" 511 | dependencies = [ 512 | "cc", 513 | "pkg-config", 514 | ] 515 | 516 | [[package]] 517 | name = "oorandom" 518 | version = "11.1.5" 519 | source = "registry+https://github.com/rust-lang/crates.io-index" 520 | checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" 521 | 522 | [[package]] 523 | name = "paste" 524 | version = "1.0.15" 525 | source = "registry+https://github.com/rust-lang/crates.io-index" 526 | checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" 527 | 528 | [[package]] 529 | name = "pkg-config" 530 | version = "0.3.32" 531 | source = "registry+https://github.com/rust-lang/crates.io-index" 532 | checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" 533 | 534 | [[package]] 535 | name = "plotters" 536 | version = "0.3.7" 537 | source = "registry+https://github.com/rust-lang/crates.io-index" 538 | checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747" 539 | dependencies = [ 540 | "num-traits", 541 | "plotters-backend", 542 | "plotters-svg", 543 | "wasm-bindgen", 544 | "web-sys", 545 | ] 546 | 547 | [[package]] 548 | name = "plotters-backend" 549 | version = "0.3.7" 550 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 551 | checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a" 552 | 553 | [[package]] 554 | name = "plotters-svg" 555 | version = "0.3.7" 556 | source = "registry+https://github.com/rust-lang/crates.io-index" 557 | checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670" 558 | dependencies = [ 559 | "plotters-backend", 560 | ] 561 | 562 | [[package]] 563 | name = "portable-atomic" 564 | version = "1.11.1" 565 | source = "registry+https://github.com/rust-lang/crates.io-index" 566 | checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483" 567 | 568 | [[package]] 569 | name = "ppv-lite86" 570 | version = "0.2.21" 571 | source = "registry+https://github.com/rust-lang/crates.io-index" 572 | checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" 573 | dependencies = [ 574 | "zerocopy", 575 | ] 576 | 577 | [[package]] 578 | name = "proc-macro2" 579 | version = "1.0.95" 580 | source = "registry+https://github.com/rust-lang/crates.io-index" 581 | checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" 582 | dependencies = [ 583 | "unicode-ident", 584 | ] 585 | 586 | [[package]] 587 | name = "quote" 588 | version = "1.0.40" 589 | source = "registry+https://github.com/rust-lang/crates.io-index" 590 | checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" 591 | dependencies = [ 592 | "proc-macro2", 593 | ] 594 | 595 | [[package]] 596 | name = "rand" 597 | version = "0.8.5" 598 | source = "registry+https://github.com/rust-lang/crates.io-index" 599 | checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" 600 | dependencies = [ 601 | "libc", 602 | "rand_chacha", 603 | "rand_core", 604 | ] 605 | 606 | [[package]] 607 | name = "rand_chacha" 608 | version = "0.3.1" 609 | source = "registry+https://github.com/rust-lang/crates.io-index" 610 | checksum = 
"e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" 611 | dependencies = [ 612 | "ppv-lite86", 613 | "rand_core", 614 | ] 615 | 616 | [[package]] 617 | name = "rand_core" 618 | version = "0.6.4" 619 | source = "registry+https://github.com/rust-lang/crates.io-index" 620 | checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" 621 | dependencies = [ 622 | "getrandom", 623 | ] 624 | 625 | [[package]] 626 | name = "rayon" 627 | version = "1.10.0" 628 | source = "registry+https://github.com/rust-lang/crates.io-index" 629 | checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" 630 | dependencies = [ 631 | "either", 632 | "rayon-core", 633 | ] 634 | 635 | [[package]] 636 | name = "rayon-cond" 637 | version = "0.3.0" 638 | source = "registry+https://github.com/rust-lang/crates.io-index" 639 | checksum = "059f538b55efd2309c9794130bc149c6a553db90e9d99c2030785c82f0bd7df9" 640 | dependencies = [ 641 | "either", 642 | "itertools 0.11.0", 643 | "rayon", 644 | ] 645 | 646 | [[package]] 647 | name = "rayon-core" 648 | version = "1.12.1" 649 | source = "registry+https://github.com/rust-lang/crates.io-index" 650 | checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" 651 | dependencies = [ 652 | "crossbeam-deque", 653 | "crossbeam-utils", 654 | ] 655 | 656 | [[package]] 657 | name = "regex" 658 | version = "1.11.1" 659 | source = "registry+https://github.com/rust-lang/crates.io-index" 660 | checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" 661 | dependencies = [ 662 | "aho-corasick", 663 | "memchr", 664 | "regex-automata", 665 | "regex-syntax", 666 | ] 667 | 668 | [[package]] 669 | name = "regex-automata" 670 | version = "0.4.9" 671 | source = "registry+https://github.com/rust-lang/crates.io-index" 672 | checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" 673 | dependencies = [ 674 | "aho-corasick", 675 | "memchr", 676 | "regex-syntax", 677 | ] 
678 | 679 | [[package]] 680 | name = "regex-syntax" 681 | version = "0.8.5" 682 | source = "registry+https://github.com/rust-lang/crates.io-index" 683 | checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" 684 | 685 | [[package]] 686 | name = "rustversion" 687 | version = "1.0.21" 688 | source = "registry+https://github.com/rust-lang/crates.io-index" 689 | checksum = "8a0d197bd2c9dc6e53b84da9556a69ba4cdfab8619eb41a8bd1cc2027a0f6b1d" 690 | 691 | [[package]] 692 | name = "ryu" 693 | version = "1.0.20" 694 | source = "registry+https://github.com/rust-lang/crates.io-index" 695 | checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" 696 | 697 | [[package]] 698 | name = "same-file" 699 | version = "1.0.6" 700 | source = "registry+https://github.com/rust-lang/crates.io-index" 701 | checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" 702 | dependencies = [ 703 | "winapi-util", 704 | ] 705 | 706 | [[package]] 707 | name = "serde" 708 | version = "1.0.219" 709 | source = "registry+https://github.com/rust-lang/crates.io-index" 710 | checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" 711 | dependencies = [ 712 | "serde_derive", 713 | ] 714 | 715 | [[package]] 716 | name = "serde_derive" 717 | version = "1.0.219" 718 | source = "registry+https://github.com/rust-lang/crates.io-index" 719 | checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" 720 | dependencies = [ 721 | "proc-macro2", 722 | "quote", 723 | "syn", 724 | ] 725 | 726 | [[package]] 727 | name = "serde_json" 728 | version = "1.0.140" 729 | source = "registry+https://github.com/rust-lang/crates.io-index" 730 | checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373" 731 | dependencies = [ 732 | "itoa", 733 | "memchr", 734 | "ryu", 735 | "serde", 736 | ] 737 | 738 | [[package]] 739 | name = "shlex" 740 | version = "1.3.0" 741 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 742 | checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" 743 | 744 | [[package]] 745 | name = "smallvec" 746 | version = "1.15.1" 747 | source = "registry+https://github.com/rust-lang/crates.io-index" 748 | checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" 749 | 750 | [[package]] 751 | name = "spm_precompiled" 752 | version = "0.1.4" 753 | source = "registry+https://github.com/rust-lang/crates.io-index" 754 | checksum = "5851699c4033c63636f7ea4cf7b7c1f1bf06d0cc03cfb42e711de5a5c46cf326" 755 | dependencies = [ 756 | "base64", 757 | "nom", 758 | "serde", 759 | "unicode-segmentation", 760 | ] 761 | 762 | [[package]] 763 | name = "strsim" 764 | version = "0.11.1" 765 | source = "registry+https://github.com/rust-lang/crates.io-index" 766 | checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" 767 | 768 | [[package]] 769 | name = "syn" 770 | version = "2.0.102" 771 | source = "registry+https://github.com/rust-lang/crates.io-index" 772 | checksum = "f6397daf94fa90f058bd0fd88429dd9e5738999cca8d701813c80723add80462" 773 | dependencies = [ 774 | "proc-macro2", 775 | "quote", 776 | "unicode-ident", 777 | ] 778 | 779 | [[package]] 780 | name = "thiserror" 781 | version = "1.0.69" 782 | source = "registry+https://github.com/rust-lang/crates.io-index" 783 | checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" 784 | dependencies = [ 785 | "thiserror-impl", 786 | ] 787 | 788 | [[package]] 789 | name = "thiserror-impl" 790 | version = "1.0.69" 791 | source = "registry+https://github.com/rust-lang/crates.io-index" 792 | checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" 793 | dependencies = [ 794 | "proc-macro2", 795 | "quote", 796 | "syn", 797 | ] 798 | 799 | [[package]] 800 | name = "tinytemplate" 801 | version = "1.2.1" 802 | source = "registry+https://github.com/rust-lang/crates.io-index" 803 | 
checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" 804 | dependencies = [ 805 | "serde", 806 | "serde_json", 807 | ] 808 | 809 | [[package]] 810 | name = "tokenizers" 811 | version = "0.20.4" 812 | source = "registry+https://github.com/rust-lang/crates.io-index" 813 | checksum = "3b08cc37428a476fc9e20ac850132a513a2e1ce32b6a31addf2b74fa7033b905" 814 | dependencies = [ 815 | "aho-corasick", 816 | "derive_builder", 817 | "esaxx-rs", 818 | "getrandom", 819 | "indicatif", 820 | "itertools 0.12.1", 821 | "lazy_static", 822 | "log", 823 | "macro_rules_attribute", 824 | "monostate", 825 | "onig", 826 | "paste", 827 | "rand", 828 | "rayon", 829 | "rayon-cond", 830 | "regex", 831 | "regex-syntax", 832 | "serde", 833 | "serde_json", 834 | "spm_precompiled", 835 | "thiserror", 836 | "unicode-normalization-alignments", 837 | "unicode-segmentation", 838 | "unicode_categories", 839 | ] 840 | 841 | [[package]] 842 | name = "tokenizers" 843 | version = "2.20.2" 844 | dependencies = [ 845 | "criterion", 846 | "libc", 847 | "rand", 848 | "tokenizers 0.20.4", 849 | ] 850 | 851 | [[package]] 852 | name = "unicode-ident" 853 | version = "1.0.18" 854 | source = "registry+https://github.com/rust-lang/crates.io-index" 855 | checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" 856 | 857 | [[package]] 858 | name = "unicode-normalization-alignments" 859 | version = "0.1.12" 860 | source = "registry+https://github.com/rust-lang/crates.io-index" 861 | checksum = "43f613e4fa046e69818dd287fdc4bc78175ff20331479dab6e1b0f98d57062de" 862 | dependencies = [ 863 | "smallvec", 864 | ] 865 | 866 | [[package]] 867 | name = "unicode-segmentation" 868 | version = "1.12.0" 869 | source = "registry+https://github.com/rust-lang/crates.io-index" 870 | checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" 871 | 872 | [[package]] 873 | name = "unicode-width" 874 | version = "0.2.1" 875 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 876 | checksum = "4a1a07cc7db3810833284e8d372ccdc6da29741639ecc70c9ec107df0fa6154c" 877 | 878 | [[package]] 879 | name = "unicode_categories" 880 | version = "0.1.1" 881 | source = "registry+https://github.com/rust-lang/crates.io-index" 882 | checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e" 883 | 884 | [[package]] 885 | name = "walkdir" 886 | version = "2.5.0" 887 | source = "registry+https://github.com/rust-lang/crates.io-index" 888 | checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" 889 | dependencies = [ 890 | "same-file", 891 | "winapi-util", 892 | ] 893 | 894 | [[package]] 895 | name = "wasi" 896 | version = "0.11.1+wasi-snapshot-preview1" 897 | source = "registry+https://github.com/rust-lang/crates.io-index" 898 | checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" 899 | 900 | [[package]] 901 | name = "wasm-bindgen" 902 | version = "0.2.100" 903 | source = "registry+https://github.com/rust-lang/crates.io-index" 904 | checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" 905 | dependencies = [ 906 | "cfg-if", 907 | "once_cell", 908 | "rustversion", 909 | "wasm-bindgen-macro", 910 | ] 911 | 912 | [[package]] 913 | name = "wasm-bindgen-backend" 914 | version = "0.2.100" 915 | source = "registry+https://github.com/rust-lang/crates.io-index" 916 | checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6" 917 | dependencies = [ 918 | "bumpalo", 919 | "log", 920 | "proc-macro2", 921 | "quote", 922 | "syn", 923 | "wasm-bindgen-shared", 924 | ] 925 | 926 | [[package]] 927 | name = "wasm-bindgen-macro" 928 | version = "0.2.100" 929 | source = "registry+https://github.com/rust-lang/crates.io-index" 930 | checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407" 931 | dependencies = [ 932 | "quote", 933 | "wasm-bindgen-macro-support", 934 | ] 935 | 936 | [[package]] 937 | 
name = "wasm-bindgen-macro-support" 938 | version = "0.2.100" 939 | source = "registry+https://github.com/rust-lang/crates.io-index" 940 | checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" 941 | dependencies = [ 942 | "proc-macro2", 943 | "quote", 944 | "syn", 945 | "wasm-bindgen-backend", 946 | "wasm-bindgen-shared", 947 | ] 948 | 949 | [[package]] 950 | name = "wasm-bindgen-shared" 951 | version = "0.2.100" 952 | source = "registry+https://github.com/rust-lang/crates.io-index" 953 | checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d" 954 | dependencies = [ 955 | "unicode-ident", 956 | ] 957 | 958 | [[package]] 959 | name = "web-sys" 960 | version = "0.3.77" 961 | source = "registry+https://github.com/rust-lang/crates.io-index" 962 | checksum = "33b6dd2ef9186f1f2072e409e99cd22a975331a6b3591b12c764e0e55c60d5d2" 963 | dependencies = [ 964 | "js-sys", 965 | "wasm-bindgen", 966 | ] 967 | 968 | [[package]] 969 | name = "web-time" 970 | version = "1.1.0" 971 | source = "registry+https://github.com/rust-lang/crates.io-index" 972 | checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" 973 | dependencies = [ 974 | "js-sys", 975 | "wasm-bindgen", 976 | ] 977 | 978 | [[package]] 979 | name = "winapi-util" 980 | version = "0.1.9" 981 | source = "registry+https://github.com/rust-lang/crates.io-index" 982 | checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" 983 | dependencies = [ 984 | "windows-sys", 985 | ] 986 | 987 | [[package]] 988 | name = "windows-sys" 989 | version = "0.59.0" 990 | source = "registry+https://github.com/rust-lang/crates.io-index" 991 | checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" 992 | dependencies = [ 993 | "windows-targets", 994 | ] 995 | 996 | [[package]] 997 | name = "windows-targets" 998 | version = "0.52.6" 999 | source = "registry+https://github.com/rust-lang/crates.io-index" 1000 | checksum = 
"9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" 1001 | dependencies = [ 1002 | "windows_aarch64_gnullvm", 1003 | "windows_aarch64_msvc", 1004 | "windows_i686_gnu", 1005 | "windows_i686_gnullvm", 1006 | "windows_i686_msvc", 1007 | "windows_x86_64_gnu", 1008 | "windows_x86_64_gnullvm", 1009 | "windows_x86_64_msvc", 1010 | ] 1011 | 1012 | [[package]] 1013 | name = "windows_aarch64_gnullvm" 1014 | version = "0.52.6" 1015 | source = "registry+https://github.com/rust-lang/crates.io-index" 1016 | checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" 1017 | 1018 | [[package]] 1019 | name = "windows_aarch64_msvc" 1020 | version = "0.52.6" 1021 | source = "registry+https://github.com/rust-lang/crates.io-index" 1022 | checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" 1023 | 1024 | [[package]] 1025 | name = "windows_i686_gnu" 1026 | version = "0.52.6" 1027 | source = "registry+https://github.com/rust-lang/crates.io-index" 1028 | checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" 1029 | 1030 | [[package]] 1031 | name = "windows_i686_gnullvm" 1032 | version = "0.52.6" 1033 | source = "registry+https://github.com/rust-lang/crates.io-index" 1034 | checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" 1035 | 1036 | [[package]] 1037 | name = "windows_i686_msvc" 1038 | version = "0.52.6" 1039 | source = "registry+https://github.com/rust-lang/crates.io-index" 1040 | checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" 1041 | 1042 | [[package]] 1043 | name = "windows_x86_64_gnu" 1044 | version = "0.52.6" 1045 | source = "registry+https://github.com/rust-lang/crates.io-index" 1046 | checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" 1047 | 1048 | [[package]] 1049 | name = "windows_x86_64_gnullvm" 1050 | version = "0.52.6" 1051 | source = "registry+https://github.com/rust-lang/crates.io-index" 1052 | checksum = 
"24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" 1053 | 1054 | [[package]] 1055 | name = "windows_x86_64_msvc" 1056 | version = "0.52.6" 1057 | source = "registry+https://github.com/rust-lang/crates.io-index" 1058 | checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" 1059 | 1060 | [[package]] 1061 | name = "zerocopy" 1062 | version = "0.8.25" 1063 | source = "registry+https://github.com/rust-lang/crates.io-index" 1064 | checksum = "a1702d9583232ddb9174e01bb7c15a2ab8fb1bc6f227aa1233858c351a3ba0cb" 1065 | dependencies = [ 1066 | "zerocopy-derive", 1067 | ] 1068 | 1069 | [[package]] 1070 | name = "zerocopy-derive" 1071 | version = "0.8.25" 1072 | source = "registry+https://github.com/rust-lang/crates.io-index" 1073 | checksum = "28a6e20d751156648aa063f3800b706ee209a32c0b4d9f24be3d980b01be55ef" 1074 | dependencies = [ 1075 | "proc-macro2", 1076 | "quote", 1077 | "syn", 1078 | ] 1079 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "tokenizers" 3 | # Bump major version every time we change the behavior of the library. 4 | # Bump minor.patch version every time we bump tokenizers dependency version. 
5 | version = "2.20.2" 6 | edition = "2021" 7 | 8 | [lib] 9 | crate-type = ["staticlib"] 10 | 11 | [dependencies] 12 | libc = "0.2.162" 13 | tokenizers = {version = "0.20.2" } 14 | 15 | [dev-dependencies] 16 | criterion = { version = "0.5.1", features = ["html_reports"] } 17 | rand = "0.8.5" 18 | 19 | [[bench]] 20 | name = "decode_benchmark" 21 | harness = false 22 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Daulet Zhanguzin 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /MODULE.bazel: -------------------------------------------------------------------------------- 1 | module( 2 | name = "com_github_daulet_tokenizers", 3 | version = "1.20.2", 4 | ) 5 | 6 | bazel_dep(name = "gazelle", version = "0.37.0") 7 | bazel_dep(name = "rules_rust", version = "0.59.2") 8 | bazel_dep(name = "rules_go", version = "0.50.1") 9 | 10 | go_deps = use_extension("@gazelle//:extensions.bzl", "go_deps") 11 | use_repo(go_deps, "com_github_stretchr_testify") 12 | go_deps.from_file(go_mod = "//:go.mod") 13 | 14 | rust = use_extension("@rules_rust//rust:extensions.bzl", "rust") 15 | rust.toolchain(edition = "2021") 16 | 17 | crate = use_extension("@rules_rust//crate_universe:extensions.bzl", "crate") 18 | crate.from_cargo( 19 | name = "crate_index", 20 | cargo_lockfile = "//:Cargo.lock", 21 | manifests = ["//:Cargo.toml"], 22 | ) 23 | use_repo(crate, "crate_index") 24 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | build: 2 | @cargo build --release 3 | @cp target/release/libtokenizers.a . 4 | @go build . 5 | 6 | build-example: 7 | @docker build -f ./example/Dockerfile . -t tokenizers-example 8 | 9 | release-darwin-%: test 10 | cargo build --release --target $*-apple-darwin 11 | mkdir -p artifacts/darwin-$* 12 | cp target/$*-apple-darwin/release/libtokenizers.a artifacts/darwin-$*/libtokenizers.a 13 | cd artifacts/darwin-$* && \ 14 | tar -czf libtokenizers.darwin-$*.tar.gz libtokenizers.a 15 | mkdir -p artifacts/all 16 | cp artifacts/darwin-$*/libtokenizers.darwin-$*.tar.gz artifacts/all/libtokenizers.darwin-$*.tar.gz 17 | 18 | release-linux-%: test 19 | docker buildx build --platform linux/$* --build-arg="DOCKER_TARGETPLATFORM=linux/$*" -f release/Dockerfile . 
-t tokenizers.linux-$* 20 | mkdir -p artifacts/linux-$* 21 | docker run -v $(PWD)/artifacts/linux-$*:/mnt --entrypoint ls tokenizers.linux-$* /workspace/tokenizers/lib/linux 22 | docker run -v $(PWD)/artifacts/linux-$*:/mnt --entrypoint cp tokenizers.linux-$* /workspace/tokenizers/lib/linux/$*/libtokenizers.a /mnt/libtokenizers.a 23 | cd artifacts/linux-$* && \ 24 | tar -czf libtokenizers.linux-$*.tar.gz libtokenizers.a 25 | mkdir -p artifacts/all 26 | cp artifacts/linux-$*/libtokenizers.linux-$*.tar.gz artifacts/all/libtokenizers.linux-$*.tar.gz 27 | 28 | release: release-darwin-aarch64 release-darwin-x86_64 release-linux-arm64 release-linux-x86_64 29 | cp artifacts/all/libtokenizers.darwin-aarch64.tar.gz artifacts/all/libtokenizers.darwin-arm64.tar.gz 30 | cp artifacts/all/libtokenizers.linux-arm64.tar.gz artifacts/all/libtokenizers.linux-aarch64.tar.gz 31 | cp artifacts/all/libtokenizers.linux-x86_64.tar.gz artifacts/all/libtokenizers.linux-amd64.tar.gz 32 | 33 | test: build 34 | @go test -ldflags="-extldflags '-L./'" -v ./... -count=1 35 | 36 | clean: 37 | rm -rf libtokenizers.a target 38 | 39 | bazel-sync: 40 | CARGO_BAZEL_REPIN=1 bazel sync --only=crate_index 41 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Tokenizers 2 | 3 | Go bindings for the [HuggingFace Tokenizers](https://github.com/huggingface/tokenizers) library. 4 | 5 | ## Installation 6 | 7 | `make build` to build `libtokenizers.a` that you need to run your application that uses bindings. In addition, you need to inform the linker where to find that static library: `go run -ldflags="-extldflags '-L./path/to/libtokenizers/directory'" .` or just add it to the `CGO_LDFLAGS` environment variable: `CGO_LDFLAGS="-L./path/to/libtokenizers/directory"` to avoid specifying it every time. 
8 | 9 | ### Using pre-built binaries 10 | 11 | If you don't want to install the Rust toolchain, build the library in Docker: `docker build --platform=linux/amd64 -f release/Dockerfile .` or use prebuilt binaries from the [releases](https://github.com/daulet/tokenizers/releases) page. Prebuilt libraries are available for: 12 | 13 | * [darwin-arm64](https://github.com/daulet/tokenizers/releases/latest/download/libtokenizers.darwin-arm64.tar.gz) 14 | * [linux-arm64](https://github.com/daulet/tokenizers/releases/latest/download/libtokenizers.linux-arm64.tar.gz) 15 | * [linux-amd64](https://github.com/daulet/tokenizers/releases/latest/download/libtokenizers.linux-amd64.tar.gz) 16 | 17 | ## Getting started 18 | 19 | TLDR: [working example](example/main.go). 20 | 21 | Load a tokenizer from a JSON config: 22 | 23 | ```go 24 | import "github.com/daulet/tokenizers" 25 | 26 | tk, err := tokenizers.FromFile("./data/bert-base-uncased.json") 27 | if err != nil { 28 | return err 29 | } 30 | // release native resources 31 | defer tk.Close() 32 | ``` 33 | 34 | Load a tokenizer from Hugging Face: 35 | 36 | ```go 37 | import "github.com/daulet/tokenizers" 38 | 39 | tk, err := tokenizers.FromPretrained("google-bert/bert-base-uncased") 40 | if err != nil { 41 | return err 42 | } 43 | // release native resources 44 | defer tk.Close() 45 | ``` 46 | 47 | Encode text and decode tokens: 48 | 49 | ```go 50 | fmt.Println("Vocab size:", tk.VocabSize()) 51 | // Vocab size: 30522 52 | fmt.Println(tk.Encode("brown fox jumps over the lazy dog", false)) 53 | // [2829 4419 14523 2058 1996 13971 3899] [brown fox jumps over the lazy dog] 54 | fmt.Println(tk.Encode("brown fox jumps over the lazy dog", true)) 55 | // [101 2829 4419 14523 2058 1996 13971 3899 102] [[CLS] brown fox jumps over the lazy dog [SEP]] 56 | fmt.Println(tk.Decode([]uint32{2829, 4419, 14523, 2058, 1996, 13971, 3899}, true)) 57 | // brown fox jumps over the lazy dog 58 | ``` 59 | 60 | Encode text with options: 61 | 62 | ```go 63 | var 
encodeOptions []tokenizers.EncodeOption 64 | encodeOptions = append(encodeOptions, tokenizers.WithReturnTypeIDs()) 65 | encodeOptions = append(encodeOptions, tokenizers.WithReturnAttentionMask()) 66 | encodeOptions = append(encodeOptions, tokenizers.WithReturnTokens()) 67 | encodeOptions = append(encodeOptions, tokenizers.WithReturnOffsets()) 68 | encodeOptions = append(encodeOptions, tokenizers.WithReturnSpecialTokensMask()) 69 | 70 | // Or simply: 71 | // encodeOptions = append(encodeOptions, tokenizers.WithReturnAllAttributes()) 72 | 73 | encodingResponse := tk.EncodeWithOptions("brown fox jumps over the lazy dog", false, encodeOptions...) 74 | fmt.Println(encodingResponse.IDs) 75 | // [2829 4419 14523 2058 1996 13971 3899] 76 | fmt.Println(encodingResponse.TypeIDs) 77 | // [0 0 0 0 0 0 0] 78 | fmt.Println(encodingResponse.SpecialTokensMask) 79 | // [0 0 0 0 0 0 0] 80 | fmt.Println(encodingResponse.AttentionMask) 81 | // [1 1 1 1 1 1 1] 82 | fmt.Println(encodingResponse.Tokens) 83 | // [brown fox jumps over the lazy dog] 84 | fmt.Println(encodingResponse.Offsets) 85 | // [[0 5] [6 9] [10 15] [16 20] [21 24] [25 29] [30 33]] 86 | ``` 87 | 88 | ## Benchmarks 89 | 90 | `go test . -run=^\$ -bench=. -benchmem -count=10 > test/benchmark/$(git rev-parse HEAD).txt` 91 | 92 | Decoding overhead (due to CGO and extra allocations) is between 2% and 9% depending on the benchmark. 93 | 94 | ```bash 95 | go test . -bench=. -benchmem -benchtime=10s 96 | 97 | goos: darwin 98 | goarch: arm64 99 | pkg: github.com/daulet/tokenizers 100 | BenchmarkEncodeNTimes-10 959494 12622 ns/op 232 B/op 12 allocs/op 101 | BenchmarkEncodeNChars-10 1000000000 2.046 ns/op 0 B/op 0 allocs/op 102 | BenchmarkDecodeNTimes-10 2758072 4345 ns/op 96 B/op 3 allocs/op 103 | BenchmarkDecodeNTokens-10 18689725 648.5 ns/op 7 B/op 0 allocs/op 104 | PASS 105 | ok github.com/daulet/tokenizers 126.681s 106 | ``` 107 | 108 | Run equivalent Rust tests with `cargo bench`. 
109 | 110 | ```bash 111 | decode_n_times time: [3.9812 µs 3.9874 µs 3.9939 µs] 112 | change: [-0.4103% -0.1338% +0.1275%] (p = 0.33 > 0.05) 113 | No change in performance detected. 114 | Found 7 outliers among 100 measurements (7.00%) 115 | 7 (7.00%) high mild 116 | 117 | decode_n_tokens time: [651.72 ns 661.73 ns 675.78 ns] 118 | change: [+0.3504% +2.0016% +3.5507%] (p = 0.01 < 0.05) 119 | Change within noise threshold. 120 | Found 7 outliers among 100 measurements (7.00%) 121 | 2 (2.00%) high mild 122 | 5 (5.00%) high severe 123 | ``` 124 | 125 | ## Contributing 126 | 127 | Please refer to [CONTRIBUTING.md](CONTRIBUTING.md) for information on how to contribute a PR to this project. 128 | -------------------------------------------------------------------------------- /WORKSPACE: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /benches/decode_benchmark.rs: -------------------------------------------------------------------------------- 1 | use criterion::{black_box, criterion_group, criterion_main, Criterion}; 2 | use rand::Rng; 3 | use std::time::Instant; 4 | use tokenizers::tokenizer::Tokenizer; 5 | 6 | fn decode(tokenizer:&Tokenizer, ids_slice: &[u32], skip_special_tokens: bool) -> String { 7 | tokenizer.decode(ids_slice, skip_special_tokens).expect("failed to decode input") 8 | } 9 | 10 | fn bench_decode_n_times(c: &mut Criterion) { 11 | let tokenizer = Tokenizer::from_file("./test/data/bert-base-uncased.json").expect("failed to create tokenizer"); 12 | c.bench_function("decode_n_times", 13 | |b| b.iter(|| 14 | decode(&tokenizer, black_box(&[2829, 4419, 14523, 2058, 1996, 13971, 3899]), black_box(true)) 15 | ) 16 | ); 17 | } 18 | 19 | fn bench_decode_n_tokens(c: &mut Criterion) { 20 | let tokenizer = Tokenizer::from_file("./test/data/bert-base-uncased.json").expect("failed to create tokenizer"); 21 | let max_token_id = 
tokenizer.get_vocab_size(true); 22 | let mut rng = rand::thread_rng(); 23 | 24 | c.bench_function("decode_n_tokens", 25 | move |b| { b.iter_custom(|iters| { 26 | let tokens: Vec<u32> = (0..iters).map(|_| rng.gen_range(0..max_token_id) as u32).collect(); 27 | 28 | let start = Instant::now(); 29 | let result = decode(&tokenizer, black_box(&tokens), black_box(true)); 30 | let duration = start.elapsed(); 31 | // we do this to exclude drop time for the result 32 | _ = result; 33 | duration 34 | })} 35 | ); 36 | } 37 | 38 | criterion_group!(benches, bench_decode_n_times, bench_decode_n_tokens); 39 | criterion_main!(benches); 40 | -------------------------------------------------------------------------------- /benchmark.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import random 3 | import time 4 | 5 | import tiktoken 6 | from tiktoken.load import load_tiktoken_bpe 7 | import tokenizers 8 | 9 | 10 | def bench_tiktoken_llama3(): 11 | model_path = "test/data/Meta-Llama-3-8B-Instruct.model" 12 | num_reserved_special_tokens = 256 13 | pat_str = r"(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+" # noqa: E501 14 | mergeable_ranks = load_tiktoken_bpe(model_path) 15 | num_base_tokens = len(mergeable_ranks) 16 | special_tokens = [ 17 | "<|begin_of_text|>", 18 | "<|end_of_text|>", 19 | "<|reserved_special_token_0|>", 20 | "<|reserved_special_token_1|>", 21 | "<|reserved_special_token_2|>", 22 | "<|reserved_special_token_3|>", 23 | "<|start_header_id|>", 24 | "<|end_header_id|>", 25 | "<|reserved_special_token_4|>", 26 | "<|eot_id|>", # end of turn 27 | ] + [ 28 | f"<|reserved_special_token_{i}|>" 29 | for i in range(5, num_reserved_special_tokens - 5) 30 | ] 31 | special_tokens = { 32 | token: num_base_tokens + i for i, token in enumerate(special_tokens) 33 | } 34 | tokenizer = tiktoken.Encoding( 35 | name=Path(model_path).name, 36 | 
pat_str=pat_str, 37 | mergeable_ranks=mergeable_ranks, 38 | special_tokens=special_tokens, 39 | ) 40 | 41 | def encode(text): 42 | return tokenizer.encode(text) 43 | def decode(tokens): 44 | return tokenizer.decode(tokens) 45 | return encode, decode 46 | 47 | 48 | def bench_tokenizers_llama3(): 49 | tokenizer = tokenizers.Tokenizer.from_file("test/data/Meta-Llama-3-8B-Instruct.json") 50 | 51 | def encode(text): 52 | return tokenizer.encode(text, add_special_tokens=False).ids 53 | def decode(tokens): 54 | return tokenizer.decode(tokens) 55 | return encode, decode 56 | 57 | 58 | def bench_encode(encodeFn, text): 59 | start = time.perf_counter_ns() 60 | res = encodeFn(text) 61 | end = time.perf_counter_ns() 62 | print(f" \t{len(text) / (end - start) * 1e9:.2f} chars / s") 63 | return res 64 | 65 | 66 | def bench_decode(decodeFn, tokens): 67 | start = time.perf_counter_ns() 68 | res = decodeFn(tokens) 69 | end = time.perf_counter_ns() 70 | 71 | print(f" \t{(end - start)/1e3:.2f} microsec") 72 | return res 73 | 74 | 75 | if __name__ == "__main__": 76 | times = 10 77 | text = Path("test/data/long_text.txt").read_text() 78 | # split text into times 79 | texts = [text[i:i + len(text) // times] for i in range(0, len(text), len(text) // times)] 80 | 81 | print("TikToken:") 82 | enc, dec = bench_tiktoken_llama3() 83 | token_groups = [] 84 | for i in range(times): 85 | tokens = bench_encode(enc, texts[i]) 86 | token_groups.append(tokens) 87 | for i in range(1, 4): 88 | token_groups.append([random.randint(0, 1000) for _ in range(i)]) 89 | for tokens in token_groups: 90 | bench_decode(dec, tokens) 91 | 92 | print("Tokenizers:") 93 | enc, dec = bench_tokenizers_llama3() 94 | for i in range(times): 95 | tokens = bench_encode(enc, texts[i]) 96 | assert tokens == token_groups[i] 97 | for tokens in token_groups: 98 | bench_decode(dec, tokens) 99 | -------------------------------------------------------------------------------- /deps.bzl: 
-------------------------------------------------------------------------------- 1 | load("@bazel_gazelle//:deps.bzl", "go_repository") 2 | 3 | def go_dependencies(): 4 | go_repository( 5 | name = "com_github_davecgh_go_spew", 6 | importpath = "github.com/davecgh/go-spew", 7 | sum = "h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=", 8 | version = "v1.1.1", 9 | ) 10 | go_repository( 11 | name = "com_github_pmezard_go_difflib", 12 | importpath = "github.com/pmezard/go-difflib", 13 | sum = "h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=", 14 | version = "v1.0.0", 15 | ) 16 | go_repository( 17 | name = "com_github_stretchr_objx", 18 | importpath = "github.com/stretchr/objx", 19 | sum = "h1:1zr/of2m5FGMsad5YfcqgdqdWrIhu+EBEJRhR1U7z/c=", 20 | version = "v0.5.0", 21 | ) 22 | go_repository( 23 | name = "com_github_stretchr_testify", 24 | importpath = "github.com/stretchr/testify", 25 | sum = "h1:+h33VjcLVPDHtOdpUCuF+7gSuG3yGIftsP1YvFihtJ8=", 26 | version = "v1.8.2", 27 | ) 28 | go_repository( 29 | name = "in_gopkg_check_v1", 30 | importpath = "gopkg.in/check.v1", 31 | sum = "h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=", 32 | version = "v0.0.0-20161208181325-20d25e280405", 33 | ) 34 | go_repository( 35 | name = "in_gopkg_yaml_v3", 36 | importpath = "gopkg.in/yaml.v3", 37 | sum = "h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=", 38 | version = "v3.0.1", 39 | ) 40 | -------------------------------------------------------------------------------- /example/BUILD.bazel: -------------------------------------------------------------------------------- 1 | load("@rules_go//go:def.bzl", "go_binary", "go_library") 2 | 3 | go_library( 4 | name = "example_lib", 5 | srcs = ["main.go"], 6 | importpath = "github.com/daulet/tokenizers/example", 7 | visibility = ["//visibility:private"], 8 | deps = ["//:tokenizers"], 9 | ) 10 | 11 | go_binary( 12 | name = "example", 13 | data = ["//test:data"], 14 | embed = [":example_lib"], 15 | visibility = ["//visibility:public"], 16 | ) 17 | 
-------------------------------------------------------------------------------- /example/Dockerfile: -------------------------------------------------------------------------------- 1 | # syntax=docker/dockerfile:1.3 2 | 3 | FROM golang:1.21 as builder-go 4 | ARG TARGETPLATFORM 5 | ARG VERSION=v0.6.0 6 | WORKDIR /workspace 7 | RUN curl -fsSL https://github.com/daulet/tokenizers/releases/download/${VERSION}/libtokenizers.$(echo ${TARGETPLATFORM} | tr / -).tar.gz | tar xvz 8 | COPY ./example . 9 | COPY ./test/data ./test/data 10 | RUN go mod download 11 | RUN mv ./libtokenizers.a /go/pkg/mod/github.com/daulet/tokenizers@${VERSION}/lib/$(echo ${TARGETPLATFORM} | tr / -)/libtokenizers.a 12 | # mounting Go cache won't work since we mutate it above 13 | RUN go run main.go 14 | -------------------------------------------------------------------------------- /example/README.md: -------------------------------------------------------------------------------- 1 | # Example 2 | 3 | To run the example you need to obtain the built rust library. 
4 | ``` 5 | # On M1+ Mac (note arch) 6 | curl -fsSL https://github.com/daulet/tokenizers/releases/download/v1.20.2/libtokenizers.darwin-aarch64.tar.gz | tar xvz 7 | 8 | # change -L argument to where you've placed the library download above 9 | go run -ldflags="-extldflags '-L$(pwd)'" main.go 10 | ``` 11 | or `make build` from the parent directory and `go run -ldflags="-extldflags '-L..'" main.go` -------------------------------------------------------------------------------- /example/go.mod: -------------------------------------------------------------------------------- 1 | module github.com/daulet/tokenizers/example 2 | 3 | go 1.22 4 | 5 | require github.com/daulet/tokenizers v1.20.2 6 | -------------------------------------------------------------------------------- /example/go.sum: -------------------------------------------------------------------------------- 1 | github.com/daulet/tokenizers v1.20.2 h1:tlq/vIOiBTKDPets3596aFvmJYLn3XI6LFKq4q9LKhQ= 2 | github.com/daulet/tokenizers v1.20.2/go.mod h1:tGnMdZthXdcWY6DGD07IygpwJqiPvG85FQUnhs/wSCs= 3 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 4 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 5 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 6 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 7 | github.com/stretchr/testify v1.8.2 h1:+h33VjcLVPDHtOdpUCuF+7gSuG3yGIftsP1YvFihtJ8= 8 | github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= 9 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 10 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 11 | -------------------------------------------------------------------------------- /example/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 
| "log" 6 | 7 | "github.com/daulet/tokenizers" 8 | ) 9 | 10 | func simple() error { 11 | tk, err := tokenizers.FromFile("../test/data/bert-base-uncased.json") 12 | if err != nil { 13 | return err 14 | } 15 | // release native resources 16 | defer tk.Close() 17 | 18 | fmt.Println("Vocab size:", tk.VocabSize()) 19 | // Vocab size: 30522 20 | fmt.Println(tk.Encode("brown fox jumps over the lazy dog", false)) 21 | // [2829 4419 14523 2058 1996 13971 3899] [brown fox jumps over the lazy dog] 22 | fmt.Println(tk.Encode("brown fox jumps over the lazy dog", true)) 23 | // [101 2829 4419 14523 2058 1996 13971 3899 102] [[CLS] brown fox jumps over the lazy dog [SEP]] 24 | fmt.Println(tk.Decode([]uint32{2829, 4419, 14523, 2058, 1996, 13971, 3899}, true)) 25 | // brown fox jumps over the lazy dog 26 | return nil 27 | } 28 | 29 | func advanced() error { 30 | // Load tokenizer from local config file 31 | tk, err := tokenizers.FromFile("../test/data/bert-base-uncased.json") 32 | if err != nil { 33 | return err 34 | } 35 | defer tk.Close() 36 | 37 | // Load pretrained tokenizer from HuggingFace 38 | tkFromHf, err := tokenizers.FromPretrained("google-bert/bert-base-uncased", tokenizers.WithCacheDir("./.cache/tokenizers")) 39 | if err != nil { 40 | return err 41 | } 42 | defer tkFromHf.Close() 43 | 44 | // Encode with specific options 45 | encodeOptions := []tokenizers.EncodeOption{ 46 | tokenizers.WithReturnTypeIDs(), 47 | tokenizers.WithReturnAttentionMask(), 48 | tokenizers.WithReturnTokens(), 49 | tokenizers.WithReturnOffsets(), 50 | tokenizers.WithReturnSpecialTokensMask(), 51 | } 52 | // Or simply: 53 | // encodeOptions = append(encodeOptions, tokenizers.WithReturnAllAttributes()) 54 | 55 | // regardless of how the tokenizer was initialized, the output is the same 56 | for _, tkzr := range []*tokenizers.Tokenizer{tk, tkFromHf} { 57 | encodingResponse := tkzr.EncodeWithOptions("brown fox jumps over the lazy dog", true, encodeOptions...) 
58 | fmt.Println(encodingResponse.IDs) 59 | // [101 2829 4419 14523 2058 1996 13971 3899 102] 60 | fmt.Println(encodingResponse.TypeIDs) 61 | // [0 0 0 0 0 0 0 0 0] 62 | fmt.Println(encodingResponse.SpecialTokensMask) 63 | // [1 0 0 0 0 0 0 0 1] 64 | fmt.Println(encodingResponse.AttentionMask) 65 | // [1 1 1 1 1 1 1 1 1] 66 | fmt.Println(encodingResponse.Tokens) 67 | // [[CLS] brown fox jumps over the lazy dog [SEP]] 68 | fmt.Println(encodingResponse.Offsets) 69 | // [[0 0] [0 5] [6 9] [10 15] [16 20] [21 24] [25 29] [30 33] [0 0]] 70 | } 71 | return nil 72 | } 73 | 74 | func main() { 75 | if err := simple(); err != nil { 76 | log.Fatal(err) 77 | } 78 | if err := advanced(); err != nil { 79 | log.Fatal(err) 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/daulet/tokenizers 2 | 3 | go 1.18 4 | 5 | require github.com/stretchr/testify v1.8.2 6 | 7 | require ( 8 | github.com/davecgh/go-spew v1.1.1 // indirect 9 | github.com/pmezard/go-difflib v1.0.0 // indirect 10 | gopkg.in/yaml.v3 v3.0.1 // indirect 11 | ) 12 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 2 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 3 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 4 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 5 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 6 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 7 | github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= 8 
| github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= 9 | github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 10 | github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= 11 | github.com/stretchr/testify v1.8.2 h1:+h33VjcLVPDHtOdpUCuF+7gSuG3yGIftsP1YvFihtJ8= 12 | github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= 13 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= 14 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 15 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 16 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 17 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 18 | -------------------------------------------------------------------------------- /release/Dockerfile: -------------------------------------------------------------------------------- 1 | # syntax=docker/dockerfile:1.3 2 | 3 | FROM rust:1.87 as builder-rust 4 | ARG TARGETPLATFORM 5 | WORKDIR /workspace 6 | COPY ./benches ./benches 7 | COPY ./src ./src 8 | COPY ./Cargo.toml ./Cargo.toml 9 | COPY ./Cargo.lock ./Cargo.lock 10 | RUN cargo build --release 11 | 12 | FROM golang:1.21 as builder-go 13 | ARG DOCKER_TARGETPLATFORM 14 | WORKDIR /workspace 15 | COPY ./release/go.mod . 16 | COPY ./release/main.go . 17 | # can't rely on tokenizers module because latest release doesn't include recent changes 18 | COPY tokenizer.go ./tokenizers/ 19 | COPY tokenizers.h ./tokenizers/ 20 | COPY --from=builder-rust \ 21 | /workspace/target/release/libtokenizers.a \ 22 | ./tokenizers/lib/${DOCKER_TARGETPLATFORM}/ 23 | COPY ./test/data ./test/data 24 | RUN go run -ldflags="-extldflags '-L./tokenizers/lib/${DOCKER_TARGETPLATFORM}'" . 
25 | -------------------------------------------------------------------------------- /release/go.mod: -------------------------------------------------------------------------------- 1 | module github.com/daulet/tokenizers/release 2 | 3 | go 1.21.5 4 | -------------------------------------------------------------------------------- /release/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/daulet/tokenizers/release/tokenizers" 7 | ) 8 | 9 | func main() { 10 | tk, err := tokenizers.FromFile("./test/data/bert-base-uncased.json") 11 | if err != nil { 12 | panic(err) 13 | } 14 | // release native resources 15 | defer tk.Close() 16 | fmt.Println("Vocab size:", tk.VocabSize()) 17 | // Vocab size: 30522 18 | fmt.Println(tk.Encode("brown fox jumps over the lazy dog", false)) 19 | // [2829 4419 14523 2058 1996 13971 3899] [brown fox jumps over the lazy dog] 20 | fmt.Println(tk.Encode("brown fox jumps over the lazy dog", true)) 21 | // [101 2829 4419 14523 2058 1996 13971 3899 102] [[CLS] brown fox jumps over the lazy dog [SEP]] 22 | fmt.Println(tk.Decode([]uint32{2829, 4419, 14523, 2058, 1996, 13971, 3899}, true)) 23 | // brown fox jumps over the lazy dog 24 | } 25 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | use std::ffi::CStr; 2 | use std::path::PathBuf; 3 | use std::ptr; 4 | use tokenizers::tokenizer::Tokenizer; 5 | 6 | const CARGO_PKG_VERSION: &str = env!("CARGO_PKG_VERSION"); 7 | 8 | #[repr(C)] 9 | pub struct tokenizers_options { 10 | encode_special_tokens: bool, 11 | } 12 | 13 | #[repr(C)] 14 | pub struct tokenizers_buffer { 15 | ids: *mut u32, 16 | type_ids: *mut u32, 17 | special_tokens_mask: *mut u32, 18 | attention_mask: *mut u32, 19 | tokens: *mut *mut libc::c_char, 20 | offsets: *mut usize, 21 | len: usize, 22 | } 23 | 24 | 
#[no_mangle] 25 | pub extern "C" fn tokenizers_version() -> *const libc::c_char { 26 | std::ffi::CString::new(CARGO_PKG_VERSION).unwrap().into_raw() 27 | } 28 | 29 | #[no_mangle] 30 | pub extern "C" fn tokenizers_from_bytes(bytes: *const u8, len: u32, opts: &tokenizers_options) -> *mut Tokenizer { 31 | let bytes_slice = unsafe { std::slice::from_raw_parts(bytes, len as usize) }; 32 | let mut tokenizer = Tokenizer::from_bytes(bytes_slice).expect("failed to create tokenizer"); 33 | tokenizer.set_encode_special_tokens(opts.encode_special_tokens); 34 | Box::into_raw(Box::new(tokenizer)) 35 | } 36 | 37 | // TODO merge with from_bytes and pass truncation params as an argument to TokenizerOptions 38 | #[no_mangle] 39 | pub extern "C" fn tokenizers_from_bytes_with_truncation(bytes: *const u8, len: u32, max_len: usize, dir: u8) -> *mut Tokenizer { 40 | let bytes_slice = unsafe { std::slice::from_raw_parts(bytes, len as usize) }; 41 | let tokenizer: Tokenizer = Tokenizer::from_bytes(bytes_slice) 42 | .expect("failed to create tokenizer") 43 | .with_truncation(Some(tokenizers::tokenizer::TruncationParams{ 44 | max_length: max_len, 45 | direction: match dir { 46 | 0 => tokenizers::tokenizer::TruncationDirection::Left, 47 | 1 => tokenizers::tokenizer::TruncationDirection::Right, 48 | _ => panic!("invalid truncation direction"), 49 | }, 50 | ..Default::default() 51 | })).unwrap().to_owned().into(); 52 | Box::into_raw(Box::new(tokenizer)) 53 | } 54 | 55 | #[no_mangle] 56 | pub extern "C" fn tokenizers_from_file(config: *const libc::c_char) -> *mut libc::c_void { 57 | let config_cstr = unsafe { CStr::from_ptr(config) }; 58 | let config = config_cstr.to_str().unwrap(); 59 | let config = PathBuf::from(config); 60 | match Tokenizer::from_file(config) { 61 | Ok(tokenizer) => { 62 | let ptr = Box::into_raw(Box::new(tokenizer)); 63 | ptr.cast() 64 | } 65 | Err(_) => { 66 | ptr::null_mut() 67 | } 68 | } 69 | } 70 | 71 | #[repr(C)] 72 | pub struct tokenizers_encode_options { 73 | 
add_special_tokens: bool, 74 | 75 | return_type_ids: bool, 76 | return_tokens: bool, 77 | return_special_tokens_mask: bool, 78 | return_attention_mask: bool, 79 | return_offsets: bool, 80 | } 81 | 82 | #[no_mangle] 83 | pub extern "C" fn tokenizers_encode(ptr: *mut libc::c_void, message: *const libc::c_char, options: &tokenizers_encode_options) -> tokenizers_buffer { 84 | let tokenizer: &Tokenizer; 85 | unsafe { 86 | tokenizer = ptr.cast::().as_ref().expect("failed to cast tokenizer"); 87 | } 88 | let message_cstr = unsafe { CStr::from_ptr(message) }; 89 | let message = message_cstr.to_str(); 90 | if message.is_err() { 91 | return tokenizers_buffer { ids: ptr::null_mut(), tokens: ptr::null_mut(), len: 0, type_ids: ptr::null_mut(), special_tokens_mask: ptr::null_mut(), attention_mask: ptr::null_mut() , offsets: ptr::null_mut()}; 92 | } 93 | 94 | let encoding = tokenizer.encode(message.unwrap(), options.add_special_tokens).expect("failed to encode input"); 95 | let mut vec_ids = encoding.get_ids().to_vec(); 96 | vec_ids.shrink_to_fit(); 97 | let ids = vec_ids.as_mut_ptr(); 98 | let len = vec_ids.len(); 99 | std::mem::forget(vec_ids); 100 | 101 | let mut type_ids: *mut u32 = ptr::null_mut(); 102 | if options.return_type_ids { 103 | let mut vec_type_ids = encoding.get_type_ids().to_vec(); 104 | vec_type_ids.shrink_to_fit(); 105 | type_ids = vec_type_ids.as_mut_ptr(); 106 | std::mem::forget(vec_type_ids); 107 | } 108 | 109 | let mut tokens: *mut *mut libc::c_char = ptr::null_mut(); 110 | if options.return_tokens { 111 | let mut vec_tokens = encoding.get_tokens() 112 | .to_vec().into_iter() 113 | .map(|s| std::ffi::CString::new(s).unwrap().into_raw()) 114 | .collect::>(); 115 | vec_tokens.shrink_to_fit(); 116 | tokens = vec_tokens.as_mut_ptr(); 117 | std::mem::forget(vec_tokens); 118 | } 119 | 120 | let mut special_tokens_mask: *mut u32 = ptr::null_mut(); 121 | if options.return_special_tokens_mask { 122 | let mut vec_special_tokens_mask = 
encoding.get_special_tokens_mask().to_vec(); 123 | vec_special_tokens_mask.shrink_to_fit(); 124 | special_tokens_mask = vec_special_tokens_mask.as_mut_ptr(); 125 | std::mem::forget(vec_special_tokens_mask); 126 | } 127 | 128 | let mut attention_mask: *mut u32 = ptr::null_mut(); 129 | if options.return_attention_mask { 130 | let mut vec_attention_mask = encoding.get_attention_mask().to_vec(); 131 | vec_attention_mask.shrink_to_fit(); 132 | attention_mask = vec_attention_mask.as_mut_ptr(); 133 | std::mem::forget(vec_attention_mask); 134 | } 135 | 136 | let mut offsets: *mut usize = ptr::null_mut(); 137 | if options.return_offsets { 138 | let vec_offsets_tuples = encoding.get_offsets().to_vec(); 139 | let mut vec_offsets = Vec::with_capacity(vec_offsets_tuples.len() * 2); 140 | for i in vec_offsets_tuples { 141 | vec_offsets.push(i.0); 142 | vec_offsets.push(i.1); 143 | } 144 | vec_offsets.shrink_to_fit(); 145 | offsets = vec_offsets.as_mut_ptr(); 146 | std::mem::forget(vec_offsets); 147 | } 148 | 149 | tokenizers_buffer { ids, type_ids, special_tokens_mask, attention_mask, tokens, offsets, len } 150 | } 151 | 152 | #[no_mangle] 153 | pub extern "C" fn tokenizers_decode(ptr: *mut libc::c_void, ids: *const u32, len: u32, skip_special_tokens: bool) -> *mut libc::c_char { 154 | let tokenizer: &Tokenizer; 155 | unsafe { 156 | tokenizer = ptr.cast::().as_ref().expect("failed to cast tokenizer"); 157 | } 158 | let ids_slice = unsafe { std::slice::from_raw_parts(ids, len as usize) }; 159 | 160 | let string = tokenizer.decode(ids_slice, skip_special_tokens).expect("failed to decode input"); 161 | match std::ffi::CString::new(string) { 162 | Ok(c_string) => c_string.into_raw(), 163 | Err(_) => ptr::null_mut(), 164 | } 165 | } 166 | 167 | #[no_mangle] 168 | pub extern "C" fn tokenizers_vocab_size(ptr: *mut libc::c_void) -> u32 { 169 | let tokenizer: &Tokenizer; 170 | unsafe { 171 | tokenizer = ptr.cast::().as_ref().expect("failed to cast tokenizer"); 172 | } 173 | 
tokenizer.get_vocab_size(true) as u32 174 | } 175 | 176 | #[no_mangle] 177 | pub extern "C" fn tokenizers_free_tokenizer(ptr: *mut ::libc::c_void) { 178 | if ptr.is_null() { 179 | return; 180 | } 181 | unsafe { 182 | drop(Box::from_raw(ptr.cast::())); 183 | } 184 | } 185 | 186 | #[no_mangle] 187 | pub extern "C" fn tokenizers_free_buffer(buf: tokenizers_buffer) { 188 | if !buf.ids.is_null() { 189 | unsafe { 190 | Vec::from_raw_parts(buf.ids, buf.len, buf.len); 191 | } 192 | } 193 | if !buf.type_ids.is_null() { 194 | unsafe { 195 | Vec::from_raw_parts(buf.type_ids, buf.len, buf.len); 196 | } 197 | } 198 | if !buf.special_tokens_mask.is_null() { 199 | unsafe { 200 | Vec::from_raw_parts(buf.special_tokens_mask, buf.len, buf.len); 201 | } 202 | } 203 | if !buf.attention_mask.is_null() { 204 | unsafe { 205 | Vec::from_raw_parts(buf.attention_mask, buf.len, buf.len); 206 | } 207 | } 208 | if !buf.offsets.is_null() { 209 | unsafe { 210 | Vec::from_raw_parts(buf.offsets, buf.len*2, buf.len*2); 211 | } 212 | } 213 | if !buf.tokens.is_null() { 214 | unsafe { 215 | let strings = Vec::from_raw_parts(buf.tokens, buf.len, buf.len); 216 | for s in strings { 217 | drop(std::ffi::CString::from_raw(s.cast::())); 218 | } 219 | } 220 | } 221 | } 222 | 223 | #[no_mangle] 224 | pub extern "C" fn tokenizers_free_string(ptr: *mut libc::c_char) { 225 | if ptr.is_null() { 226 | return; 227 | } 228 | unsafe { 229 | drop(std::ffi::CString::from_raw(ptr)); 230 | } 231 | } 232 | -------------------------------------------------------------------------------- /test/BUILD.bazel: -------------------------------------------------------------------------------- 1 | filegroup( 2 | name = "data", 3 | srcs = glob(["data/**"]), 4 | visibility = ["//visibility:public"], 5 | ) 6 | 7 | filegroup( 8 | name = "embeddata", 9 | srcs = [ 10 | "data/sentence-transformers-labse.json", 11 | ], 12 | visibility = ["//visibility:public"], 13 | ) 14 | 
-------------------------------------------------------------------------------- /test/benchmark/1b502b65573ea00125eac62fa301c480402be19c.txt: -------------------------------------------------------------------------------- 1 | goos: darwin 2 | goarch: arm64 3 | pkg: github.com/daulet/tokenizers 4 | BenchmarkEncodeNTimes-10 95174 12667 ns/op 232 B/op 12 allocs/op 5 | BenchmarkEncodeNTimes-10 94437 12580 ns/op 232 B/op 12 allocs/op 6 | BenchmarkEncodeNTimes-10 93362 12583 ns/op 232 B/op 12 allocs/op 7 | BenchmarkEncodeNTimes-10 94240 13372 ns/op 232 B/op 12 allocs/op 8 | BenchmarkEncodeNTimes-10 92844 12868 ns/op 232 B/op 12 allocs/op 9 | BenchmarkEncodeNTimes-10 92984 12766 ns/op 232 B/op 12 allocs/op 10 | BenchmarkEncodeNTimes-10 92055 12654 ns/op 232 B/op 12 allocs/op 11 | BenchmarkEncodeNTimes-10 91874 13204 ns/op 232 B/op 12 allocs/op 12 | BenchmarkEncodeNTimes-10 93130 12686 ns/op 232 B/op 12 allocs/op 13 | BenchmarkEncodeNTimes-10 93288 12528 ns/op 232 B/op 12 allocs/op 14 | BenchmarkEncodeNChars-10 1000000000 2.374 ns/op 0 B/op 0 allocs/op 15 | BenchmarkEncodeNChars-10 1000000000 2.651 ns/op 0 B/op 0 allocs/op 16 | BenchmarkEncodeNChars-10 1000000000 1.993 ns/op 0 B/op 0 allocs/op 17 | BenchmarkEncodeNChars-10 1000000000 2.169 ns/op 0 B/op 0 allocs/op 18 | BenchmarkEncodeNChars-10 1000000000 2.282 ns/op 0 B/op 0 allocs/op 19 | BenchmarkEncodeNChars-10 1000000000 2.348 ns/op 0 B/op 0 allocs/op 20 | BenchmarkEncodeNChars-10 1000000000 2.028 ns/op 0 B/op 0 allocs/op 21 | BenchmarkEncodeNChars-10 1000000000 2.013 ns/op 0 B/op 0 allocs/op 22 | BenchmarkEncodeNChars-10 1000000000 2.200 ns/op 0 B/op 0 allocs/op 23 | BenchmarkEncodeNChars-10 1000000000 1.957 ns/op 0 B/op 0 allocs/op 24 | BenchmarkDecodeNTimes-10 250281 4474 ns/op 96 B/op 3 allocs/op 25 | BenchmarkDecodeNTimes-10 268866 4501 ns/op 96 B/op 3 allocs/op 26 | BenchmarkDecodeNTimes-10 260468 4422 ns/op 96 B/op 3 allocs/op 27 | BenchmarkDecodeNTimes-10 264583 4455 ns/op 96 B/op 3 allocs/op 28 | 
BenchmarkDecodeNTimes-10 262168 4552 ns/op 96 B/op 3 allocs/op 29 | BenchmarkDecodeNTimes-10 262182 4455 ns/op 96 B/op 3 allocs/op 30 | BenchmarkDecodeNTimes-10 262510 4511 ns/op 96 B/op 3 allocs/op 31 | BenchmarkDecodeNTimes-10 263491 4524 ns/op 96 B/op 3 allocs/op 32 | BenchmarkDecodeNTimes-10 265724 4396 ns/op 96 B/op 3 allocs/op 33 | BenchmarkDecodeNTimes-10 259940 4430 ns/op 96 B/op 3 allocs/op 34 | BenchmarkDecodeNTokens-10 1804423 678.7 ns/op 7 B/op 0 allocs/op 35 | BenchmarkDecodeNTokens-10 1827415 654.8 ns/op 7 B/op 0 allocs/op 36 | BenchmarkDecodeNTokens-10 1850868 648.1 ns/op 7 B/op 0 allocs/op 37 | BenchmarkDecodeNTokens-10 1838286 650.1 ns/op 7 B/op 0 allocs/op 38 | BenchmarkDecodeNTokens-10 1853236 655.6 ns/op 7 B/op 0 allocs/op 39 | BenchmarkDecodeNTokens-10 1835120 657.1 ns/op 7 B/op 0 allocs/op 40 | BenchmarkDecodeNTokens-10 1838400 652.3 ns/op 7 B/op 0 allocs/op 41 | BenchmarkDecodeNTokens-10 1847911 659.2 ns/op 7 B/op 0 allocs/op 42 | BenchmarkDecodeNTokens-10 1808113 654.2 ns/op 7 B/op 0 allocs/op 43 | BenchmarkDecodeNTokens-10 1820958 666.3 ns/op 7 B/op 0 allocs/op 44 | PASS 45 | ok github.com/daulet/tokenizers 245.425s 46 | -------------------------------------------------------------------------------- /test/benchmark/217df12c164da67a24fea94c7d9af6332c616f03.txt: -------------------------------------------------------------------------------- 1 | goos: darwin 2 | goarch: arm64 3 | pkg: github.com/daulet/tokenizers 4 | BenchmarkEncodeNTimes-10 914437 13017 ns/op 232 B/op 12 allocs/op 5 | BenchmarkEncodeNTimes-10 919786 13044 ns/op 232 B/op 12 allocs/op 6 | BenchmarkEncodeNTimes-10 916773 13034 ns/op 232 B/op 12 allocs/op 7 | BenchmarkEncodeNTimes-10 922550 12955 ns/op 232 B/op 12 allocs/op 8 | BenchmarkEncodeNTimes-10 911588 12968 ns/op 232 B/op 12 allocs/op 9 | BenchmarkEncodeNTimes-10 927381 12986 ns/op 232 B/op 12 allocs/op 10 | BenchmarkEncodeNChars-10 1000000000 1.877 ns/op 0 B/op 0 allocs/op 11 | BenchmarkEncodeNChars-10 1000000000 2.132 
ns/op 0 B/op 0 allocs/op 12 | BenchmarkEncodeNChars-10 1000000000 0.6197 ns/op 0 B/op 0 allocs/op 13 | BenchmarkEncodeNChars-10 1000000000 1.650 ns/op 0 B/op 0 allocs/op 14 | BenchmarkEncodeNChars-10 1000000000 1.641 ns/op 0 B/op 0 allocs/op 15 | BenchmarkEncodeNChars-10 1000000000 1.964 ns/op 0 B/op 0 allocs/op 16 | BenchmarkDecodeNTimes-10 2745178 4374 ns/op 96 B/op 3 allocs/op 17 | BenchmarkDecodeNTimes-10 2766117 4350 ns/op 96 B/op 3 allocs/op 18 | BenchmarkDecodeNTimes-10 2764905 4315 ns/op 96 B/op 3 allocs/op 19 | BenchmarkDecodeNTimes-10 2778410 4308 ns/op 96 B/op 3 allocs/op 20 | BenchmarkDecodeNTimes-10 2787747 4314 ns/op 96 B/op 3 allocs/op 21 | BenchmarkDecodeNTimes-10 2785400 4310 ns/op 96 B/op 3 allocs/op 22 | BenchmarkDecodeNTokens-10 18110155 639.5 ns/op 7 B/op 0 allocs/op 23 | BenchmarkDecodeNTokens-10 18771621 637.7 ns/op 7 B/op 0 allocs/op 24 | BenchmarkDecodeNTokens-10 18790963 628.7 ns/op 7 B/op 0 allocs/op 25 | BenchmarkDecodeNTokens-10 18848498 629.8 ns/op 7 B/op 0 allocs/op 26 | BenchmarkDecodeNTokens-10 18726458 630.1 ns/op 7 B/op 0 allocs/op 27 | BenchmarkDecodeNTokens-10 18785268 628.6 ns/op 7 B/op 0 allocs/op 28 | PASS 29 | ok github.com/daulet/tokenizers 347.975s 30 | -------------------------------------------------------------------------------- /test/benchmark/3188ded27885d1002698a0e25f0b32306c430e88.txt: -------------------------------------------------------------------------------- 1 | goos: darwin 2 | goarch: arm64 3 | pkg: github.com/daulet/tokenizers 4 | BenchmarkEncodeNTimes-10 101848 12317 ns/op 84 B/op 4 allocs/op 5 | BenchmarkEncodeNTimes-10 97996 11903 ns/op 84 B/op 4 allocs/op 6 | BenchmarkEncodeNTimes-10 98641 11991 ns/op 84 B/op 4 allocs/op 7 | BenchmarkEncodeNTimes-10 98586 12234 ns/op 84 B/op 4 allocs/op 8 | BenchmarkEncodeNTimes-10 99187 11781 ns/op 84 B/op 4 allocs/op 9 | BenchmarkEncodeNTimes-10 98481 11984 ns/op 84 B/op 4 allocs/op 10 | BenchmarkEncodeNChars-10 1000000000 2.443 ns/op 0 B/op 0 allocs/op 11 | 
BenchmarkEncodeNChars-10 1000000000 2.579 ns/op 0 B/op 0 allocs/op 12 | BenchmarkEncodeNChars-10 1000000000 2.723 ns/op 0 B/op 0 allocs/op 13 | BenchmarkEncodeNChars-10 1000000000 2.531 ns/op 0 B/op 0 allocs/op 14 | BenchmarkEncodeNChars-10 1000000000 2.787 ns/op 0 B/op 0 allocs/op 15 | BenchmarkEncodeNChars-10 1000000000 2.590 ns/op 0 B/op 0 allocs/op 16 | BenchmarkDecodeNTimes-10 690247 1707 ns/op 96 B/op 3 allocs/op 17 | BenchmarkDecodeNTimes-10 685672 1696 ns/op 96 B/op 3 allocs/op 18 | BenchmarkDecodeNTimes-10 679148 1705 ns/op 96 B/op 3 allocs/op 19 | BenchmarkDecodeNTimes-10 653304 1745 ns/op 96 B/op 3 allocs/op 20 | BenchmarkDecodeNTimes-10 669532 1687 ns/op 96 B/op 3 allocs/op 21 | BenchmarkDecodeNTimes-10 675759 1696 ns/op 96 B/op 3 allocs/op 22 | BenchmarkDecodeNTokens-10 5312313 213.2 ns/op 7 B/op 0 allocs/op 23 | BenchmarkDecodeNTokens-10 5563538 186.9 ns/op 7 B/op 0 allocs/op 24 | BenchmarkDecodeNTokens-10 6347782 195.1 ns/op 7 B/op 0 allocs/op 25 | BenchmarkDecodeNTokens-10 6054649 199.6 ns/op 7 B/op 0 allocs/op 26 | BenchmarkDecodeNTokens-10 6216045 184.5 ns/op 7 B/op 0 allocs/op 27 | BenchmarkDecodeNTokens-10 5972562 192.2 ns/op 7 B/op 0 allocs/op 28 | PASS 29 | ok github.com/daulet/tokenizers 431.946s -------------------------------------------------------------------------------- /test/benchmark/38a9a14c1c56b113461b0c7350c72de949e23cc2.txt: -------------------------------------------------------------------------------- 1 | goos: darwin 2 | goarch: arm64 3 | pkg: github.com/daulet/tokenizers 4 | BenchmarkEncodeNTimes-10 89750 13168 ns/op 232 B/op 12 allocs/op 5 | BenchmarkEncodeNTimes-10 89104 13092 ns/op 232 B/op 12 allocs/op 6 | BenchmarkEncodeNTimes-10 91214 13135 ns/op 232 B/op 12 allocs/op 7 | BenchmarkEncodeNTimes-10 91635 13164 ns/op 232 B/op 12 allocs/op 8 | BenchmarkEncodeNTimes-10 91681 13034 ns/op 232 B/op 12 allocs/op 9 | BenchmarkEncodeNTimes-10 91050 13085 ns/op 232 B/op 12 allocs/op 10 | BenchmarkEncodeNChars-10 1000000000 3.680 
ns/op 0 B/op 0 allocs/op 11 | BenchmarkEncodeNChars-10 1000000000 2.547 ns/op 0 B/op 0 allocs/op 12 | BenchmarkEncodeNChars-10 1000000000 11.13 ns/op 0 B/op 0 allocs/op 13 | BenchmarkEncodeNChars-10 1000000000 2.496 ns/op 0 B/op 0 allocs/op 14 | BenchmarkEncodeNChars-10 841242856 3.430 ns/op 0 B/op 0 allocs/op 15 | BenchmarkEncodeNChars-10 1000000000 2.540 ns/op 0 B/op 0 allocs/op 16 | BenchmarkDecodeNTimes-10 245875 4610 ns/op 96 B/op 3 allocs/op 17 | BenchmarkDecodeNTimes-10 261669 4544 ns/op 96 B/op 3 allocs/op 18 | BenchmarkDecodeNTimes-10 260374 4525 ns/op 96 B/op 3 allocs/op 19 | BenchmarkDecodeNTimes-10 260748 4514 ns/op 96 B/op 3 allocs/op 20 | BenchmarkDecodeNTimes-10 256246 4492 ns/op 96 B/op 3 allocs/op 21 | BenchmarkDecodeNTimes-10 258206 4560 ns/op 96 B/op 3 allocs/op 22 | BenchmarkDecodeNTokens-10 1756308 674.8 ns/op 7 B/op 0 allocs/op 23 | BenchmarkDecodeNTokens-10 1847517 644.9 ns/op 7 B/op 0 allocs/op 24 | BenchmarkDecodeNTokens-10 1813251 657.5 ns/op 7 B/op 0 allocs/op 25 | BenchmarkDecodeNTokens-10 1849479 649.5 ns/op 7 B/op 0 allocs/op 26 | BenchmarkDecodeNTokens-10 1847059 654.6 ns/op 7 B/op 0 allocs/op 27 | BenchmarkDecodeNTokens-10 1726924 661.2 ns/op 7 B/op 0 allocs/op 28 | PASS 29 | ok github.com/daulet/tokenizers 163.493s 30 | -------------------------------------------------------------------------------- /test/benchmark/7bb47dd52e68ae3349c0461d494921d6a07f7181.txt: -------------------------------------------------------------------------------- 1 | goos: darwin 2 | goarch: arm64 3 | pkg: github.com/daulet/tokenizers 4 | BenchmarkEncodeNTimes-10 91389 12616 ns/op 232 B/op 12 allocs/op 5 | BenchmarkEncodeNTimes-10 94416 12608 ns/op 232 B/op 12 allocs/op 6 | BenchmarkEncodeNTimes-10 95833 12702 ns/op 232 B/op 12 allocs/op 7 | BenchmarkEncodeNTimes-10 93657 12692 ns/op 232 B/op 12 allocs/op 8 | BenchmarkEncodeNTimes-10 95575 12565 ns/op 232 B/op 12 allocs/op 9 | BenchmarkEncodeNTimes-10 95866 12700 ns/op 232 B/op 12 allocs/op 10 | 
BenchmarkEncodeNTimes-10 95568 12502 ns/op 232 B/op 12 allocs/op 11 | BenchmarkEncodeNTimes-10 95286 12625 ns/op 232 B/op 12 allocs/op 12 | BenchmarkEncodeNTimes-10 95224 12739 ns/op 232 B/op 12 allocs/op 13 | BenchmarkEncodeNTimes-10 93948 12949 ns/op 232 B/op 12 allocs/op 14 | BenchmarkEncodeNChars-10 1000000000 2.254 ns/op 0 B/op 0 allocs/op 15 | BenchmarkEncodeNChars-10 1000000000 3.099 ns/op 0 B/op 0 allocs/op 16 | BenchmarkEncodeNChars-10 1000000000 2.273 ns/op 0 B/op 0 allocs/op 17 | BenchmarkEncodeNChars-10 1000000000 2.722 ns/op 0 B/op 0 allocs/op 18 | BenchmarkEncodeNChars-10 1000000000 1.965 ns/op 0 B/op 0 allocs/op 19 | BenchmarkEncodeNChars-10 1000000000 2.024 ns/op 0 B/op 0 allocs/op 20 | BenchmarkEncodeNChars-10 1000000000 1.997 ns/op 0 B/op 0 allocs/op 21 | BenchmarkEncodeNChars-10 1000000000 2.320 ns/op 0 B/op 0 allocs/op 22 | BenchmarkEncodeNChars-10 1000000000 1.866 ns/op 0 B/op 0 allocs/op 23 | BenchmarkEncodeNChars-10 1000000000 4.136 ns/op 0 B/op 0 allocs/op 24 | BenchmarkDecodeNTimes-10 239275 4575 ns/op 96 B/op 3 allocs/op 25 | BenchmarkDecodeNTimes-10 243561 4515 ns/op 96 B/op 3 allocs/op 26 | BenchmarkDecodeNTimes-10 258657 4480 ns/op 96 B/op 3 allocs/op 27 | BenchmarkDecodeNTimes-10 262723 4597 ns/op 96 B/op 3 allocs/op 28 | BenchmarkDecodeNTimes-10 263178 4466 ns/op 96 B/op 3 allocs/op 29 | BenchmarkDecodeNTimes-10 266382 4442 ns/op 96 B/op 3 allocs/op 30 | BenchmarkDecodeNTimes-10 266616 4498 ns/op 96 B/op 3 allocs/op 31 | BenchmarkDecodeNTimes-10 266132 4544 ns/op 96 B/op 3 allocs/op 32 | BenchmarkDecodeNTimes-10 266750 4780 ns/op 96 B/op 3 allocs/op 33 | BenchmarkDecodeNTimes-10 266880 4454 ns/op 96 B/op 3 allocs/op 34 | BenchmarkDecodeNTokens-10 1808430 655.3 ns/op 7 B/op 0 allocs/op 35 | BenchmarkDecodeNTokens-10 1832203 649.4 ns/op 7 B/op 0 allocs/op 36 | BenchmarkDecodeNTokens-10 1851890 648.7 ns/op 7 B/op 0 allocs/op 37 | BenchmarkDecodeNTokens-10 1836775 649.1 ns/op 7 B/op 0 allocs/op 38 | BenchmarkDecodeNTokens-10 1839984 650.7 
ns/op 7 B/op 0 allocs/op 39 | BenchmarkDecodeNTokens-10 1854864 643.8 ns/op 7 B/op 0 allocs/op 40 | BenchmarkDecodeNTokens-10 1854836 647.9 ns/op 7 B/op 0 allocs/op 41 | BenchmarkDecodeNTokens-10 1866586 643.4 ns/op 7 B/op 0 allocs/op 42 | BenchmarkDecodeNTokens-10 1794544 666.8 ns/op 7 B/op 0 allocs/op 43 | BenchmarkDecodeNTokens-10 1768803 666.9 ns/op 7 B/op 0 allocs/op 44 | PASS 45 | ok github.com/daulet/tokenizers 226.796s 46 | -------------------------------------------------------------------------------- /test/benchmark/go_results.txt: -------------------------------------------------------------------------------- 1 | Run with `go test -bench=Decode -count=10 -run=^\$ > test/benchmark/go_results.txt` then `benchstat test/benchmark/go_results.txt` 2 | 3 | goos: darwin 4 | goarch: arm64 5 | pkg: github.com/daulet/tokenizers 6 | BenchmarkDecodeNTimes-10 239250 4343 ns/op 7 | BenchmarkDecodeNTimes-10 271682 4356 ns/op 8 | BenchmarkDecodeNTimes-10 274546 4346 ns/op 9 | BenchmarkDecodeNTimes-10 271051 4368 ns/op 10 | BenchmarkDecodeNTimes-10 272458 4372 ns/op 11 | BenchmarkDecodeNTimes-10 271284 4350 ns/op 12 | BenchmarkDecodeNTimes-10 272586 4350 ns/op 13 | BenchmarkDecodeNTimes-10 271552 4358 ns/op 14 | BenchmarkDecodeNTimes-10 268934 4349 ns/op 15 | BenchmarkDecodeNTimes-10 273238 4364 ns/op 16 | BenchmarkDecodeNTokens-10 1840972 657.1 ns/op 17 | BenchmarkDecodeNTokens-10 1817886 636.0 ns/op 18 | BenchmarkDecodeNTokens-10 1884613 641.0 ns/op 19 | BenchmarkDecodeNTokens-10 1823654 637.8 ns/op 20 | BenchmarkDecodeNTokens-10 1883685 646.7 ns/op 21 | BenchmarkDecodeNTokens-10 1852138 642.2 ns/op 22 | BenchmarkDecodeNTokens-10 1852321 643.3 ns/op 23 | BenchmarkDecodeNTokens-10 1850312 649.7 ns/op 24 | BenchmarkDecodeNTokens-10 1838618 640.8 ns/op 25 | BenchmarkDecodeNTokens-10 1881645 642.7 ns/op 26 | PASS 27 | ok github.com/daulet/tokenizers 31.929s 28 | 29 | goos: darwin 30 | goarch: arm64 31 | pkg: github.com/daulet/tokenizers 32 | │ 
test/benchmark/go_results.txt │ 33 | │ sec/op │ 34 | DecodeNTimes-10 4.353µ ± 0% 35 | DecodeNTokens-10 642.5n ± 1% 36 | geomean 1.672µ 37 | -------------------------------------------------------------------------------- /test/benchmark/rust_results.txt: -------------------------------------------------------------------------------- 1 | run with `cargo bench` 2 | 3 | decode_n_times time: [3.9349 µs 3.9588 µs 3.9846 µs] 4 | change: [-1.6315% -1.2605% -0.8858%] (p = 0.00 < 0.05) 5 | Change within noise threshold. 6 | Found 10 outliers among 100 measurements (10.00%) 7 | 6 (6.00%) high mild 8 | 4 (4.00%) high severe 9 | 10 | decode_n_tokens time: [628.40 ns 630.04 ns 632.03 ns] 11 | change: [-3.6926% -2.1495% -0.5598%] (p = 0.01 < 0.05) 12 | Change within noise threshold. 13 | Found 4 outliers among 100 measurements (4.00%) 14 | 2 (2.00%) high mild 15 | 2 (2.00%) high severe -------------------------------------------------------------------------------- /test/data/long_text.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce sed mauris lobortis, egestas velit ac, congue sapien. Vivamus elementum scelerisque augue, sed volutpat sapien vehicula non. Fusce scelerisque, ex ac sagittis tempor, sem nunc efficitur diam, in lobortis erat augue et nisi. Praesent at metus ante. Pellentesque rutrum diam ac ex gravida scelerisque. Quisque sodales nisi et dignissim volutpat. Praesent iaculis ultrices libero eget semper. Aliquam viverra tortor vel enim ultricies consectetur. Duis vestibulum a tellus vitae malesuada. Fusce luctus orci sit amet metus faucibus semper. Quisque eu purus ut sapien sagittis ultricies dictum quis massa. 4 | 5 | Suspendisse eget sapien vel nulla ornare commodo vitae a urna. Nulla placerat diam eget magna aliquam, vel congue sem luctus. Donec feugiat mi at tincidunt consectetur. Morbi pulvinar rhoncus quam in semper. 
Vivamus vitae eleifend tellus, a pulvinar tortor. Vivamus felis lorem, semper nec erat vel, faucibus cursus elit. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Integer id ex non mauris efficitur viverra. Suspendisse consequat velit est, sit amet tincidunt enim feugiat ut. Nullam nec tincidunt lorem. Sed ultrices ante enim, eget suscipit quam fringilla eget. Pellentesque ac tellus vitae nisl cursus commodo nec at nibh. Curabitur mauris augue, laoreet sed hendrerit at, ultricies nec ipsum. Etiam auctor augue at neque mattis lacinia. Etiam mollis pharetra malesuada. Suspendisse placerat sem eget purus vehicula luctus. 6 | 7 | Etiam consectetur hendrerit eros, sit amet facilisis metus. In tristique dignissim dui vel egestas. Maecenas consectetur risus eget eleifend congue. Curabitur a urna tincidunt, pharetra nunc ut, fringilla erat. In volutpat dictum ultrices. Nam scelerisque nibh libero, eget vestibulum ex suscipit et. Curabitur sagittis, tortor a rutrum posuere, neque enim gravida mi, eu dignissim sem ante sit amet lacus. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Morbi faucibus id velit a volutpat. Morbi auctor mollis tellus a convallis. Mauris eu nisl vel libero laoreet sagittis. Suspendisse vitae porttitor sapien. Phasellus volutpat diam vel interdum blandit. Donec quis tortor nec est condimentum viverra non id ante. 8 | 9 | Nam tortor mauris, lacinia nec lectus at, vestibulum facilisis turpis. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Fusce vestibulum sodales bibendum. Morbi tincidunt odio pellentesque, luctus turpis a, condimentum orci. Integer ornare eleifend libero id interdum. Pellentesque malesuada elit sed euismod facilisis. Mauris vestibulum non dolor a accumsan. Nulla vestibulum, massa venenatis fringilla volutpat, mi ex tempus eros, sed dapibus quam neque sed mi. Fusce posuere auctor sapien, vel sagittis orci porta id. 
Praesent tempor, nunc ullamcorper aliquam pellentesque, dolor erat pharetra mi, non rutrum nibh augue sed ante. Cras consectetur ligula id velit mattis, eget aliquam nibh tincidunt. Nullam ac elementum est. 10 | 11 | In ut interdum purus. Phasellus ornare quis dui sed porta. Vestibulum sed pharetra tortor, ac sagittis leo. Integer lobortis dignissim tellus sed rhoncus. Phasellus facilisis, ligula nec venenatis hendrerit, dui quam congue lacus, tempor aliquet neque orci in leo. Donec sit amet maximus urna, sed feugiat ex. Duis egestas ipsum vel dolor faucibus mollis. Phasellus euismod velit et tellus congue, at tristique nulla efficitur. Ut vestibulum nec mi in lacinia. Aliquam vulputate scelerisque ante sit amet semper. Sed consequat, urna vitae placerat maximus, odio eros pellentesque velit, et euismod diam felis et diam. Duis scelerisque cursus ante ut tincidunt. Integer a faucibus nunc. 12 | 13 | Sed mi libero, semper vel purus in, sodales pretium metus. Donec eleifend mauris mi, facilisis faucibus mi consequat ut. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Nunc ac quam facilisis, posuere massa sit amet, placerat turpis. Morbi tristique consectetur nunc at ultrices. Integer eget placerat massa. Nunc sed dolor id risus maximus pharetra quis at metus. 14 | 15 | Cras porta vitae dui a cursus. Aenean id ex cursus, mollis dolor vel, elementum dolor. Quisque efficitur facilisis maximus. Praesent convallis viverra turpis, ac convallis lorem. Duis auctor luctus erat, vitae feugiat arcu aliquam et. In rutrum est eget nibh gravida rhoncus. Fusce congue justo quam, sed fringilla erat sollicitudin ac. Suspendisse ut urna vitae lacus sagittis euismod in eu magna. Duis eros risus, varius viverra odio at, tristique consectetur turpis. 16 | 17 | Aenean ac viverra lorem. Donec eget nulla purus. Quisque faucibus mi non magna sollicitudin, a placerat velit blandit. 
Mauris luctus, lorem sit amet accumsan finibus, dolor nulla lacinia orci, non tempus mauris quam a nunc. Fusce eget neque at libero facilisis blandit in ac enim. Suspendisse ultricies, nulla et posuere elementum, nibh nisi dignissim lorem, eu volutpat mauris massa sit amet metus. Cras libero libero, placerat sed urna sed, fermentum sodales ex. Aenean non velit sit amet est convallis scelerisque eu quis nibh. Cras euismod enim et odio aliquam, ac ullamcorper metus tempor. Etiam a nibh nec massa dictum auctor ut id est. Curabitur nec neque a dui viverra tincidunt vitae ac nulla. Nam mollis odio in accumsan viverra. Fusce sit amet vestibulum risus, vel tristique quam. Aenean rutrum auctor eleifend. Mauris eu ante quis ligula euismod tincidunt. 18 | 19 | Duis ut imperdiet arcu, ac convallis orci. Integer quis massa magna. Integer vestibulum augue nec tellus dictum, ac sollicitudin nulla tincidunt. Curabitur nisl ex, convallis dapibus laoreet et, convallis ac mi. Integer at nulla venenatis, volutpat erat eu, dapibus est. Donec quis sollicitudin urna. Pellentesque venenatis sodales sollicitudin. Phasellus eu ultrices sem. Aliquam sagittis turpis nisl, eu rhoncus augue placerat id. Suspendisse imperdiet vel ex vitae dapibus. Quisque ac leo at augue vestibulum iaculis. Suspendisse cursus sodales augue ac suscipit. 20 | 21 | In eget elit vel erat cursus vulputate. Nulla gravida turpis ac risus ornare, eu finibus nulla molestie. Sed faucibus eros non volutpat iaculis. Mauris id risus libero. Phasellus dictum, lectus ut fringilla molestie, odio purus tempor sem, in malesuada felis enim non mauris. Praesent ullamcorper tempus quam, et molestie sapien condimentum ut. Donec eu quam ex. Etiam eget dolor nec justo sollicitudin aliquet. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Suspendisse dignissim vulputate nunc, sed molestie ante facilisis ut. Etiam consequat enim quis lorem pulvinar mattis. 
Interdum et malesuada fames ac ante ipsum primis in faucibus. Phasellus id metus feugiat, bibendum urna a, consectetur turpis. 22 | 23 | Quisque eget nibh imperdiet, rutrum velit ac, vestibulum felis. Etiam id ante ac ante convallis porta non quis tellus. Phasellus at elementum felis. Praesent volutpat massa eu ullamcorper pulvinar. Donec nec lorem elit. Morbi semper quis ipsum at luctus. Curabitur imperdiet nisi nibh, in egestas lectus convallis ut. Sed et velit vel urna consequat laoreet. 24 | 25 | Suspendisse potenti. Sed consectetur dictum sem, eget varius sapien posuere et. Donec finibus, turpis quis malesuada cursus, mauris est aliquet massa, in gravida massa arcu et ipsum. Integer nec elementum magna. Nam pellentesque ornare velit vitae volutpat. Suspendisse ultricies magna enim, vel volutpat ex pretium nec. Suspendisse a velit odio. Nullam cursus rhoncus eros, nec semper ipsum faucibus ac. Donec malesuada cursus massa, quis commodo tellus laoreet id. Aliquam suscipit nisi quam, eu suscipit dolor sollicitudin tincidunt. Donec convallis orci at mi suscipit gravida. 26 | 27 | Sed a aliquam nunc, euismod semper mi. Interdum et malesuada fames ac ante ipsum primis in faucibus. Etiam metus eros, venenatis nec felis eu, malesuada dictum neque. Suspendisse ultrices nec urna vel tincidunt. Quisque pulvinar vitae urna vel mollis. Nunc imperdiet turpis a tortor vulputate, in imperdiet tortor euismod. Phasellus sed ligula ligula. Maecenas est sapien, dapibus tristique lectus ut, ullamcorper aliquam turpis. Aenean quis consequat sem. 28 | 29 | Interdum et malesuada fames ac ante ipsum primis in faucibus. Etiam semper metus a est maximus, at malesuada quam maximus. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Morbi aliquet enim vel ipsum semper, id condimentum metus congue. Maecenas lacus erat, eleifend eget nibh a, vestibulum vestibulum purus. Aliquam imperdiet sollicitudin est, quis scelerisque diam sagittis in. Vivamus mollis risus quis pulvinar dictum. 
30 | 31 | Suspendisse dignissim libero vel dolor commodo imperdiet. Morbi venenatis felis eu mauris placerat, eget placerat metus mattis. Etiam leo felis, rhoncus imperdiet vehicula et, laoreet sit amet lectus. Curabitur aliquet ac massa sed ullamcorper. Nullam quis lacus auctor, dictum mi non, feugiat est. Fusce sed nisi vel nisl interdum sollicitudin. Nulla vel hendrerit lorem. Praesent vestibulum ex sed enim rhoncus ullamcorper. Nullam eleifend felis ut volutpat aliquam. Vestibulum bibendum leo hendrerit laoreet mollis. Phasellus lacinia sagittis urna sit amet tempor. Donec et enim sagittis, sollicitudin orci pretium, ornare velit. 32 | 33 | Nunc quis suscipit leo, vel molestie augue. Proin accumsan imperdiet cursus. Proin luctus mollis odio, et tristique arcu facilisis hendrerit. Morbi interdum tincidunt odio dapibus rutrum. Morbi nec nisl ultricies, tincidunt urna et, suscipit nibh. Etiam luctus ante non nisi cursus, id blandit ex suscipit. Suspendisse feugiat, massa non eleifend convallis, ligula metus interdum elit, a consectetur odio nisi ornare felis. Nam in lorem in velit interdum fermentum ut id justo. Integer ut ipsum tempor, hendrerit elit ut, faucibus tellus. Quisque accumsan metus eu efficitur ullamcorper. 34 | 35 | Duis semper semper ex, nec accumsan augue accumsan sit amet. Suspendisse in luctus nunc. Integer at orci faucibus, facilisis elit quis, tristique purus. Nullam vitae placerat justo. Suspendisse a tellus vel orci placerat mollis. Vivamus nec libero non libero auctor eleifend quis vel dui. Nulla elit magna, porttitor et scelerisque quis, tincidunt a dui. Aliquam porta felis malesuada bibendum pharetra. Sed eget urna nibh. Cras imperdiet luctus risus fringilla efficitur. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Ut quis vulputate diam, vitae pellentesque mauris. Sed euismod convallis sapien, sit amet ultricies ipsum dictum vel. Pellentesque pretium ultrices consequat. 
Sed nec ante faucibus, tincidunt nunc eu, facilisis est. 36 | 37 | Sed consequat risus eget nulla ultricies, convallis semper enim ullamcorper. Nam ultrices velit eget augue fringilla ultricies. Etiam congue molestie ante, aliquam bibendum mi accumsan eget. Donec pulvinar justo nec odio malesuada imperdiet. Sed nunc erat, sagittis in velit sit amet, blandit convallis enim. Sed et ipsum vitae turpis viverra vehicula. Nunc lobortis sapien sit amet odio pretium, at consectetur risus congue. Vivamus cursus euismod tempus. Morbi est orci, fermentum et tellus a, scelerisque condimentum purus. Lorem ipsum dolor sit amet, consectetur adipiscing elit. 38 | 39 | Nam condimentum odio purus. Curabitur ut urna imperdiet, efficitur augue eu, ornare turpis. Nulla ullamcorper vulputate nisi vitae tincidunt. Morbi placerat arcu vel arcu varius, ac eleifend nibh cursus. Integer vitae mollis justo. Mauris hendrerit consequat nibh, vel viverra ante porta vel. Phasellus fringilla eu magna vehicula elementum. Duis vulputate mi id ante elementum, eget rutrum mi congue. Aliquam ornare et augue at iaculis. Proin ultricies hendrerit purus, a tincidunt est porttitor lobortis. Nunc faucibus libero lobortis vulputate convallis. Proin euismod erat fringilla consectetur maximus. Vivamus vel ex est. Duis dolor urna, pretium ac elementum vitae, vehicula quis dui. Nulla suscipit auctor arcu ac interdum. 40 | 41 | Donec id fermentum turpis, eget viverra eros. Cras purus sapien, blandit a augue in, ultricies finibus ante. Etiam ornare nulla porttitor bibendum dictum. Phasellus efficitur erat sed erat mattis, at faucibus urna efficitur. Proin sit amet massa in lectus sodales vulputate. Integer ac eros diam. Duis vitae imperdiet mauris. Vestibulum bibendum massa eros, facilisis tincidunt libero pellentesque vel. Nunc sit amet sodales erat, maximus fermentum augue. Aenean pharetra sit amet metus ac elementum. Integer porta orci sit amet urna tincidunt, vel consectetur erat malesuada. 
Fusce porta tincidunt neque eget blandit. Quisque quis euismod magna. Etiam a pretium diam. Aliquam condimentum a dolor sit amet lacinia. 42 | 43 | Suspendisse sollicitudin sapien non leo sagittis iaculis. In eget erat laoreet, ultrices elit eu, maximus metus. Maecenas luctus, diam eu ornare ornare, tellus elit congue urna, tincidunt mollis nibh sapien in magna. Phasellus ut dignissim lorem, eu ultricies neque. Nullam vitae odio non tellus commodo eleifend facilisis eu justo. Quisque vitae malesuada enim, eget ornare lorem. In hac habitasse platea dictumst. 44 | 45 | Integer rutrum, tortor sit amet volutpat faucibus, libero dui porta purus, ut luctus nisl magna a dui. Donec fermentum elementum massa vel tincidunt. Aliquam tincidunt, est sed porttitor vulputate, velit erat faucibus nisi, in porttitor arcu tortor vel ipsum. Donec et augue sem. In efficitur lacinia egestas. Donec sodales leo vitae leo auctor, eget tristique nibh consectetur. Aliquam tincidunt pretium consequat. Ut pretium, leo id rhoncus sollicitudin, lectus nisl pulvinar elit, eu laoreet ipsum orci vitae dui. Curabitur sit amet pharetra diam. 46 | 47 | Pellentesque maximus accumsan convallis. Nulla at vestibulum justo. Quisque sed ex nec purus commodo ornare. Sed metus nisl, pharetra ut efficitur eget, dapibus a felis. Etiam quis augue sodales, hendrerit velit vitae, rhoncus orci. Ut sollicitudin luctus mauris, ac congue massa vestibulum non. Sed dignissim accumsan quam, vitae maximus lectus ullamcorper nec. Duis quis enim quis eros consequat consectetur ac vel risus. Fusce et convallis justo. Nulla dignissim nulla quis libero dictum mattis. Proin porttitor finibus urna et semper. 48 | 49 | Vestibulum luctus orci sem, in sagittis arcu rutrum eu. Aliquam egestas velit quis tempus laoreet. Ut elementum risus sed purus iaculis, ut consequat magna facilisis. Etiam tincidunt aliquam augue, eu molestie neque suscipit id. 
Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Pellentesque vitae accumsan ligula. Cras porta pretium odio, vitae fringilla odio vehicula a. Donec finibus iaculis urna eu pulvinar. Nulla sed diam nunc. Donec a eros eget tellus gravida condimentum a vel nulla. Fusce vel suscipit est. Nunc tristique diam sit amet euismod congue. Vivamus ornare laoreet neque, in fringilla nisl efficitur vel. Nullam vehicula finibus cursus. Vestibulum sit amet elementum dui. 50 | 51 | Sed non lacus non elit vestibulum posuere et id sapien. Aliquam vestibulum, enim a egestas interdum, mauris nibh mollis tortor, et malesuada metus nulla vehicula leo. Sed non sem tortor. Ut nec nulla varius, porta purus ut, aliquet nisl. Aliquam felis nunc, tristique gravida justo ut, gravida ultricies sem. Nullam dictum, orci sed tincidunt consectetur, dui ipsum posuere dolor, non varius nibh nisi non mauris. Sed a consectetur sapien, quis gravida erat. 52 | 53 | Fusce sed neque mi. Donec urna orci, venenatis in augue a, mollis condimentum risus. Suspendisse ac sagittis est, et pretium velit. Sed nisl tellus, commodo nec faucibus non, scelerisque quis enim. In sollicitudin diam nibh, et blandit magna hendrerit a. Duis congue ac elit quis viverra. Etiam ut blandit quam. Aliquam lobortis felis vel arcu condimentum vulputate. Proin sed sapien consequat, egestas orci id, ullamcorper enim. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Pellentesque suscipit, lacus sollicitudin lacinia tempor, arcu sem dictum ipsum, eget feugiat tellus justo id urna. Mauris nec bibendum lacus. Morbi ut aliquam lorem. 54 | 55 | Ut auctor commodo libero in venenatis. Aenean mattis pretium molestie. Etiam quis bibendum mi. Orci varius natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Donec sodales laoreet arcu, non condimentum tortor condimentum a. Integer pulvinar mi eget porttitor rhoncus. 
Sed laoreet sem rutrum metus interdum vulputate et eget nisl. 56 | 57 | Donec augue nisi, ullamcorper at ullamcorper sit amet, pretium in ante. Mauris cursus commodo erat nec pharetra. Nulla eu dui sapien. Interdum et malesuada fames ac ante ipsum primis in faucibus. Duis nisi risus, vehicula faucibus nunc quis, lacinia consequat diam. Integer pharetra, ante quis pulvinar iaculis, nisi elit imperdiet lacus, sit amet tristique libero risus eu quam. Ut feugiat sapien diam, id lacinia tortor porttitor et. Praesent vestibulum congue dolor vitae euismod. Ut ligula odio, viverra nec pharetra ut, bibendum sed felis. Nullam porta gravida ligula eget pulvinar. Nullam pharetra feugiat enim interdum dictum. Maecenas mollis elit non dolor vehicula ultrices quis et odio. Orci varius natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Duis vel dolor tempus, volutpat sem vel, commodo sem. Praesent semper lectus nec purus faucibus, ut posuere dui vehicula. Donec feugiat turpis sit amet tortor varius, at consectetur urna ornare. 58 | 59 | Proin sodales dictum diam lacinia ultrices. Phasellus et lectus dolor. Aenean tincidunt mauris et metus vulputate tempus. Phasellus faucibus est ut massa cursus, vel imperdiet tortor semper. Aenean ultricies quis libero eu sagittis. Aliquam velit nulla, maximus sit amet nulla vel, aliquet condimentum purus. Vestibulum ultricies leo et neque facilisis faucibus. 60 | 61 | Donec semper porttitor mollis. Praesent eget malesuada ex. Proin consequat sed libero sodales egestas. Aenean vestibulum vitae est eu convallis. Vestibulum dolor ante, molestie ut felis lacinia, laoreet mattis tellus. Maecenas condimentum tellus quis semper ultricies. Etiam fermentum commodo urna et ultricies. Ut mollis, tortor eget vulputate sagittis, ex ante tristique libero, nec posuere ligula velit a tortor. Suspendisse eu semper nisi. Quisque suscipit sodales rutrum. 
Ut porta, magna at imperdiet porta, neque diam dictum nisl, id ornare leo orci faucibus libero. 62 | 63 | Nam posuere orci tellus, in sagittis tortor aliquam nec. Morbi eleifend velit nec urna viverra, sit amet porttitor urna feugiat. Nunc hendrerit, ligula elementum faucibus tempus, magna ante faucibus sem, a vestibulum mi mi eget lorem. Aenean a maximus nunc. Morbi ex arcu, porta id dignissim eget, vestibulum non dui. Quisque blandit risus dolor, quis porta eros volutpat id. In quis dictum augue, id posuere nulla. Donec volutpat turpis luctus nulla feugiat tristique. In luctus mattis dolor, vel sodales lectus sodales sed. 64 | 65 | Mauris in nisl risus. Nulla venenatis elit ut bibendum posuere. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Orci varius natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Pellentesque ultricies leo eget sapien iaculis, et vulputate lectus volutpat. Sed iaculis rutrum sollicitudin. Nam ut elit volutpat, facilisis dolor in, iaculis justo. Phasellus at enim non dolor cursus posuere a in orci. Donec lorem lorem, viverra in consectetur sit amet, ullamcorper vulputate nisl. Quisque eget tincidunt nulla. Curabitur malesuada mauris augue, non iaculis ligula bibendum eget. Duis risus tellus, semper vitae lectus sodales, varius faucibus augue. Suspendisse vestibulum vulputate leo, nec ultrices urna imperdiet quis. Mauris egestas purus eu ex convallis aliquam. Nunc pretium lorem sapien, vel tempus nunc ultrices id. 66 | 67 | Fusce in tellus nec nisi scelerisque molestie. Aliquam eros dolor, malesuada dictum pellentesque cursus, dignissim non justo. Quisque a sollicitudin nunc, at bibendum mauris. Interdum et malesuada fames ac ante ipsum primis in faucibus. Nulla in mattis metus, ut posuere velit. Nulla facilisi. Nullam posuere quam non quam gravida pharetra. Vivamus id ipsum at nunc sagittis feugiat. Vivamus ultrices vitae ipsum ac interdum. Sed fermentum tortor vel nunc imperdiet venenatis. 
68 | 69 | Suspendisse id dui luctus, tincidunt neque a, tincidunt diam. Morbi maximus nisi ante, ut aliquam tellus convallis in. Nulla eget quam velit. Sed ut nunc commodo, euismod neque id, sollicitudin lacus. Vivamus ut justo eget eros luctus varius ullamcorper et augue. Praesent ac scelerisque nibh, et interdum massa. Nulla facilisi. Nullam vestibulum eget lacus at molestie. Phasellus ante velit, ultrices eget ante vel, venenatis faucibus lectus. 70 | 71 | Nulla facilisi. Quisque dictum nisi tortor, non congue ligula blandit vitae. Nulla dignissim vitae ante eu porttitor. Mauris eget rutrum risus, nec tempor ante. Pellentesque maximus libero eget placerat egestas. Morbi sed lacinia erat. Praesent sagittis tincidunt dolor, euismod pellentesque lacus ornare at. Cras vehicula auctor luctus. Pellentesque efficitur, nulla eget pharetra rutrum, velit nisi vehicula eros, at pharetra felis arcu in tortor. Praesent ipsum orci, euismod id ante et, volutpat posuere libero. Vestibulum id odio vitae lorem sagittis pretium. Mauris feugiat feugiat eros non ornare. Integer sollicitudin, mi et gravida fermentum, elit quam tincidunt metus, eget ullamcorper nibh sem venenatis risus. Ut non placerat felis. Aliquam facilisis nisi urna, at consequat justo placerat ut. Vivamus bibendum leo posuere, aliquam ipsum at, aliquam enim. 72 | 73 | Mauris nec rhoncus ipsum. Cras risus diam, aliquam ut tortor id, suscipit lacinia ipsum. Aenean porta turpis libero, non posuere arcu vehicula sit amet. Sed aliquam neque in velit egestas porta. In id placerat massa. Integer ac felis id odio viverra facilisis eget in nibh. Cras nec nisi tincidunt, volutpat sapien quis, commodo diam. Nam ut metus eget sem porta rutrum. 74 | 75 | In hac habitasse platea dictumst. Sed nunc eros, viverra a tristique eget, viverra in erat. Quisque pretium ornare ex a porttitor. Etiam quis ex eu tortor venenatis luctus nec vitae risus. Nullam quis lectus vel ante ullamcorper ultricies. 
Maecenas lectus enim, pellentesque sit amet fringilla et, cursus vel nisi. Quisque nulla magna, malesuada et ultricies in, tincidunt sed metus. Vestibulum egestas tempus convallis. Curabitur commodo, dui at tincidunt lobortis, nisi neque venenatis tortor, nec varius est ante non augue. Sed vitae sodales libero, fringilla interdum orci. Cras nec fringilla lectus, et suscipit enim. Ut semper nunc sit amet placerat congue. 76 | 77 | Ut vulputate elit eget orci ullamcorper dictum. Etiam hendrerit consequat turpis, at tristique sapien finibus vel. Nunc molestie tincidunt ipsum ac ornare. In cursus euismod venenatis. Curabitur feugiat posuere ligula, consectetur euismod lacus pretium quis. Vivamus sed tortor gravida, suscipit dolor nec, commodo turpis. Nulla facilisi. Interdum et malesuada fames ac ante ipsum primis in faucibus. Nam sit amet tincidunt neque. Phasellus vulputate, leo facilisis tincidunt mollis, justo metus tempor ex, nec sollicitudin erat turpis sit amet erat. Etiam nec tristique leo. Cras porta ante non odio lobortis, et imperdiet diam maximus. Vivamus blandit et arcu ut tincidunt. Ut elit massa, euismod nec lobortis vel, tristique non eros. Donec efficitur, sapien a congue ultricies, velit dolor mollis tortor, ut mollis mauris nisi sit amet justo. Etiam id ante nisi. 78 | 79 | Sed elementum, magna vitae eleifend auctor, diam mauris rhoncus sem, ut faucibus orci dolor ut nulla. Donec imperdiet lectus ut lorem rhoncus varius. Suspendisse euismod, sem in volutpat vehicula, ipsum metus maximus justo, sed vehicula arcu turpis non lorem. Praesent interdum metus eu ex condimentum, ut tristique justo gravida. Quisque sem nulla, dapibus quis dignissim sit amet, pulvinar sit amet ante. Nunc in nibh neque. Nam nec massa commodo, luctus urna et, lacinia nulla. 80 | 81 | Mauris semper ac nibh et volutpat. Praesent quis odio semper est commodo ultricies at sit amet leo. Pellentesque tempor malesuada sem, ut pellentesque elit ullamcorper nec. 
Nulla a placerat nisi, sit amet cursus erat. Mauris id venenatis lacus. Nullam varius sodales arcu eu consequat. Suspendisse posuere eget nulla vitae vestibulum. Nulla convallis mauris mauris. Pellentesque ut molestie urna. Nam molestie nibh congue ex dignissim vehicula. Suspendisse aliquet id nunc sed scelerisque. Pellentesque nec pellentesque tellus. Nunc rutrum ipsum arcu, sit amet volutpat massa aliquam vitae. Nullam ut convallis magna. Pellentesque a mollis nisi. Fusce condimentum id sapien vitae semper. 82 | 83 | Nam eget volutpat libero. Etiam lobortis pretium arcu id lacinia. Donec malesuada lorem a erat convallis, sit amet convallis sem sollicitudin. Curabitur sed bibendum arcu. Etiam posuere sed leo at gravida. Donec et accumsan libero. Vivamus felis arcu, tincidunt eget mauris vel, vulputate mollis odio. Integer quis semper orci. Pellentesque rhoncus massa velit, sit amet varius erat imperdiet sit amet. 84 | 85 | Duis eu magna vitae sem scelerisque convallis. Sed faucibus turpis ac dolor mollis, lobortis pellentesque augue sollicitudin. Curabitur nibh mauris, tristique et laoreet eget, luctus vel ante. Etiam maximus vehicula vestibulum. Pellentesque scelerisque lectus quis lacus vehicula gravida. Duis vel tortor in urna maximus sodales vitae ac ex. Aliquam eleifend quam sodales pulvinar aliquam. In hac habitasse platea dictumst. Cras eleifend magna nec dolor dictum tempus. Nulla lobortis malesuada nisl. Duis laoreet massa vitae libero pellentesque volutpat. Maecenas nisl quam, semper ac viverra sit amet, vehicula in nulla. Duis non blandit odio, sed luctus diam. Aliquam non cursus orci. 86 | 87 | Maecenas pharetra ut urna eget dignissim. Mauris mollis vel nisl vel vehicula. Proin faucibus diam interdum mauris efficitur sagittis. Donec pulvinar ipsum quis sapien lobortis, ut sollicitudin enim fermentum. Etiam a velit et diam aliquet aliquam. Sed tristique cursus massa, eu imperdiet neque elementum a. 
Proin pulvinar sem arcu, id cursus urna tristique vitae. Cras fringilla lectus ac tellus posuere tristique. Vivamus at ante finibus, bibendum lacus ut, finibus erat. Vestibulum viverra turpis in enim dictum, vel pharetra turpis scelerisque. 88 | 89 | Nulla finibus metus turpis, a ullamcorper velit molestie ut. Quisque ornare orci molestie lacus volutpat bibendum. Quisque sagittis enim non justo convallis imperdiet. Phasellus quis quam sit amet ligula dictum iaculis. Etiam venenatis suscipit dignissim. Aliquam facilisis tortor tortor, sit amet iaculis leo fermentum in. Vivamus vitae ornare urna, ac volutpat ipsum. Nam hendrerit, metus et blandit condimentum, tellus ante interdum nulla, a tincidunt mauris enim a magna. Phasellus iaculis quam a neque tincidunt auctor. Proin consequat tempor magna, ac vehicula quam auctor et. Aliquam dictum ut lorem vel cursus. Etiam hendrerit neque enim, ultrices vulputate est maximus ut. Nam imperdiet pretium tempus. 90 | 91 | Aliquam erat volutpat. Suspendisse porttitor eget neque dapibus ornare. Quisque ut blandit massa, non facilisis nulla. In sed massa turpis. Aenean ac semper odio. Mauris sed porttitor ex, a gravida mauris. Suspendisse sed orci ut augue pharetra euismod. In dignissim vitae turpis vel lacinia. Proin porttitor ex erat, ut eleifend felis accumsan vitae. Morbi id tincidunt nisi, et pharetra purus. Sed risus ipsum, tincidunt eget vehicula at, finibus vel augue. Mauris velit turpis, posuere et blandit non, vestibulum rhoncus odio. Nullam quis ligula ac orci lobortis auctor. Nulla eget ipsum consectetur, facilisis risus sed, facilisis nibh. Morbi tristique turpis vel quam vestibulum viverra. 92 | 93 | Ut hendrerit risus mi. Donec purus enim, facilisis non consectetur quis, imperdiet id orci. Pellentesque at enim malesuada, luctus quam vitae, mollis quam. Maecenas tincidunt purus eget magna posuere tristique in nec erat. Cras eu mattis lorem. Morbi convallis neque commodo quam varius, nec suscipit lectus tempus. 
Morbi quam diam, tempus et volutpat ut, commodo a quam. Praesent velit urna, bibendum et dui sit amet, volutpat auctor augue. Etiam non ex est. Integer volutpat mauris id finibus cursus. Nunc sit amet leo nec ex blandit faucibus eu quis libero. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Cras quis lectus lectus. Donec eget eros finibus, convallis mi vitae, interdum metus. Vivamus vitae sem metus. 94 | 95 | In ullamcorper imperdiet sem, a vulputate nisi iaculis et. Aliquam ultrices, leo vitae eleifend rutrum, ex nunc tincidunt tellus, at varius massa magna sed nisl. Morbi ullamcorper, lorem quis fermentum posuere, nisi libero faucibus libero, nec pretium libero risus eu odio. Aenean in diam id sapien eleifend vulputate a tincidunt dui. Donec porttitor mollis mauris quis commodo. Morbi suscipit nibh quis elit faucibus, in elementum libero iaculis. Aenean ante sapien, ornare vel metus sed, dapibus mollis diam. Maecenas non ultricies massa, eget pretium erat. In sodales, lacus sit amet tempor vestibulum, erat erat viverra massa, ac semper ante augue vitae mauris. Nunc interdum, tellus a viverra vestibulum, sem erat tempus augue, ac bibendum neque erat a orci. Morbi arcu velit, molestie non magna eu, fringilla congue nisi. Orci varius natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. 96 | 97 | Sed condimentum orci in pulvinar porttitor. Praesent posuere, est sit amet rutrum ultrices, purus turpis lobortis sem, sed pulvinar nisl sapien in orci. Mauris at ultricies ligula. Nulla lacinia, enim maximus bibendum pellentesque, enim velit vestibulum arcu, et tincidunt urna est at lorem. Phasellus et arcu vitae nunc blandit vulputate. Nullam accumsan imperdiet metus. Sed at rhoncus elit, quis egestas urna. Nam in sem arcu. Integer volutpat semper dolor, nec posuere nisl aliquam vulputate. Nulla laoreet, tellus id mollis consectetur, sapien nisi fringilla dui, eget efficitur ligula purus et felis. 
Interdum et malesuada fames ac ante ipsum primis in faucibus. Sed blandit id orci sed mattis. 98 | 99 | Nullam vulputate, lacus eget congue aliquet, est nibh hendrerit purus, eu vulputate magna elit finibus libero. Ut nec nisi felis. Nam porttitor urna a neque eleifend, at hendrerit elit aliquet. Curabitur sagittis, felis et eleifend congue, sem est interdum metus, sed tempor ligula justo at leo. Suspendisse cursus tortor diam. Proin interdum nisi justo, quis vulputate nibh lobortis in. Curabitur faucibus sapien vitae orci varius dictum. In vulputate mauris ac vulputate dignissim. Mauris sit amet augue lorem. In eget porta sem. Nullam commodo augue a rhoncus commodo. Donec congue lacinia faucibus. Aliquam tincidunt odio a diam pharetra pharetra. 100 | 101 | Quisque quis ex a arcu scelerisque fermentum vel eu lorem. Nulla facilisi. Morbi non metus cursus, auctor enim eget, porttitor enim. Sed finibus cursus iaculis. Ut posuere aliquet lorem, quis consequat nisl condimentum sed. Phasellus feugiat mi scelerisque mi molestie tincidunt ac vel sapien. Sed suscipit, nisi vitae condimentum suscipit, orci lectus luctus urna, sit amet dictum massa lacus sed orci. Aenean at risus dictum, ultrices dui at, ultricies neque. Duis in tincidunt sapien. Aenean gravida magna fermentum turpis vulputate, convallis lacinia velit sagittis. Morbi ac risus finibus, cursus tortor eu, convallis mauris. In at feugiat sapien. Donec feugiat faucibus nisi, id sagittis libero maximus eget. Etiam rhoncus sit amet arcu at mollis. Curabitur condimentum id neque eu maximus. Vivamus posuere mi et molestie vulputate. 102 | 103 | Etiam placerat interdum purus, id hendrerit ante euismod a. Vivamus laoreet mi felis, ut euismod lorem auctor id. Integer velit nulla, rhoncus lacinia eleifend nec, feugiat eu quam. Nulla nec vehicula justo. Duis scelerisque at tellus non tincidunt. Nam accumsan diam dui, sit amet malesuada quam porta vitae. Donec a consectetur leo. 
Etiam tempus, elit a ullamcorper interdum, orci turpis tempor nisl, eget auctor nibh velit eu enim. Nullam sollicitudin scelerisque felis a imperdiet. 104 | 105 | Ut nec mollis risus. In malesuada euismod neque, vitae mattis odio elementum ac. Etiam eleifend euismod nibh et iaculis. Sed elementum id eros vel venenatis. Nunc vestibulum enim quis ante commodo, quis viverra erat eleifend. In nec elit nunc. Ut tempus blandit dolor in semper. Aliquam erat volutpat. Praesent eget magna orci. Duis eu varius diam. Nunc et rhoncus nulla. Integer vel ligula at eros aliquet rhoncus quis eget nibh. Mauris rutrum aliquam euismod. 106 | 107 | Sed sem libero, pulvinar volutpat molestie ut, placerat sit amet mi. Nulla pharetra vel erat ac pharetra. Aenean mattis dui a libero tempor scelerisque. Morbi nisl augue, eleifend eu risus id, auctor vehicula turpis. Nunc purus lectus, finibus ac neque sed, commodo tincidunt urna. Pellentesque vel purus purus. Nulla orci orci, blandit quis fringilla auctor, blandit id neque. Nulla eget est non quam consequat vestibulum in a nisi. Vestibulum feugiat ac quam ac tristique. Pellentesque in maximus nibh. Phasellus eget blandit ex, quis pretium nunc. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Nam non massa libero. Nam suscipit porttitor laoreet. Fusce semper condimentum arcu, sed fermentum purus dignissim ac. Maecenas sed lacinia quam, id commodo odio. 108 | 109 | Aliquam facilisis ante sit amet magna aliquet blandit. Integer ipsum leo, lobortis eget dui non, posuere rutrum nulla. Nunc ac diam at felis volutpat commodo quis in turpis. Sed id lectus sem. Nunc blandit at felis sed cursus. Vestibulum euismod vestibulum consectetur. Morbi a dolor quis lectus gravida auctor. Sed varius ante nec mi fringilla, nec imperdiet nunc feugiat. Nulla ac augue suscipit eros molestie maximus nec sit amet neque. Pellentesque rhoncus mattis nisi et ullamcorper. Fusce vel commodo nisl. 
110 | 111 | Proin luctus orci ut justo rutrum lobortis. Sed vestibulum tortor augue. Quisque eget pretium ante. Quisque semper rhoncus varius. Praesent ut volutpat sapien, sed vehicula massa. Suspendisse lacinia erat ac ligula commodo cursus. Etiam sit amet mollis magna. Nunc eget ullamcorper eros. Fusce quis venenatis lacus. Proin lobortis lectus sed libero lobortis semper. Mauris varius feugiat felis, sed commodo mi ullamcorper et. Nulla ullamcorper tempor cursus. 112 | 113 | In enim enim, viverra sit amet tincidunt eget, dapibus sed ipsum. Donec malesuada a augue dignissim feugiat. Duis elementum facilisis sem, sit amet porttitor libero hendrerit ac. Maecenas sagittis pretium dolor, eget posuere enim pulvinar non. Praesent lorem felis, ultricies et rutrum quis, consequat sed erat. Cras nunc augue, volutpat at mauris quis, aliquam vehicula odio. Sed consequat pretium arcu blandit auctor. 114 | 115 | Donec molestie, libero et lacinia eleifend, massa tortor pharetra nisi, in dignissim tellus nulla id erat. Cras tristique quam nec augue placerat, ac tempor enim malesuada. Praesent elementum lectus eget enim tincidunt suscipit. Maecenas blandit elit massa, at mattis lorem euismod quis. Cras euismod nibh vitae felis accumsan rhoncus. Sed vitae tortor sit amet ante dignissim malesuada in quis dolor. Integer aliquam malesuada leo, quis finibus leo pellentesque vitae. Duis suscipit accumsan eleifend. In lacus nibh, convallis at leo sit amet, viverra sagittis nisl. Duis pretium tempor luctus. Morbi eget facilisis sapien, at condimentum ipsum. Fusce non lacus lectus. Integer vel pharetra urna, quis mollis urna. 116 | 117 | Donec sed interdum lorem, non semper turpis. Donec pharetra accumsan porta. Nunc laoreet, erat eget euismod porta, ante quam blandit elit, sit amet facilisis erat eros eget sem. In ullamcorper neque id ligula feugiat, vel vestibulum lacus tempus. Sed volutpat, metus a posuere tincidunt, diam risus scelerisque ligula, eget scelerisque sapien odio in dui. 
Curabitur pretium justo eros, a elementum turpis facilisis et. Nunc id quam pharetra, mattis ante et, semper dui. Vestibulum quis quam sed elit semper sagittis. Cras pharetra luctus ante id consequat. Morbi consequat, magna quis dapibus semper, ante lectus faucibus justo, vitae lobortis sapien elit at tortor. 118 | 119 | Phasellus vitae rhoncus mi, nec tristique elit. In id eros ut lorem laoreet suscipit. Nulla porttitor at nunc at venenatis. Integer hendrerit accumsan lacus, a tempor nunc. Donec luctus tempus facilisis. Proin dictum mi ac viverra fringilla. Proin consectetur tempor felis, sed semper elit dapibus et. Donec et condimentum nunc, eu dictum dui. Nullam leo nibh, suscipit sit amet pulvinar varius, porttitor sed purus. Aenean sit amet sodales risus. 120 | 121 | Curabitur vulputate, lectus at maximus tincidunt, tortor nisi congue mi, non faucibus erat ipsum eget lectus. Nulla facilisi. Curabitur finibus ligula risus, sed elementum felis gravida et. Praesent blandit pellentesque tempor. Mauris maximus sapien auctor, maximus nunc sit amet, elementum mauris. Nam tempor ultricies efficitur. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Suspendisse suscipit lobortis mollis. Nunc quis nisi tempus, congue nulla a, tincidunt tortor. In molestie cursus feugiat. Phasellus eu nisl eu metus rutrum dignissim quis in erat. Praesent porta nisi mollis ipsum molestie, nec sollicitudin ex convallis. 122 | 123 | Phasellus suscipit pellentesque enim, non porttitor lacus sodales id. Etiam iaculis ullamcorper sodales. Fusce sit amet blandit diam, id semper mauris. Sed lacinia nibh sed quam lobortis varius. Duis consequat mi maximus, facilisis leo ac, ullamcorper dui. Ut pulvinar malesuada commodo. Aenean velit elit, venenatis eu venenatis sit amet, laoreet eu nibh. Integer iaculis nisl vitae nulla suscipit, at euismod nulla sagittis. Ut neque magna, condimentum non vulputate id, tempor ac nisi. 
Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Suspendisse potenti. Donec id nisl in magna tincidunt accumsan ut vitae augue. Phasellus neque arcu, maximus at feugiat nec, molestie sed nisl. Aliquam vel lorem consequat neque dignissim molestie sed eget magna. 124 | 125 | Integer sed leo ut ligula rutrum ullamcorper. Donec at lectus leo. Fusce ac risus vitae risus lacinia dignissim vestibulum non nibh. In hac habitasse platea dictumst. Nulla purus velit, interdum vitae massa a, convallis suscipit quam. Maecenas sit amet eros maximus, elementum magna eget, tristique ex. Suspendisse molestie sed purus id varius. Quisque aliquet neque est, ac maximus justo pulvinar bibendum. 126 | 127 | Phasellus et efficitur velit, a fermentum nisl. Fusce tincidunt, enim vel laoreet dapibus, diam tellus consequat eros, sed consectetur tellus nisi et arcu. Nulla et tempus felis. Nunc feugiat est auctor ex scelerisque placerat ac et lacus. Integer ultricies ac magna ut lacinia. Phasellus neque ex, viverra et magna vel, venenatis luctus diam. Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia curae; Sed tempor erat risus. Duis egestas augue ut laoreet iaculis. 128 | 129 | Nam vel fringilla lorem. Pellentesque lectus tellus, scelerisque in dolor id, blandit consequat mi. Integer molestie elementum dolor, quis maximus quam fringilla a. In pretium ligula et orci bibendum gravida. In bibendum tellus erat. Phasellus faucibus auctor commodo. Nam aliquam eros sed ante porttitor, sit amet fermentum magna sodales. 130 | 131 | Proin neque ex, tincidunt et accumsan ac, condimentum ac tortor. Aenean non risus sed nisi rutrum rutrum. Donec id congue orci, euismod vehicula sapien. Donec eros turpis, tempor sed ante sit amet, egestas posuere diam. Maecenas lacinia auctor ligula, vitae eleifend augue aliquam sed. Mauris quis nibh id magna imperdiet luctus in ac justo. 
Mauris pulvinar ante et justo fringilla, eget aliquet nisl bibendum. Maecenas condimentum ornare erat sit amet ornare. Quisque velit ex, euismod sed sapien at, luctus scelerisque metus. Donec faucibus, orci nec sollicitudin lacinia, velit massa mollis justo, ut porta velit purus eget neque. Etiam orci risus, varius nec enim in, rutrum porttitor nisl. Praesent nec sapien vitae quam porta fringilla nec sed nunc. Phasellus non efficitur arcu. Nunc ex ligula, vehicula vel ipsum non, feugiat congue quam. Praesent aliquam iaculis nunc, ac suscipit elit porta et. Nullam eu suscipit tellus. 132 | 133 | Pellentesque posuere eleifend sapien eu rhoncus. Sed hendrerit ornare risus sed tincidunt. Mauris et lacus tellus. Proin iaculis, magna semper finibus interdum, tortor libero interdum augue, nec dignissim mauris risus gravida diam. Nulla nec ultricies leo. Vestibulum bibendum vitae neque in faucibus. Maecenas ullamcorper, tellus sed aliquam aliquam, neque eros iaculis quam, vel finibus tortor elit sed urna. Mauris eu gravida erat. Aliquam sit amet vestibulum tortor. Sed mattis at justo id viverra. Mauris commodo nibh sit amet justo tincidunt, ac sagittis erat commodo. Donec sed dui molestie, ultrices ipsum quis, finibus justo. Etiam ut metus massa. 134 | 135 | Sed gravida et mauris id pharetra. Sed dignissim lacinia mi, sed ultrices turpis ullamcorper eget. Suspendisse vulputate mi aliquet quam volutpat, non auctor ante aliquet. Etiam congue rhoncus mauris, id convallis tellus ornare at. Pellentesque suscipit massa ut ipsum efficitur, vel sodales velit tincidunt. Fusce tempor risus at mollis venenatis. Donec sit amet quam nec velit consectetur aliquet id quis ante. Vivamus a ex at odio malesuada tempor. Quisque porta elementum nunc eu imperdiet. Cras vitae urna elementum, suscipit lacus ac, efficitur leo. Curabitur quis dignissim mi. Sed ut aliquam risus. Pellentesque porta quis purus vel varius. Proin sodales ultricies arcu. 
Vestibulum nisi massa, facilisis at ante nec, aliquet cursus lacus. 136 | 137 | Pellentesque felis mi, fermentum ut erat in, tempus placerat nulla. Integer ut dapibus augue. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Suspendisse porttitor mattis leo et iaculis. Duis eu mi eget nisi sodales dictum vel feugiat magna. Etiam vitae volutpat erat. Curabitur placerat odio ut nunc porttitor egestas. Aenean ac dui arcu. Duis scelerisque lectus vitae varius maximus. Donec mollis placerat quam, nec vestibulum nulla tempor et. Donec nec purus ut diam hendrerit convallis vel id diam. Aenean in mi at massa blandit consectetur quis vitae risus. Sed tempor dui in ante tristique, eu ultricies mauris euismod. Vestibulum id laoreet tellus. 138 | 139 | Curabitur in purus leo. Aenean at enim lacus. Praesent laoreet ullamcorper ex eu ultricies. Vivamus eu leo arcu. Suspendisse commodo risus eu vulputate imperdiet. Vivamus quis nisi nec ex cursus aliquam sed quis libero. Vestibulum pellentesque sollicitudin magna a placerat. Maecenas quis felis in nunc mollis sollicitudin eu at dui. 140 | 141 | Quisque condimentum, ex consectetur pretium bibendum, erat purus luctus velit, sed commodo nibh ante non neque. Nulla ultricies ipsum tortor, eget sagittis diam aliquet nec. Duis lacinia, dolor eu porta ornare, leo ligula ultricies nunc, et consectetur lorem mi eu turpis. Praesent a auctor metus. Fusce ac porta ipsum, sit amet faucibus turpis. Sed vulputate, velit sed maximus congue, velit felis vestibulum enim, id pulvinar sapien justo ut dui. Quisque at pretium mi. Sed suscipit lacus quis nisi sodales ultricies. In bibendum diam lacus, vel sodales sapien venenatis quis. Proin pretium placerat sapien, in pulvinar erat venenatis accumsan. Suspendisse a ante at velit suscipit egestas cursus quis lectus. Pellentesque vel lacus euismod, volutpat purus a, mollis odio. Morbi porta ex sit amet velit volutpat faucibus. 
Vestibulum ultrices odio orci, sit amet molestie neque tristique a. 142 | 143 | Fusce elementum massa vel elit suscipit euismod. Nunc ac libero eget augue suscipit vulputate. Integer turpis lorem, molestie a est a, pellentesque feugiat sapien. Ut venenatis vulputate tincidunt. Etiam dapibus placerat lorem at maximus. Donec id lorem ut nulla condimentum consectetur in at risus. Fusce congue gravida purus quis dignissim. Fusce sit amet massa quis lectus aliquet mollis. Sed porta risus non scelerisque venenatis. Nam laoreet congue tellus eget ornare. Maecenas consequat metus quis ex volutpat pellentesque. Sed vel facilisis arcu. 144 | 145 | Donec porta felis et arcu fringilla, ac lobortis nibh porta. Pellentesque rhoncus sapien quis ultricies pulvinar. Vivamus a ipsum ut ipsum dictum pellentesque. Quisque blandit tempus augue ut rhoncus. Nam sed tellus ante. Nunc gravida orci maximus orci vulputate, vel bibendum dui fermentum. Nunc vehicula mi magna, ut dignissim diam varius pellentesque. Sed commodo felis eu lacus aliquam scelerisque. Donec id augue in nulla congue pharetra quis venenatis velit. Nulla et magna metus. Proin ornare vestibulum feugiat. Aenean non dolor tellus. Integer auctor massa metus, in porta tortor gravida non. 146 | 147 | Nullam sit amet lorem dapibus, aliquet est ut, tempor massa. In hac habitasse platea dictumst. Vestibulum accumsan at diam nec commodo. Morbi nec nibh velit. Aliquam sit amet urna viverra, dictum arcu sed, mollis lacus. Vivamus interdum aliquam lobortis. Pellentesque feugiat nisl id mauris rutrum, ac euismod neque sodales. Nulla facilisi. 148 | 149 | Cras sit amet imperdiet lorem. Donec id lectus at nunc finibus tincidunt. Pellentesque vestibulum molestie tincidunt. Mauris tincidunt urna id nisl tempor pellentesque. Aenean mattis ullamcorper tellus, et fermentum urna placerat vel. Etiam commodo eleifend cursus. Nullam id arcu non velit elementum dapibus. Aenean nec nunc sit amet turpis aliquam facilisis. 
Aenean commodo, risus eget condimentum pharetra, ipsum turpis ornare quam, accumsan placerat felis ipsum id quam. Curabitur hendrerit lacus massa, lobortis dapibus orci iaculis ut. Quisque aliquam, metus in interdum tristique, diam mi commodo neque, quis hendrerit augue est sit amet libero. Aliquam erat volutpat. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Maecenas a fermentum neque, vel pharetra lectus. Phasellus scelerisque mi sed dictum dapibus. 150 | 151 | Quisque feugiat lectus quis felis blandit, non elementum sem efficitur. Nulla facilisi. Vestibulum a scelerisque ante. Aenean luctus libero elit, vitae iaculis augue aliquet quis. Vestibulum enim sapien, auctor ac finibus id, tristique in lacus. Suspendisse et dolor dui. Cras condimentum sodales lacus, at rhoncus arcu vulputate nec. Fusce imperdiet auctor urna vel ornare. Ut augue mauris, fringilla ac consectetur non, suscipit a orci. 152 | 153 | Nulla fringilla dolor mauris. Etiam fringilla neque vel tortor vehicula venenatis. Aenean cursus eu neque a eleifend. Vestibulum vitae dapibus lectus. Orci varius natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Curabitur dapibus mattis sapien ut fringilla. Phasellus condimentum rutrum nibh vitae malesuada. Aenean molestie sed magna ac hendrerit. 154 | 155 | Sed id dictum enim, et lacinia eros. Donec tempor lectus lacinia varius vehicula. Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia curae; Aliquam eros massa, mollis sed erat sit amet, interdum porta erat. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Donec consequat felis non posuere tristique. Donec ornare iaculis consectetur. Nullam vel mauris tellus. Sed congue quis turpis ut luctus. Duis malesuada enim ac ligula feugiat, et condimentum neque malesuada. Morbi urna ligula, porta sed sollicitudin sit amet, rutrum non lacus. 
156 | 157 | Vivamus elit mauris, venenatis in magna sed, mollis molestie ex. In quis velit dui. Mauris euismod lectus a tortor tincidunt lacinia eget eu velit. Donec eget auctor lectus. Quisque ut lectus imperdiet, venenatis massa at, dictum orci. Sed pulvinar porta feugiat. Praesent semper commodo ex eget placerat. Praesent accumsan elit finibus ipsum cursus, ut fermentum nibh auctor. Ut elementum, tortor at interdum accumsan, nibh mauris sodales libero, ut mattis est nisl eget lacus. Sed hendrerit sed massa at mattis. Interdum et malesuada fames ac ante ipsum primis in faucibus. Cras ac mi aliquam, iaculis dolor eget, dignissim metus. Aliquam sit amet mauris vitae nunc molestie vulputate et vitae ex. Vestibulum rhoncus et magna ut auctor. Phasellus in est neque. Fusce ornare euismod ante eget rhoncus. 158 | 159 | Fusce ligula nibh, molestie sed dui ut, tempus dapibus elit. Etiam odio nulla, molestie sit amet feugiat molestie, volutpat vel neque. Integer lobortis, risus non vestibulum condimentum, eros purus fringilla diam, sit amet blandit eros enim eget leo. Ut non magna eget mauris egestas varius sollicitudin a eros. Donec lobortis, nibh sed rutrum malesuada, velit purus porttitor ante, ut auctor arcu risus ut urna. Donec eu interdum ipsum. Aliquam mollis vestibulum pulvinar. Integer nec fermentum nunc, ut vulputate nibh. Nam convallis est nec augue porttitor vulputate. Integer efficitur elit sem, non venenatis felis facilisis eu. 160 | 161 | Aenean eleifend nibh eget tellus feugiat rhoncus. Quisque cursus leo sed luctus blandit. Donec tellus justo, lacinia quis maximus vitae, viverra in neque. Mauris purus leo, porta ac viverra a, pretium ut metus. Cras dolor ipsum, mattis ut felis nec, mollis finibus nibh. Nunc at finibus felis. Nam semper magna eget quam feugiat, in dapibus augue hendrerit. Phasellus mattis mollis nulla, eget pulvinar mauris tincidunt quis. Quisque sodales a quam at iaculis. Aliquam id purus at dolor dignissim maximus eu vitae velit. 
Curabitur ut orci vel diam iaculis volutpat nec volutpat mauris. Sed venenatis maximus aliquet. In hac habitasse platea dictumst. Nullam suscipit elit venenatis turpis dignissim, sed pretium est bibendum. 162 | 163 | Phasellus id enim id augue condimentum hendrerit ac sed ex. Maecenas blandit vitae odio id consectetur. Integer semper malesuada est, id placerat ligula condimentum pellentesque. Aliquam semper nisi dolor, sit amet rutrum nisi hendrerit laoreet. Etiam accumsan ante et turpis vehicula viverra. Maecenas hendrerit turpis nec ante commodo venenatis. Interdum et malesuada fames ac ante ipsum primis in faucibus. Nam nunc libero, mattis et ligula a, euismod efficitur diam. Nam imperdiet nisi enim, a tincidunt velit luctus sit amet. Nam congue, risus semper cursus porta, sapien ante ultricies sem, at sodales velit erat ac leo. Aenean rutrum, nisl ac lacinia faucibus, ligula est feugiat quam, sit amet accumsan felis est facilisis ligula. Vivamus eu gravida urna. Proin sit amet turpis magna. Suspendisse scelerisque lectus arcu. 164 | 165 | Duis facilisis dolor dui, quis tincidunt enim tincidunt eu. Donec tempus, metus eu lacinia porta, massa orci tempus turpis, sed condimentum ipsum orci sit amet erat. Suspendisse et cursus metus. Duis consectetur quam eget eros consequat, id laoreet turpis malesuada. Aliquam ornare volutpat ultricies. Phasellus sit amet ligula lacus. Duis ac nisl non eros sagittis pulvinar eu non est. Quisque ullamcorper quam et massa vehicula luctus in non lectus. Aliquam vel urna et massa pretium bibendum ut vitae ipsum. Maecenas egestas lectus urna, vel tempor ex egestas ut. Proin imperdiet mi mauris, in eleifend est facilisis vel. 166 | 167 | Phasellus et volutpat sem. Vestibulum in cursus diam. Nullam fermentum velit in velit dictum pharetra. Aenean quis egestas massa. Pellentesque sollicitudin nibh nec finibus consequat. Cras eget leo lacus. Nullam metus urna, tempus id vehicula vel, molestie non ante. 
Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Nulla facilisi. Pellentesque vestibulum viverra tristique. Aliquam id sapien ante. Phasellus non tincidunt neque. 168 | 169 | Mauris porta placerat tempus. Nam velit erat, porttitor convallis tempor ut, consectetur blandit nibh. In augue elit, pellentesque a erat id, finibus viverra sem. Praesent bibendum aliquet metus. Phasellus commodo dolor nisl, vel accumsan mi venenatis id. Aliquam in viverra nulla. Vestibulum posuere iaculis ipsum eget accumsan. Donec lectus nulla, imperdiet sed placerat sed, porttitor non ipsum. Suspendisse cursus efficitur ex eu cursus. Aliquam rhoncus efficitur imperdiet. Etiam a mauris quis ex mollis consectetur in eu magna. 170 | 171 | Praesent nunc neque, facilisis at neque sed, auctor sagittis tellus. Aliquam a luctus odio. Donec consectetur commodo mauris eu eleifend. Mauris nulla risus, molestie gravida dolor at, rutrum congue lacus. Morbi at lobortis sapien. Quisque condimentum condimentum libero, vel ultricies dolor dapibus nec. Curabitur in dolor vel ipsum semper consequat luctus non ex. Maecenas vestibulum viverra sapien et pretium. Quisque dignissim pellentesque diam. Aenean lectus tortor, porttitor id vehicula in, euismod egestas purus. Aenean eget ullamcorper mi. Aliquam porta augue ac nunc malesuada consequat. 172 | 173 | Aliquam ut nunc et nisi semper faucibus. Aliquam sem erat, feugiat eu velit non, sodales molestie quam. Fusce odio mauris, suscipit eget ligula et, ultricies bibendum nibh. Ut viverra felis ut dignissim luctus. Vivamus luctus suscipit lacus, eget pellentesque dui porttitor quis. Pellentesque eget posuere purus, eget accumsan dolor. In nisl lorem, pulvinar et tempus vel, interdum in ipsum. 174 | 175 | Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia curae; Donec bibendum metus ipsum, vitae egestas turpis luctus eu. Nunc sit amet aliquam elit. Aliquam quis ullamcorper augue. 
In sit amet sem sit amet augue tincidunt commodo sed eu nunc. Nullam dignissim tempor justo sed tempus. Pellentesque volutpat, ex id vulputate vehicula, libero mauris ultricies nulla, at placerat velit ante quis justo. Vivamus rutrum pulvinar lectus, quis vehicula nunc dapibus at. Orci varius natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Fusce at est nec quam tempus aliquet non sit amet ipsum. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Proin eu egestas tortor, non scelerisque nisi. Maecenas ultrices ante a velit lobortis, in aliquam tellus scelerisque. Donec auctor viverra rhoncus. 176 | 177 | Nullam lobortis convallis lorem eu accumsan. Sed dui ante, aliquam eget velit at, fermentum pellentesque lorem. Sed metus nisl, tincidunt sed pharetra ut, maximus id dui. Nulla aliquet magna eget nibh commodo venenatis eu eu tortor. Donec in orci ut nisl placerat tincidunt vitae vel diam. Quisque quis bibendum erat, faucibus blandit metus. Donec eget placerat orci, at vehicula tortor. Etiam purus nisl, tempor id massa quis, mollis condimentum velit. Ut egestas mi urna, quis ultrices ipsum tempus eget. Etiam lorem mauris, iaculis convallis metus sed, varius malesuada turpis. Mauris fringilla nunc ac orci ullamcorper rhoncus. Suspendisse dignissim tellus nisi, eget tempor tortor maximus sit amet. 178 | 179 | Ut bibendum enim eu justo aliquet imperdiet. Aliquam in aliquam erat, id euismod nunc. Donec dui erat, semper vitae lectus ut, placerat imperdiet augue. Ut mauris metus, ultrices et dignissim ut, hendrerit eget tellus. Nulla facilisi. Pellentesque pretium, massa at luctus convallis, dui urna pulvinar sapien, ut commodo erat est vel metus. Nulla sit amet purus nisl. Mauris eget gravida dolor. Etiam ligula sapien, consequat vel erat eget, finibus euismod nisi. Phasellus ornare libero placerat urna faucibus dictum. Nulla at faucibus enim. Nam tincidunt dolor nibh, rhoncus condimentum augue imperdiet condimentum. 
Ut rhoncus eget ex quis dignissim. Nulla accumsan dui ut urna commodo, ac efficitur sapien laoreet. In malesuada eros ac dui consectetur, lobortis iaculis sem mollis. 180 | 181 | Mauris placerat nulla vitae nulla condimentum mattis. Quisque lobortis elit ut pretium vestibulum. Nunc finibus leo et justo ultrices vulputate. Aliquam egestas, dui et bibendum pellentesque, lacus mauris facilisis dolor, sed dictum elit ipsum vel mauris. Ut tortor orci, feugiat sit amet nibh et, volutpat aliquam metus. Sed porta dolor purus, id mattis erat suscipit nec. In hac habitasse platea dictumst. Praesent quis augue tincidunt, eleifend augue quis, vehicula elit. Fusce vel mauris eu massa lobortis feugiat. Aenean hendrerit lacinia nunc, at venenatis mi rutrum vitae. Sed ut lacus et dui eleifend gravida eu in neque. 182 | 183 | Donec consequat eros at ultricies lacinia. Suspendisse nisi leo, rhoncus at aliquet elementum, cursus ut neque. In non erat at nulla iaculis molestie mollis hendrerit eros. In finibus scelerisque ex, eget rutrum metus finibus sit amet. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Donec non luctus ligula, ut tempor lorem. Morbi egestas volutpat metus sit amet iaculis. Suspendisse eros sapien, auctor vel faucibus quis, tristique vitae purus. Aenean lobortis diam nibh, a hendrerit quam egestas eu. Orci varius natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Maecenas ornare ac mi at consectetur. Donec nec euismod sapien. Cras facilisis a diam non imperdiet. Sed non arcu diam. 184 | 185 | Suspendisse potenti. Duis turpis turpis, imperdiet a accumsan in, consectetur ut justo. Mauris porttitor elit metus, non aliquet ex condimentum ac. Nam at eleifend tortor. Proin in ullamcorper quam. Duis tempor purus in est convallis vehicula. In eu ullamcorper nunc. Nam feugiat justo non risus pharetra fermentum. Nunc in orci nec ligula finibus bibendum eget nec sapien. 
Fusce enim elit, laoreet ac pulvinar vitae, convallis a odio. Sed malesuada purus eu massa sollicitudin pulvinar. Integer lobortis eleifend posuere. Quisque consectetur magna vitae nunc imperdiet tincidunt vitae ornare nunc. 186 | 187 | Nam convallis, sem at porttitor tempus, enim nunc tristique sem, tincidunt suscipit turpis ligula non urna. Nullam tellus eros, auctor at rhoncus id, bibendum fringilla augue. Sed fermentum felis quis nulla egestas, id aliquet urna tempor. Praesent ut orci diam. Nunc scelerisque ipsum arcu, in ornare quam feugiat vel. Sed ullamcorper turpis justo, nec suscipit elit lobortis a. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Etiam dignissim arcu non est volutpat, nec pellentesque justo scelerisque. 188 | 189 | Curabitur ac maximus ante, non consequat urna. Proin turpis mi, egestas nec justo quis, semper cursus augue. Morbi auctor tempus quam id viverra. Aenean at est ut ante lobortis pharetra. Cras eros nisl, mollis ac euismod vel, ornare quis enim. Sed risus neque, accumsan nec orci sagittis, maximus pharetra purus. Quisque imperdiet purus est, et porttitor ligula ultrices ut. Donec congue lorem eget ornare porttitor. Nam egestas orci nec erat commodo bibendum. Fusce fringilla velit at diam elementum, vitae fringilla quam varius. Donec congue sapien id nisl pharetra viverra. 190 | 191 | Donec et lectus quis purus convallis iaculis eu sed est. Donec euismod, ante vitae faucibus vulputate, neque magna facilisis enim, elementum blandit tellus libero id purus. Fusce ut pretium lectus. In ac elit bibendum, placerat justo in, mollis nulla. Aliquam non ex eu nisl placerat pretium. Sed fringilla rhoncus aliquet. Aliquam vulputate posuere rhoncus. Fusce feugiat massa erat, et vehicula turpis mattis ut. Pellentesque nec sodales velit, et mattis magna. Proin mattis ipsum eget dictum ornare. Aliquam vitae porttitor enim. Aenean ut ornare nisi, ut imperdiet diam. Maecenas maximus eros vitae lacinia posuere. 
Quisque ac enim vulputate, sodales odio at, commodo dolor. 192 | 193 | In finibus nisl consequat, efficitur dolor nec, condimentum ligula. Sed nulla nunc, posuere sed consequat a, porttitor sed risus. Phasellus ornare fermentum felis sit amet pretium. Orci varius natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Vivamus mattis laoreet sapien sit amet facilisis. Mauris a ipsum quis elit sagittis euismod. Mauris aliquam lectus nec nibh pellentesque sodales. 194 | 195 | Etiam tortor massa, dapibus ac purus in, faucibus auctor nisl. Curabitur sed suscipit sem, vel sollicitudin metus. Praesent vulputate nisi congue dolor viverra scelerisque. Nunc fermentum urna eget purus faucibus laoreet. In eget aliquet risus. Praesent et massa iaculis, sollicitudin mauris sit amet, ultricies diam. Duis accumsan varius massa. Etiam efficitur dolor sed lobortis bibendum. Nunc at rutrum justo, ac ornare lacus. Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia curae; Pellentesque viverra, mi eu tristique pharetra, justo mauris maximus urna, et dapibus mi sapien et nibh. Mauris id commodo nunc, at fermentum lorem. Aenean ac nisi odio. Curabitur mi felis, sollicitudin vitae consectetur ut, interdum ac sem. Nulla non odio facilisis, laoreet nibh ut, venenatis purus. In hac habitasse platea dictumst. 196 | 197 | Fusce euismod est ipsum, id cursus erat suscipit vitae. Mauris sit amet pellentesque ante, quis bibendum purus. Nulla at diam vestibulum justo laoreet vehicula vel quis arcu. Nam aliquet sed mauris in placerat. Nam imperdiet libero et eros mattis, sed viverra enim iaculis. Nam sed eros imperdiet, tempor neque nec, varius lectus. Nullam malesuada suscipit nibh id convallis. Pellentesque id dignissim quam. Praesent blandit, tellus a fringilla dignissim, risus justo dapibus enim, ut vehicula justo lacus eget turpis. Donec vel volutpat augue. Praesent quis rhoncus sapien. Nunc eu aliquam quam, vel semper tortor. 
Aliquam erat volutpat. Maecenas ullamcorper, nulla ac consequat pellentesque, sem libero rutrum velit, sed porttitor turpis nulla sit amet leo. Quisque ut leo mi. 198 | 199 | Maecenas tincidunt pulvinar interdum. Sed sagittis nisi sit amet tellus faucibus, non pretium erat molestie. Nam elementum dolor in erat faucibus egestas. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Aliquam feugiat mattis eros nec porttitor. Fusce finibus ultrices lacus eu pulvinar. Sed bibendum augue ante, at fringilla nisi tristique ac. Phasellus quam purus, laoreet at consequat ut, efficitur quis lorem. Phasellus justo nunc, efficitur ut laoreet efficitur, feugiat eu massa. Sed ullamcorper porta ex vitae iaculis. 200 | 201 | Morbi sed leo non leo hendrerit viverra. Duis fermentum dolor eget sapien rhoncus volutpat. Maecenas odio justo, condimentum ac libero a, bibendum facilisis est. Suspendisse vitae libero viverra, pharetra diam sed, hendrerit leo. Vivamus tempor lacinia venenatis. Vestibulum sed hendrerit sapien. In sit amet augue eget orci egestas luctus et fermentum orci. Morbi a erat quis tortor cursus convallis quis non purus. Suspendisse finibus luctus nunc at egestas. Vivamus volutpat nunc eu erat malesuada sollicitudin. Quisque semper pellentesque lorem, ac rutrum nibh placerat at. Aenean ut neque egestas, consectetur justo vitae, consectetur justo. Aliquam sit amet risus facilisis, dapibus neque sed, aliquet mauris. Etiam pulvinar auctor elit, cursus dapibus velit euismod a. Nullam eu risus ut nisi dignissim tincidunt in et orci. Donec mattis dui eu ipsum ullamcorper, in mollis lectus mollis. 202 | 203 | Cras ante eros, porta mollis purus ut, maximus varius dolor. Interdum et malesuada fames ac ante ipsum primis in faucibus. Mauris porttitor orci leo, eu luctus neque lobortis vitae. Sed tristique finibus ante, id posuere odio aliquam eget. Duis urna sapien, malesuada eu lectus nec, semper ultricies libero. Fusce ac efficitur eros, eget viverra lectus. 
Nunc vulputate hendrerit tempus. Integer rhoncus urna a ante lobortis, vitae eleifend purus fringilla. Etiam suscipit libero non turpis tincidunt varius. 204 | 205 | Quisque eu dapibus tortor. Maecenas sed consectetur sem. Ut quis ullamcorper turpis, eu porta enim. Aliquam egestas nunc magna, vel ullamcorper augue tincidunt vitae. Aenean viverra diam magna, at ornare nibh rutrum vel. Duis eget tincidunt ligula. In rhoncus id turpis nec blandit. Ut nec orci mi. Praesent dignissim ipsum augue, ac auctor sapien eleifend ut. Suspendisse potenti. Nunc lobortis dapibus sem, id tempor mi imperdiet a. 206 | 207 | Morbi ac rutrum massa, a ultricies risus. Suspendisse pharetra mollis cursus. Vivamus non tristique ex, nec iaculis enim. Vestibulum sodales ultricies nibh sit amet dignissim. Sed at felis urna. Aenean convallis leo vitae nisl consectetur, vitae congue justo maximus. Maecenas fringilla non dolor faucibus aliquet. Nam suscipit erat eget ante hendrerit varius. Vivamus dapibus neque sit amet velit interdum ornare. Nullam aliquam, lacus at ullamcorper dignissim, erat velit tincidunt diam, vel sodales velit tellus ut nisl. Nam commodo condimentum elit. Pellentesque rutrum ante sit amet consectetur egestas. Phasellus leo justo, rutrum at ornare varius, laoreet id enim. Proin tincidunt justo sed velit vulputate, sit amet feugiat sem ultricies. 208 | 209 | Sed vulputate velit sit amet leo pulvinar ullamcorper. Pellentesque ac ante scelerisque, blandit quam ut, venenatis urna. Aenean quis justo libero. Vivamus maximus ornare sollicitudin. Nulla non metus nulla. Pellentesque porta at justo id auctor. Morbi at rhoncus quam, rhoncus commodo nibh. In hac habitasse platea dictumst. Fusce quis libero a lectus iaculis vestibulum. 210 | 211 | Quisque erat lacus, fermentum et est ut, vestibulum venenatis ex. Nulla id aliquam nibh. Aenean gravida euismod vehicula. Nulla ornare pellentesque lectus, sed sollicitudin eros porttitor non. Nam non odio nec justo sagittis iaculis. 
Curabitur pellentesque lobortis neque et malesuada. Sed condimentum, enim eu pharetra finibus, enim nulla luctus lorem, vitae laoreet ipsum massa sit amet leo. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Praesent eu vestibulum mi, eu venenatis tellus. Suspendisse eu pretium tortor. Nulla vulputate sem non ipsum tincidunt sodales. Aliquam lectus neque, elementum in vehicula ut, sagittis et nibh. 212 | 213 | Sed pulvinar facilisis orci, at pulvinar dolor dignissim eu. Integer mattis ex nec lectus scelerisque semper. Etiam at facilisis leo. Aliquam erat volutpat. Nulla mattis fermentum malesuada. Quisque commodo a dui quis vehicula. Integer congue pharetra urna, sit amet mattis dolor porta eu. Morbi interdum elementum diam sed semper. Nulla cursus et elit eget semper. Vivamus mollis vehicula dui, sit amet mattis sem viverra a. Vivamus tempus, lacus id porta viverra, felis ligula blandit ipsum, nec consectetur velit sapien at nunc. Orci varius natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. 214 | 215 | Phasellus et est ac tortor egestas tristique a a justo. Morbi id nulla ullamcorper, imperdiet dui in, mattis est. Morbi a urna nulla. Phasellus sapien nisl, tempus vitae ante nec, convallis blandit libero. Donec sed mattis turpis, quis porta tellus. Vivamus velit velit, sollicitudin convallis convallis sed, commodo nec augue. Nulla a tincidunt justo. 216 | 217 | In interdum libero porttitor risus fermentum, vitae feugiat ex facilisis. Aliquam tincidunt at diam id feugiat. In sit amet blandit tellus. Donec eu rutrum nisi. Morbi congue odio mauris, eget porttitor est fermentum non. Cras ullamcorper dictum velit, non porttitor mauris. Fusce ut varius justo. Nunc vitae mi interdum, pharetra neque sed, luctus tellus. Integer ex nisl, placerat eu purus vel, maximus pellentesque risus. Donec commodo et diam luctus gravida. Phasellus finibus ante sit amet massa consectetur tincidunt. Suspendisse potenti. 
Donec ut turpis dignissim, dignissim lacus at, molestie dolor. Quisque vitae maximus dolor. 218 | 219 | Nunc dolor nibh, ultricies convallis cursus at, viverra ut velit. In blandit augue tellus, eu pharetra augue fermentum at. Suspendisse sollicitudin augue eget pharetra faucibus. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Interdum et malesuada fames ac ante ipsum primis in faucibus. Aliquam nec lobortis urna, at sagittis ex. Proin finibus faucibus tellus, quis congue mauris sagittis quis. 220 | 221 | Fusce id varius felis. Morbi consequat dolor a fringilla porta. Ut sit amet elit tortor. Suspendisse ut venenatis ligula. Curabitur eget lorem magna. Nulla sed tortor vestibulum, tempus leo et, lacinia elit. Etiam in purus a diam hendrerit tempor. Ut scelerisque sapien nisl, id efficitur metus aliquam vitae. Praesent congue lacus felis, in condimentum ex sollicitudin eget. Vivamus ullamcorper magna lacus, eu suscipit leo lacinia eu. In ultrices, nibh in aliquam fringilla, leo diam porta libero, in posuere neque tellus sed libero. 222 | 223 | Fusce a augue felis. Praesent viverra porttitor nisi, dapibus volutpat magna ultrices eu. Suspendisse nisl tortor, tristique at dictum non, scelerisque quis purus. Quisque vitae lectus non nisi aliquam tempor in sit amet sapien. Nulla dapibus vulputate dapibus. Vestibulum sed magna quis ex rhoncus semper. Nunc consequat elit eu semper pharetra. Curabitur pharetra metus commodo pharetra tincidunt. Mauris quis felis at nunc vulputate suscipit. Integer hendrerit fermentum ullamcorper. Cras pretium augue eget elit ullamcorper, id tincidunt arcu porttitor. Aenean porta augue non magna auctor, eget tincidunt ipsum facilisis. Cras facilisis, nisl eget eleifend aliquet, tellus nulla interdum ex, non egestas lorem tortor nec metus. 224 | 225 | Fusce ultrices, tellus ac finibus ullamcorper, lorem massa volutpat sem, sit amet viverra odio odio eget velit. 
Vestibulum eget urna elementum, interdum magna id, pellentesque sapien. Nam ipsum dolor, lacinia in volutpat at, porttitor mattis nulla. Etiam ultricies at libero id aliquet. Nullam id sodales purus. Etiam bibendum fringilla interdum. Cras quis orci volutpat, congue est at, suscipit elit. In porttitor, sapien vitae faucibus condimentum, metus massa laoreet massa, a dapibus risus lacus at dolor. Donec finibus eros vel commodo lacinia. Fusce molestie neque non ipsum rutrum sodales. Proin lectus libero, scelerisque vitae auctor ut, interdum et libero. 226 | 227 | In imperdiet dignissim dui in luctus. Sed neque arcu, aliquet sit amet elit vitae, venenatis tincidunt nulla. Nulla condimentum tristique elit, eu rutrum nulla egestas dapibus. Cras faucibus, leo a dapibus porta, magna diam tincidunt dui, vehicula efficitur odio eros eget nunc. Maecenas mollis nibh in mauris venenatis, nec condimentum quam tincidunt. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Fusce ac magna a velit imperdiet eleifend. Lorem ipsum dolor sit amet, consectetur adipiscing elit. 228 | 229 | Etiam nec efficitur neque. Vestibulum placerat justo sapien, eu imperdiet ipsum vehicula et. Praesent laoreet sapien lectus, vitae posuere justo varius ut. Morbi euismod rutrum odio sed fermentum. Phasellus vehicula mauris quis venenatis rhoncus. Aliquam erat volutpat. Cras mauris quam, cursus vitae mollis a, eleifend at libero. Sed dictum viverra maximus. 230 | 231 | Duis nibh tortor, pellentesque in mi eget, cursus scelerisque mi. Vivamus nisl leo, fringilla ut lorem vel, elementum varius eros. Vivamus porta lacinia metus, in posuere lectus congue in. Donec lacinia lacus id lectus faucibus, tempus auctor tellus aliquam. Vestibulum hendrerit bibendum maximus. Praesent vel diam ante. Fusce elementum erat vitae diam scelerisque faucibus. Nam dapibus quis ante et commodo. Fusce quis malesuada quam. 
232 | 233 | Ut dignissim tempor tortor, at bibendum est hendrerit at. Nam in ligula ac enim dapibus tempus in a lectus. Sed mollis metus vel ullamcorper ultrices. Nullam nec semper magna. Nullam egestas velit vehicula arcu lacinia, sit amet scelerisque metus tempor. Donec quis ullamcorper magna, sit amet lobortis ante. Maecenas eget felis vitae nisi pulvinar volutpat vitae accumsan neque. Donec nec interdum lacus. Donec porttitor ullamcorper consequat. Duis varius, turpis quis finibus efficitur, nunc nulla condimentum justo, ut dapibus diam nisl sed est. Morbi leo ex, auctor eget nisl sed, iaculis pretium neque. 234 | 235 | Sed a tempus nulla. Aliquam id auctor sapien. In tortor enim, convallis eu tempor ut, aliquet eu turpis. Cras commodo arcu eget posuere condimentum. Pellentesque ornare diam id est malesuada, id fermentum dui rhoncus. Donec diam urna, posuere eu mauris vel, rhoncus porta nulla. Aenean efficitur commodo odio nec finibus. Mauris a dolor nec lectus fringilla tincidunt. Ut convallis lectus et tincidunt fringilla. Mauris scelerisque fermentum arcu nec cursus. Etiam ultricies quis mauris sit amet placerat. Donec non lobortis purus. 236 | 237 | Praesent ut felis eu lorem cursus sollicitudin eget quis orci. Curabitur est nisl, mattis non rhoncus non, mollis et nibh. Mauris porta, libero ut aliquet aliquam, sapien lorem elementum arcu, at tristique nulla felis ultricies nunc. Donec ut purus in lacus pretium vehicula ac ac justo. Nullam vel pharetra lorem, at congue sapien. Duis eleifend rhoncus diam id tincidunt. Vivamus laoreet mi ipsum, ut rhoncus nibh mollis faucibus. Nulla rhoncus tellus et mi aliquet, vitae imperdiet elit aliquet. Ut condimentum consectetur convallis. 238 | 239 | Aliquam erat volutpat. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Nullam sed est et lacus laoreet ultricies et sit amet sapien. Sed auctor nisi vel egestas tristique. Donec elementum velit non bibendum viverra. 
Integer ipsum purus, consequat id risus euismod, imperdiet lacinia tellus. In hac habitasse platea dictumst. Curabitur vel malesuada mauris, ac efficitur tellus. Donec in ultrices felis. Sed id est neque. 240 | 241 | Nulla ac mauris ultrices, mollis felis eget, tempus sem. Sed sit amet venenatis lectus, at fermentum leo. Donec a neque magna. Praesent viverra urna at gravida accumsan. Aenean efficitur enim accumsan enim vehicula, eget accumsan nibh pulvinar. Nam at dui quis urna tempus tempor ac venenatis mauris. Nam hendrerit enim vitae odio interdum molestie. Sed fermentum, mi sed porta cursus, tellus tellus eleifend diam, eget suscipit mauris turpis vitae risus. Sed non suscipit nisl. Morbi facilisis dapibus tortor, at consectetur urna tincidunt id. Ut tempor purus sed rhoncus pharetra. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Donec orci elit, faucibus vitae odio non, tempor congue augue. 242 | 243 | Pellentesque orci quam, pellentesque id lobortis sit amet, consequat a eros. Nunc a rutrum purus. Praesent aliquam ex nec tincidunt luctus. Integer sollicitudin lorem ipsum, in gravida quam pulvinar id. Nulla tempus ante id maximus eleifend. Donec ante risus, elementum sit amet lacus ut, finibus pretium arcu. In gravida justo non eros luctus, in convallis quam interdum. Nam at lectus congue, mollis orci vitae, tincidunt nisl. Sed sit amet consequat turpis. Sed venenatis convallis rhoncus. Curabitur interdum vestibulum magna, at vestibulum libero mattis sit amet. Mauris porttitor quam placerat, mollis dui non, imperdiet lorem. Maecenas aliquam finibus urna, eu aliquet diam pharetra vitae. 244 | 245 | Praesent rutrum vel metus nec varius. Mauris finibus a orci id vulputate. Nullam maximus dui eget tortor elementum, non pulvinar mi rhoncus. Donec ultrices dolor vitae est sollicitudin, non rutrum tellus fringilla. Aenean vitae metus et nulla faucibus placerat. 
Pellentesque et metus maximus, maximus sem non, aliquam sem. Orci varius natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Pellentesque id leo vitae sapien ultricies vehicula. Nunc quam odio, lacinia non finibus ut, elementum in neque. Donec a ipsum finibus, sollicitudin nibh nec, cursus libero. Phasellus laoreet, ex faucibus lobortis sagittis, lectus massa viverra nisi, eu facilisis urna leo blandit ex. Nulla cursus, velit egestas rhoncus faucibus, magna augue feugiat neque, vitae commodo ex leo sit amet elit. Donec malesuada risus ut mi rutrum placerat. Orci varius natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Quisque sodales, est vel ornare pharetra, risus orci placerat quam, id pellentesque mauris orci at orci. 246 | 247 | Aliquam consectetur urna in erat aliquet, molestie molestie purus malesuada. Morbi efficitur lorem lorem, ac semper enim elementum id. Praesent tempus massa sed vestibulum vestibulum. Quisque a dui sit amet justo tempor volutpat. Nullam vel sem nec eros ornare lacinia pretium vel enim. Phasellus aliquam facilisis felis nec ultrices. Mauris sed lorem eu dolor pretium facilisis id non leo. Proin in nisl malesuada, varius felis tincidunt, mollis odio. 248 | 249 | Morbi dictum erat quis nisl tincidunt, sed luctus arcu porttitor. Vivamus rutrum ante eget lectus volutpat commodo. Mauris dui orci, gravida quis libero ac, molestie gravida arcu. Ut ultricies neque in dui semper dictum. Pellentesque vulputate, arcu ut accumsan convallis, tortor sem viverra metus, eu aliquam justo nisi vitae orci. Curabitur porttitor condimentum risus quis porttitor. Sed vulputate maximus diam ut venenatis. Curabitur rutrum lobortis dui pulvinar lobortis. Duis pellentesque, velit ac pulvinar aliquam, arcu nulla aliquam augue, at iaculis leo augue ac metus. 250 | 251 | Cras bibendum neque ut consequat molestie. Nunc volutpat finibus porta. Aenean sollicitudin accumsan ipsum, et iaculis sapien elementum eu. 
Nullam egestas urna ipsum, vel porta nulla iaculis a. Mauris non ante felis. Ut lacus enim, porttitor eget sem tincidunt, efficitur tempor tellus. Sed quis mauris ullamcorper, pellentesque leo et, eleifend risus. Nam non convallis nisi. Nulla in orci vitae neque condimentum venenatis. Morbi velit leo, auctor at fringilla eget, placerat eu nisi. Morbi consequat metus arcu, viverra dapibus felis viverra vulputate. 252 | 253 | Cras et eros eget quam auctor rhoncus sed et enim. Nulla vitae dui ac ante gravida varius in eu mi. Duis venenatis ligula ut congue ornare. Integer mollis turpis magna, ut dapibus neque pulvinar at. Nullam nisi elit, fermentum quis volutpat ut, porttitor at tortor. Sed id aliquet ipsum. Aliquam lobortis metus euismod, suscipit quam at, semper tellus. Suspendisse dignissim id urna id rhoncus. Duis quis massa mollis, commodo odio nec, commodo mauris. Fusce ut vulputate velit, ut iaculis nisl. Ut eget imperdiet tortor. Mauris nec lorem congue, porttitor augue eget, facilisis nisl. In vel ullamcorper odio, et dignissim velit. 254 | 255 | Quisque a tellus fermentum, tincidunt massa ultrices, fermentum arcu. Nunc dignissim tempus erat sit amet feugiat. Vestibulum sed tellus tortor. Aliquam ligula orci, dictum id suscipit vel, sollicitudin tincidunt turpis. Curabitur aliquam blandit nibh vitae fringilla. Vestibulum id neque congue, imperdiet lorem eget, mattis lectus. Phasellus aliquet erat eget metus pulvinar, eget pretium est consequat. Donec in suscipit metus, id convallis neque. In ultrices tristique pretium. Nullam congue aliquet sem, at venenatis sapien tempor mollis. Quisque cursus ligula at cursus luctus. Ut et libero iaculis, sollicitudin risus quis, feugiat ipsum. 256 | 257 | Sed eu lacus tempus arcu sagittis dapibus. Morbi semper venenatis diam, sit amet ultrices dolor feugiat et. Sed faucibus cursus dolor, at pulvinar lacus posuere sit amet. Ut non diam vel turpis feugiat maximus id eget tortor. Sed tincidunt dui ut odio tincidunt rhoncus. 
Sed sollicitudin fermentum fringilla. Nulla efficitur est in velit feugiat, vel aliquam mauris tempus. Duis ut egestas diam. Morbi sapien risus, luctus ac suscipit in, molestie nec arcu. Nullam nec iaculis massa, a mattis velit. Nulla viverra ornare tincidunt. 258 | 259 | Sed venenatis ipsum mauris, id auctor nunc scelerisque sed. Nulla at eros sed ex pellentesque varius. Aenean consectetur vitae felis sit amet aliquet. Aenean semper neque at massa luctus fermentum. Praesent facilisis dolor non sagittis eleifend. Cras euismod nulla sit amet erat gravida, ac sagittis diam accumsan. Vivamus a nisl vel lectus tempor fermentum. Proin gravida augue ut faucibus tristique. Etiam rutrum elementum leo, et dapibus orci vulputate sed. 260 | 261 | Maecenas scelerisque, enim nec ornare euismod, nibh massa tristique diam, eu pretium ante libero a erat. Praesent eu auctor odio. Aenean feugiat ultricies dui, eu tincidunt risus efficitur a. Donec porttitor nec orci volutpat lacinia. Sed malesuada neque ac purus bibendum tempor. Aenean vel ligula ac quam iaculis tincidunt et quis felis. Proin nec dapibus lorem, in molestie enim. Duis mattis, tellus ac ornare tristique, mi orci suscipit sapien, sed imperdiet urna dui at lacus. 262 | 263 | Orci varius natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Aenean id urna placerat, viverra augue sed, mattis quam. Mauris maximus neque eget enim rutrum feugiat. Nulla facilisi. Fusce consectetur tellus eget arcu sollicitudin, eu rhoncus nisl laoreet. Praesent quis tempus nulla, quis pretium tortor. Nullam vehicula gravida ultrices. Suspendisse sollicitudin aliquam eros, vel luctus est vehicula nec. Sed nec auctor quam, eget viverra neque. Nulla finibus dui ac volutpat consectetur. Aliquam lobortis elementum dolor, nec ullamcorper lorem euismod auctor. Vivamus pretium consequat erat non accumsan. Quisque fringilla risus ut sapien rutrum tincidunt vel vel ipsum. 
264 | 265 | Vivamus malesuada dolor at lacus bibendum, a dapibus dui tincidunt. Donec faucibus commodo sapien ac tincidunt. Fusce at porta dolor. Nullam nec sem at risus blandit aliquam. Phasellus hendrerit imperdiet ligula, at auctor nisl. Nunc sit amet iaculis magna. Etiam venenatis libero at ex sodales, a sollicitudin mauris faucibus. Morbi sed erat semper arcu tempus ultrices. Proin massa velit, venenatis sit amet pharetra vitae, feugiat in elit. Donec luctus elit velit, blandit fringilla sapien eleifend et. Nunc in aliquam est. Aliquam nec lobortis dolor, non convallis enim. Vivamus convallis, sapien at suscipit pellentesque, libero enim vestibulum libero, at congue sem ipsum vel purus. Nullam congue egestas vehicula. Integer dictum augue dui, ut hendrerit dui facilisis in. Maecenas justo turpis, interdum sit amet enim et, malesuada gravida magna. 266 | 267 | Donec vitae quam ac libero congue viverra ac hendrerit nunc. Aenean viverra sem quis orci ultricies, at imperdiet magna dignissim. Vivamus efficitur laoreet erat, in consequat lacus molestie non. Suspendisse non sem ultricies, egestas risus et, aliquam magna. Maecenas in ante sit amet orci pretium vestibulum id vitae neque. Proin justo augue, lobortis at luctus et, mattis ac enim. Phasellus ac convallis tortor. Suspendisse nec leo a lectus sodales luctus a molestie nibh. Donec lobortis imperdiet risus, nec ornare tellus elementum ut. Cras iaculis et turpis quis imperdiet. Quisque nec tincidunt risus. Nulla tristique libero eu diam mollis finibus. In faucibus massa quis massa porta, sit amet maximus eros laoreet. Integer id metus rutrum, hendrerit urna id, tincidunt tortor. In sed massa ac tortor euismod consequat id nec quam. Fusce eleifend elit nec lobortis ullamcorper. 268 | 269 | Aenean vulputate nisi id cursus sodales. Aliquam ac elementum nisl. Suspendisse pretium metus arcu, pretium pulvinar lorem feugiat non. Proin id egestas ipsum. Duis pulvinar diam id quam egestas laoreet ac sit amet ante. 
Proin quis felis libero. Integer vel dapibus justo. Morbi sagittis feugiat sapien nec mollis. Mauris iaculis sed nisl sed finibus. Phasellus id leo sed tortor hendrerit euismod nec nec ipsum. Fusce id augue semper erat lobortis iaculis. 270 | 271 | Maecenas sollicitudin sodales pellentesque. Pellentesque rutrum metus sed orci vehicula, ultricies lacinia orci tincidunt. Integer at nibh tellus. Mauris sollicitudin iaculis diam, vitae facilisis nulla iaculis eget. Nam sodales ultricies vestibulum. Pellentesque ut erat consectetur, cursus metus a, placerat nunc. Proin tristique justo nisi, ac condimentum quam lobortis ut. Etiam eleifend ipsum id lacus pharetra, sed pulvinar dolor rhoncus. Donec venenatis ac quam vel bibendum. Vestibulum eget lacus consectetur, eleifend libero nec, facilisis mauris. Quisque egestas elit ac volutpat gravida. Fusce et justo accumsan, blandit neque et, maximus urna. Maecenas pulvinar metus diam, at egestas ipsum molestie eget. Proin imperdiet fermentum odio vitae efficitur. Cras at sem eu leo iaculis iaculis. Integer porta, odio rhoncus finibus facilisis, massa lacus varius sem, vitae iaculis eros magna et mi. 272 | 273 | Nam dignissim fringilla ante, in ultrices felis pulvinar vitae. Morbi blandit at ex in ornare. Nullam laoreet gravida interdum. Duis facilisis felis vitae orci venenatis feugiat. Sed tempor est quis augue facilisis sollicitudin. Phasellus metus orci, maximus vulputate sollicitudin et, finibus et ligula. Cras et convallis tortor. Nam in metus erat. 274 | 275 | Mauris nulla nunc, aliquam vel pharetra a, condimentum semper sapien. Nullam vitae arcu tincidunt, auctor risus nec, mattis risus. In nec porttitor tellus. Nulla quis neque blandit orci hendrerit placerat. Suspendisse potenti. Integer accumsan nisl eget est sollicitudin, vitae tincidunt turpis gravida. Nunc non elit rutrum, fermentum nisi ac, placerat nunc. Donec dignissim, turpis at hendrerit accumsan, enim mi porttitor ipsum, egestas placerat felis urna in libero. 
Nullam iaculis nisl eget orci consectetur gravida. Ut ac eros leo. Aenean a elit a dui feugiat lacinia id at odio. Sed dapibus enim nibh, id dictum leo venenatis eget. In dignissim facilisis ipsum non posuere. Cras quam ante, congue id velit ac, tempus semper libero. Praesent sem lectus, faucibus ac est eu, varius suscipit velit. 276 | 277 | Integer eget orci vestibulum lectus venenatis egestas. Cras a augue urna. Maecenas ornare rutrum sapien, eu bibendum quam. Interdum et malesuada fames ac ante ipsum primis in faucibus. Nulla non nisi ut lectus pulvinar ultricies. Suspendisse ultrices, neque id ultrices cursus, augue massa rhoncus neque, quis blandit nisl ipsum id purus. Morbi sed libero enim. Mauris ac lectus quis diam dictum ullamcorper. Nullam sed ipsum ut urna faucibus condimentum. Nullam leo erat, dapibus ut turpis et, sollicitudin vestibulum sapien. Etiam porttitor condimentum enim nec vehicula. Morbi magna augue, tincidunt ac nunc nec, dignissim vulputate erat. Aliquam tincidunt, ligula vel fringilla luctus, mauris erat lacinia metus, vel tristique risus nulla eget risus. Etiam varius a elit eget dapibus. 278 | 279 | Sed libero magna, ultricies id interdum sed, tempus ut tortor. Duis accumsan condimentum sapien, id rhoncus justo bibendum quis. Aenean nibh diam, rhoncus vel aliquam in, tristique ac sapien. Sed luctus a neque nec laoreet. Sed justo mauris, ullamcorper quis elementum nec, facilisis ut purus. Duis sapien sapien, tempor et maximus ut, porttitor vel orci. Nulla facilisi. Duis lobortis tellus at turpis vehicula, et consectetur tellus iaculis. Cras tincidunt quam nec felis ornare porta. Nullam viverra elementum massa at tincidunt. Aenean in risus at ex feugiat interdum non id est. Morbi fringilla magna in ipsum convallis, hendrerit eleifend turpis ultricies. Phasellus efficitur dolor arcu, ut mattis sem consequat et. 280 | 281 | Mauris accumsan fringilla lorem, nec laoreet velit tincidunt eu. 
Phasellus tincidunt, odio eget aliquet pellentesque, diam turpis ultricies elit, sit amet ultricies odio metus at risus. Proin eget lacus commodo, convallis mauris congue, accumsan ex. Vestibulum libero mauris, porta eget auctor ac, faucibus et urna. Aenean imperdiet neque eget metus tincidunt ornare. Quisque non pretium ligula, sed accumsan ligula. Nam dolor sem, imperdiet eu erat accumsan, hendrerit interdum ipsum. Proin eleifend odio massa, eget ullamcorper augue pretium eu. Nunc hendrerit fermentum tincidunt. 282 | 283 | Nullam sagittis commodo facilisis. Proin in luctus ex, sed tristique lectus. Integer nec diam ac neque accumsan tempus et non orci. Aenean iaculis dolor sed semper mollis. Cras varius est nec placerat mattis. Aenean magna nunc, tempus eu felis ac, vestibulum feugiat sem. Nulla facilisi. Donec quis bibendum mauris. Aliquam sit amet aliquet erat. Aenean maximus neque et tincidunt finibus. 284 | 285 | Vestibulum a blandit lacus. Integer velit ante, cursus sed pharetra non, fringilla sit amet nisi. Vestibulum gravida enim sit amet velit auctor, sed faucibus justo pellentesque. Curabitur mauris risus, tempus vitae velit sit amet, feugiat mattis massa. Donec sapien lectus, suscipit vel aliquam ac, pretium nec lacus. Nam suscipit mattis gravida. In sed arcu ac eros posuere posuere et ut sapien. In et eleifend urna. In hac habitasse platea dictumst. Nunc viverra, diam in bibendum convallis, elit libero interdum diam, eget tincidunt est risus a nibh. Vestibulum mi ante, convallis volutpat augue sed, placerat vestibulum est. Proin sit amet neque diam. Morbi pretium posuere diam eget posuere. Aenean sed interdum elit. Donec leo felis, dictum non varius in, commodo ut ipsum. Quisque ac lorem vel arcu sollicitudin fermentum. 286 | 287 | Suspendisse vulputate sed libero blandit tincidunt. Morbi ac massa erat. Nulla tortor mauris, fermentum eu leo sit amet, imperdiet laoreet ante. Sed porta tortor ac rutrum accumsan. Maecenas id malesuada lectus. 
Donec sagittis sapien neque, at bibendum risus convallis at. Curabitur cursus, quam vel suscipit volutpat, felis tellus vehicula est, a mollis felis odio vel velit. Nullam non risus eu diam ullamcorper venenatis. Vestibulum faucibus accumsan neque ac pellentesque. 288 | 289 | Donec non dignissim libero. Suspendisse convallis egestas dui eu semper. Fusce mollis convallis nibh sit amet posuere. Aenean suscipit enim non augue viverra pretium. Quisque pellentesque magna ut arcu porta, nec tincidunt nulla convallis. Praesent vulputate eu ligula ultrices dapibus. Curabitur quis tortor eu arcu consectetur convallis in in est. Vestibulum orci odio, egestas euismod iaculis eget, semper in nisl. 290 | 291 | Cras suscipit dolor ac sem malesuada pharetra. Vivamus aliquet vestibulum enim, id vestibulum metus rhoncus at. Pellentesque ac augue urna. Vivamus congue neque vel lacus elementum varius. Aliquam quis enim sit amet arcu egestas tempor. Nunc bibendum nunc nunc, ac pulvinar lorem tempor nec. Interdum et malesuada fames ac ante ipsum primis in faucibus. Nam nec quam tempor, porta purus ac, consequat quam. Nulla facilisi. 292 | 293 | In eget lorem faucibus magna efficitur fringilla sit amet in ligula. Vivamus eget augue suscipit, aliquam orci eget, faucibus nunc. Vestibulum feugiat sollicitudin arcu ac suscipit. Sed ut massa et nibh luctus molestie condimentum eget dolor. Vivamus nec finibus ligula. Curabitur euismod nisl bibendum massa dignissim, et fermentum risus sagittis. Sed quis aliquam purus. Aenean gravida pharetra erat, quis pellentesque nunc auctor eu. Suspendisse gravida justo a efficitur molestie. Duis et urna odio. Vivamus ornare tristique ultrices. 294 | 295 | Aenean rhoncus, nibh vitae elementum maximus, tellus lorem malesuada dolor, ut pulvinar leo libero iaculis mi. Vivamus ullamcorper magna nec erat iaculis, sit amet commodo lorem scelerisque. Nullam sit amet tortor vitae ligula placerat dapibus. 
Nullam euismod, sapien at rutrum mollis, dolor sapien condimentum erat, sit amet viverra metus turpis eu magna. Etiam id nibh condimentum, commodo magna maximus, convallis lorem. In mollis ipsum non orci fermentum maximus. Mauris ultricies nulla et accumsan accumsan. Fusce quis laoreet mi. Sed auctor, nibh id fringilla ullamcorper, metus orci rhoncus velit, ut imperdiet tellus odio a justo. Pellentesque ut augue ut risus semper gravida. Ut ac turpis non nibh pharetra pharetra et et nibh. Nulla nec cursus tortor, et dictum ligula. Pellentesque id quam mollis, bibendum justo at, eleifend libero. Nulla et odio eleifend, maximus nunc in, fringilla sem. Praesent condimentum in est vel fermentum. Mauris ut lacus est. 296 | 297 | Integer viverra risus sit amet tellus dignissim, facilisis accumsan ligula eleifend. Donec eu sem mauris. Donec augue nunc, bibendum fringilla nulla eu, ultricies placerat sapien. Sed magna massa, venenatis scelerisque porta in, facilisis non quam. In maximus tortor nec pretium consectetur. Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia curae; Proin nec nisl ullamcorper, aliquet odio sed, posuere orci. In in tincidunt nisi, ac posuere nulla. Orci varius natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Vestibulum egestas condimentum sollicitudin. Pellentesque at vehicula lectus. Pellentesque laoreet tellus et ante tincidunt, sed sodales velit pellentesque. Nunc elementum semper convallis. 298 | 299 | Vivamus a odio in elit iaculis scelerisque et eu mi. Praesent laoreet turpis a nisi vestibulum, at porta lacus iaculis. Suspendisse potenti. Pellentesque nulla elit, mollis facilisis nibh quis, posuere rhoncus dui. Curabitur eget lectus sed velit tempor iaculis. Maecenas dictum consectetur magna eget aliquet. Quisque pharetra mauris sit amet enim ultrices consequat. 300 | 301 | Nulla tincidunt velit sit amet vehicula sollicitudin. 
Orci varius natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Nunc eu lacus odio. Duis eleifend elit dolor, sit amet rhoncus purus lobortis at. Aliquam ultricies, ante non pellentesque tempor, leo neque lobortis sapien, vel viverra urna ligula non purus. Mauris ac luctus velit. Duis varius odio in efficitur fringilla. Donec eu ante varius, mattis nibh at, ultrices dolor. Integer quis felis pretium, ultrices odio non, tincidunt est. Curabitur pellentesque mauris vitae maximus posuere. -------------------------------------------------------------------------------- /test/data/sentence-transformers-labse.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "1.0", 3 | "truncation": null, 4 | "padding": null, 5 | "added_tokens": [ 6 | { 7 | "id": 0, 8 | "content": "[PAD]", 9 | "single_word": false, 10 | "lstrip": false, 11 | "rstrip": false, 12 | "normalized": false, 13 | "special": true 14 | }, 15 | { 16 | "id": 100, 17 | "content": "[UNK]", 18 | "single_word": false, 19 | "lstrip": false, 20 | "rstrip": false, 21 | "normalized": false, 22 | "special": true 23 | }, 24 | { 25 | "id": 101, 26 | "content": "[CLS]", 27 | "single_word": false, 28 | "lstrip": false, 29 | "rstrip": false, 30 | "normalized": false, 31 | "special": true 32 | }, 33 | { 34 | "id": 102, 35 | "content": "[SEP]", 36 | "single_word": false, 37 | "lstrip": false, 38 | "rstrip": false, 39 | "normalized": false, 40 | "special": true 41 | }, 42 | { 43 | "id": 103, 44 | "content": "[MASK]", 45 | "single_word": false, 46 | "lstrip": false, 47 | "rstrip": false, 48 | "normalized": false, 49 | "special": true 50 | } 51 | ], 52 | "normalizer": { 53 | "type": "BertNormalizer", 54 | "clean_text": true, 55 | "handle_chinese_chars": true, 56 | "strip_accents": null, 57 | "lowercase": false 58 | }, 59 | "pre_tokenizer": { 60 | "type": "BertPreTokenizer" 61 | }, 62 | "post_processor": { 63 | "type": "TemplateProcessing", 64 | "single": 
[ 65 | { 66 | "SpecialToken": { 67 | "id": "[CLS]", 68 | "type_id": 0 69 | } 70 | }, 71 | { 72 | "Sequence": { 73 | "id": "A", 74 | "type_id": 0 75 | } 76 | }, 77 | { 78 | "SpecialToken": { 79 | "id": "[SEP]", 80 | "type_id": 0 81 | } 82 | } 83 | ], 84 | "pair": [ 85 | { 86 | "SpecialToken": { 87 | "id": "[CLS]", 88 | "type_id": 0 89 | } 90 | }, 91 | { 92 | "Sequence": { 93 | "id": "A", 94 | "type_id": 0 95 | } 96 | }, 97 | { 98 | "SpecialToken": { 99 | "id": "[SEP]", 100 | "type_id": 0 101 | } 102 | }, 103 | { 104 | "Sequence": { 105 | "id": "B", 106 | "type_id": 1 107 | } 108 | }, 109 | { 110 | "SpecialToken": { 111 | "id": "[SEP]", 112 | "type_id": 1 113 | } 114 | } 115 | ], 116 | "special_tokens": { 117 | "[CLS]": { 118 | "id": "[CLS]", 119 | "ids": [ 120 | 101 121 | ], 122 | "tokens": [ 123 | "[CLS]" 124 | ] 125 | }, 126 | "[SEP]": { 127 | "id": "[SEP]", 128 | "ids": [ 129 | 102 130 | ], 131 | "tokens": [ 132 | "[SEP]" 133 | ] 134 | } 135 | } 136 | }, 137 | "decoder": { 138 | "type": "WordPiece", 139 | "prefix": "##", 140 | "cleanup": true 141 | }, 142 | "model": { 143 | "type": "WordPiece", 144 | "unk_token": "[UNK]", 145 | "continuing_subword_prefix": "##", 146 | "max_input_chars_per_word": 100, 147 | "vocab": { 148 | "[PAD]": 0, 149 | "[CLS]":101, 150 | "[SEP]":102, 151 | "brown": 51775, 152 | "fox": 193284, 153 | "jumps": 333915, 154 | "over": 15444, 155 | "the": 14985, 156 | "lazy": 221123, 157 | "dog": 22452, 158 | "[":164, 159 | "CLS":304910, 160 | "]":166, 161 | "SEP":211703 162 | } 163 | } 164 | } -------------------------------------------------------------------------------- /tokenizer.go: -------------------------------------------------------------------------------- 1 | package tokenizers 2 | 3 | // TODO packaging: how do we build the rust lib for distribution? 
4 | 5 | /* 6 | #cgo LDFLAGS: -ltokenizers -ldl -lm -lstdc++ 7 | #include 8 | #include "tokenizers.h" 9 | */ 10 | import "C" 11 | 12 | // NOTE: There should be NO space between the comments and the `import "C"` line. 13 | import ( 14 | "fmt" 15 | "io" 16 | "net/http" 17 | "os" 18 | "path/filepath" 19 | "strings" 20 | "sync" 21 | "unsafe" 22 | ) 23 | 24 | const ( 25 | WANT_VERSION = "2.20.2" 26 | 27 | baseURL = "https://huggingface.co" 28 | ) 29 | 30 | // List of necessary tokenizer files and their mandatory status. 31 | // True means mandatory, false means optional. 32 | var tokenizerFiles = map[string]bool{ 33 | "tokenizer.json": true, 34 | "vocab.txt": false, 35 | "merges.txt": false, 36 | "special_tokens_map.json": false, 37 | "added_tokens.json": false, 38 | } 39 | 40 | func init() { 41 | version := C.tokenizers_version() 42 | got := C.GoString(version) 43 | if got != WANT_VERSION { 44 | panic(fmt.Errorf("tokenizers library version mismatch, want: %s, got: %s", WANT_VERSION, got)) 45 | } 46 | } 47 | 48 | type Tokenizer struct { 49 | tokenizer unsafe.Pointer 50 | } 51 | 52 | type tokenizerOpts struct { 53 | encodeSpecialTokens C.bool 54 | } 55 | 56 | type TokenizerOption func(to *tokenizerOpts) 57 | 58 | func WithEncodeSpecialTokens() TokenizerOption { 59 | return func(to *tokenizerOpts) { 60 | to.encodeSpecialTokens = C.bool(true) 61 | } 62 | } 63 | 64 | type TruncationDirection int 65 | 66 | const ( 67 | TruncationDirectionLeft TruncationDirection = iota 68 | TruncationDirectionRight 69 | ) 70 | 71 | var _ io.Closer = (*Tokenizer)(nil) 72 | 73 | func FromBytes(data []byte, opts ...TokenizerOption) (*Tokenizer, error) { 74 | allOpts := &tokenizerOpts{ 75 | // by default, we do not encode special tokens 76 | encodeSpecialTokens: C.bool(false), 77 | } 78 | for _, opt := range opts { 79 | opt(allOpts) 80 | } 81 | tokenizer := C.tokenizers_from_bytes((*C.uchar)(unsafe.Pointer(&data[0])), C.uint(len(data)), (*C.struct_tokenizers_options)(unsafe.Pointer(allOpts))) 82 
| return &Tokenizer{tokenizer: tokenizer}, nil 83 | } 84 | 85 | func FromBytesWithTruncation(data []byte, maxLen uint32, dir TruncationDirection) (*Tokenizer, error) { 86 | tokenizer := C.tokenizers_from_bytes_with_truncation((*C.uchar)(unsafe.Pointer(&data[0])), C.uint(len(data)), C.uint(maxLen), C.uchar(dir)) 87 | return &Tokenizer{tokenizer: tokenizer}, nil 88 | } 89 | 90 | func FromFile(path string) (*Tokenizer, error) { 91 | cPath := C.CString(path) 92 | defer C.free(unsafe.Pointer(cPath)) 93 | tokenizer, err := C.tokenizers_from_file(cPath) 94 | if err != nil { 95 | return nil, err 96 | } 97 | return &Tokenizer{tokenizer: tokenizer}, nil 98 | } 99 | 100 | type tokenizerConfig struct { 101 | cacheDir *string 102 | authToken *string 103 | } 104 | 105 | type TokenizerConfigOption func(cfg *tokenizerConfig) 106 | 107 | func WithCacheDir(path string) TokenizerConfigOption { 108 | return func(cfg *tokenizerConfig) { 109 | cfg.cacheDir = &path 110 | } 111 | } 112 | 113 | func WithAuthToken(token string) TokenizerConfigOption { 114 | return func(cfg *tokenizerConfig) { 115 | cfg.authToken = &token 116 | } 117 | } 118 | 119 | // FromPretrained downloads necessary files and initializes the tokenizer. 120 | // Parameters: 121 | // - modelID: The Hugging Face model identifier (e.g., "bert-base-uncased"). 122 | // - destination: Optional. If provided and not nil, files will be downloaded to this folder. 123 | // If nil, a temporary directory will be used. 124 | // - authToken: Optional. If provided and not nil, it will be used to authenticate requests. 
125 | func FromPretrained(modelID string, opts ...TokenizerConfigOption) (*Tokenizer, error) { 126 | cfg := &tokenizerConfig{} 127 | for _, opt := range opts { 128 | opt(cfg) 129 | } 130 | if strings.TrimSpace(modelID) == "" { 131 | return nil, fmt.Errorf("modelID cannot be empty") 132 | } 133 | 134 | // Construct the model URL 135 | modelURL := fmt.Sprintf("%s/%s/resolve/main", baseURL, modelID) 136 | 137 | // Determine the download directory 138 | var downloadDir string 139 | if cfg.cacheDir != nil { 140 | downloadDir = fmt.Sprintf("%s/%s", *cfg.cacheDir, modelID) 141 | // Create the destination directory if it doesn't exist 142 | err := os.MkdirAll(downloadDir, os.ModePerm) 143 | if err != nil { 144 | return nil, fmt.Errorf("failed to create destination directory %s: %w", downloadDir, err) 145 | } 146 | } else { 147 | // Create a temporary directory 148 | tmpDir, err := os.MkdirTemp("", "huggingface-tokenizer-*") 149 | if err != nil { 150 | return nil, fmt.Errorf("error creating temporary directory: %w", err) 151 | } 152 | downloadDir = tmpDir 153 | } 154 | 155 | var wg sync.WaitGroup 156 | errCh := make(chan error) 157 | 158 | // Download each tokenizer file concurrently 159 | for filename, isMandatory := range tokenizerFiles { 160 | wg.Add(1) 161 | go func(fn string, mandatory bool) { 162 | defer wg.Done() 163 | fileURL := fmt.Sprintf("%s/%s", modelURL, fn) 164 | destPath := filepath.Join(downloadDir, fn) 165 | err := downloadFile(fileURL, destPath, cfg.authToken) 166 | if err != nil && mandatory { 167 | // If the file is mandatory, report an error 168 | errCh <- fmt.Errorf("failed to download mandatory file %s: %w", fn, err) 169 | } 170 | }(filename, isMandatory) 171 | } 172 | 173 | go func() { 174 | wg.Wait() 175 | close(errCh) 176 | }() 177 | 178 | var errs []error 179 | for err := range errCh { 180 | errs = append(errs, err) 181 | } 182 | 183 | if len(errs) > 0 { 184 | if err := os.RemoveAll(downloadDir); err != nil { 185 | fmt.Printf("Warning: failed to 
clean up directory %s: %v\n", downloadDir, err) 186 | } 187 | return nil, errs[0] 188 | } 189 | 190 | return FromFile(filepath.Join(downloadDir, "tokenizer.json")) 191 | } 192 | 193 | // downloadFile downloads a file from the given URL and saves it to the specified destination. 194 | // If authToken is provided (non-nil), it will be used for authorization. 195 | // Returns an error if the download fails. 196 | func downloadFile(url, destination string, authToken *string) error { 197 | // Check if the file already exists 198 | if _, err := os.Stat(destination); err == nil { 199 | return nil 200 | } 201 | 202 | // Create a new HTTP request 203 | req, err := http.NewRequest("GET", url, nil) 204 | if err != nil { 205 | return fmt.Errorf("failed to create request for %s: %w", url, err) 206 | } 207 | 208 | // If authToken is provided, set the Authorization header 209 | if authToken != nil { 210 | req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", *authToken)) 211 | } 212 | 213 | resp, err := http.DefaultClient.Do(req) 214 | if err != nil { 215 | return fmt.Errorf("failed to download from %s: %w", url, err) 216 | } 217 | defer resp.Body.Close() 218 | 219 | // Check for successful response 220 | if resp.StatusCode != http.StatusOK { 221 | return fmt.Errorf("failed to download from %s: status code %d", url, resp.StatusCode) 222 | } 223 | 224 | // Create the destination file 225 | out, err := os.Create(destination) 226 | if err != nil { 227 | return fmt.Errorf("failed to create file %s: %w", destination, err) 228 | } 229 | defer out.Close() 230 | 231 | // Write the response body to the file 232 | _, err = io.Copy(out, resp.Body) 233 | if err != nil { 234 | return fmt.Errorf("failed to write to file %s: %w", destination, err) 235 | } 236 | 237 | fmt.Printf("Successfully downloaded %s\n", destination) 238 | return nil 239 | } 240 | 241 | func (t *Tokenizer) Close() error { 242 | C.tokenizers_free_tokenizer(t.tokenizer) 243 | t.tokenizer = nil 244 | return nil 245 | } 246 
| 247 | type Offset [2]uint 248 | 249 | type Encoding struct { 250 | IDs []uint32 251 | TypeIDs []uint32 252 | SpecialTokensMask []uint32 253 | AttentionMask []uint32 254 | Tokens []string 255 | Offsets []Offset 256 | } 257 | 258 | type encodeOpts struct { 259 | AddSpecialTokens C.bool 260 | 261 | ReturnTypeIDs C.bool 262 | ReturnTokens C.bool 263 | ReturnSpecialTokensMask C.bool 264 | ReturnAttentionMask C.bool 265 | ReturnOffsets C.bool 266 | } 267 | 268 | type EncodeOption func(eo *encodeOpts) 269 | 270 | func uintVecToSlice(arrPtr *C.uint, len int) []uint32 { 271 | arr := unsafe.Slice(arrPtr, len) 272 | slice := make([]uint32, len) 273 | for i, v := range arr { 274 | slice[i] = uint32(v) 275 | } 276 | return slice 277 | } 278 | 279 | func offsetVecToSlice(arrPtr *C.size_t, tokenLength int) []Offset { 280 | arr := unsafe.Slice(arrPtr, tokenLength*2) 281 | slice := make([]Offset, tokenLength) 282 | counter := 0 283 | for i := 0; i < tokenLength; i++ { 284 | offset := Offset{uint(arr[counter]), uint(arr[counter+1])} 285 | slice[i] = offset 286 | counter = counter + 2 287 | } 288 | return slice 289 | } 290 | 291 | func (t *Tokenizer) Encode(str string, addSpecialTokens bool) ([]uint32, []string) { 292 | cStr := C.CString(str) 293 | defer C.free(unsafe.Pointer(cStr)) 294 | options := encodeOpts{ 295 | AddSpecialTokens: C.bool(addSpecialTokens), 296 | ReturnTokens: C.bool(true), 297 | } 298 | res := C.tokenizers_encode(t.tokenizer, cStr, (*C.struct_tokenizers_encode_options)(unsafe.Pointer(&options))) 299 | len := int(res.len) 300 | if len == 0 { 301 | return nil, nil 302 | } 303 | defer C.tokenizers_free_buffer(res) 304 | 305 | ids := uintVecToSlice(res.ids, len) 306 | 307 | var tokens []string 308 | if res.tokens != nil { 309 | tokens = make([]string, len) 310 | for i, s := range (*[1 << 30]*C.char)(unsafe.Pointer(res.tokens))[:len:len] { 311 | tokens[i] = C.GoString(s) 312 | } 313 | } 314 | return ids, tokens 315 | } 316 | 317 | func WithReturnAllAttributes() 
EncodeOption { 318 | return func(eo *encodeOpts) { 319 | eo.ReturnTypeIDs = C.bool(true) 320 | eo.ReturnSpecialTokensMask = C.bool(true) 321 | eo.ReturnAttentionMask = C.bool(true) 322 | eo.ReturnTokens = C.bool(true) 323 | eo.ReturnOffsets = C.bool(true) 324 | } 325 | } 326 | 327 | func WithReturnTypeIDs() EncodeOption { 328 | return func(eo *encodeOpts) { 329 | eo.ReturnTypeIDs = C.bool(true) 330 | } 331 | } 332 | 333 | func WithReturnSpecialTokensMask() EncodeOption { 334 | return func(eo *encodeOpts) { 335 | eo.ReturnSpecialTokensMask = C.bool(true) 336 | } 337 | } 338 | 339 | func WithReturnTokens() EncodeOption { 340 | return func(eo *encodeOpts) { 341 | eo.ReturnTokens = C.bool(true) 342 | } 343 | } 344 | 345 | func WithReturnAttentionMask() EncodeOption { 346 | return func(eo *encodeOpts) { 347 | eo.ReturnAttentionMask = C.bool(true) 348 | } 349 | } 350 | 351 | func WithReturnOffsets() EncodeOption { 352 | return func(eo *encodeOpts) { 353 | eo.ReturnOffsets = C.bool(true) 354 | } 355 | } 356 | 357 | func (t *Tokenizer) EncodeWithOptions(str string, addSpecialTokens bool, opts ...EncodeOption) Encoding { 358 | cStr := C.CString(str) 359 | defer C.free(unsafe.Pointer(cStr)) 360 | 361 | encOptions := encodeOpts{ 362 | AddSpecialTokens: C.bool(addSpecialTokens), 363 | } 364 | for _, opt := range opts { 365 | opt(&encOptions) 366 | } 367 | 368 | res := C.tokenizers_encode(t.tokenizer, cStr, (*C.struct_tokenizers_encode_options)(unsafe.Pointer(&encOptions))) 369 | len := int(res.len) 370 | if len == 0 { 371 | return Encoding{} 372 | } 373 | defer C.tokenizers_free_buffer(res) 374 | 375 | encoding := Encoding{} 376 | encoding.IDs = uintVecToSlice(res.ids, len) 377 | 378 | if encOptions.ReturnTypeIDs && res.type_ids != nil { 379 | encoding.TypeIDs = uintVecToSlice(res.type_ids, len) 380 | } 381 | 382 | if encOptions.ReturnTokens && res.tokens != nil { 383 | tokens := make([]string, len) 384 | for i, s := range (*[1 << 
30]*C.char)(unsafe.Pointer(res.tokens))[:len:len] { 385 | tokens[i] = C.GoString(s) 386 | } 387 | encoding.Tokens = tokens 388 | } 389 | 390 | if encOptions.ReturnSpecialTokensMask && res.special_tokens_mask != nil { 391 | encoding.SpecialTokensMask = uintVecToSlice(res.special_tokens_mask, len) 392 | } 393 | 394 | if encOptions.ReturnAttentionMask && res.attention_mask != nil { 395 | encoding.AttentionMask = uintVecToSlice(res.attention_mask, len) 396 | } 397 | 398 | if encOptions.ReturnOffsets && res.offsets != nil { 399 | encoding.Offsets = offsetVecToSlice(res.offsets, len) 400 | } 401 | 402 | return encoding 403 | } 404 | 405 | func (t *Tokenizer) Decode(tokenIDs []uint32, skipSpecialTokens bool) string { 406 | if len(tokenIDs) == 0 { 407 | return "" 408 | } 409 | len := C.uint(len(tokenIDs)) 410 | res := C.tokenizers_decode(t.tokenizer, (*C.uint)(unsafe.Pointer(&tokenIDs[0])), len, C.bool(skipSpecialTokens)) 411 | defer C.tokenizers_free_string(res) 412 | return C.GoString(res) 413 | } 414 | 415 | func (t *Tokenizer) VocabSize() uint32 { 416 | return uint32(C.tokenizers_vocab_size(t.tokenizer)) 417 | } 418 | -------------------------------------------------------------------------------- /tokenizer_test.go: -------------------------------------------------------------------------------- 1 | package tokenizers_test 2 | 3 | import ( 4 | _ "embed" 5 | "math/rand" 6 | "os" 7 | "path/filepath" 8 | "testing" 9 | 10 | "github.com/daulet/tokenizers" 11 | 12 | "github.com/stretchr/testify/assert" 13 | "github.com/stretchr/testify/require" 14 | ) 15 | 16 | //go:embed test/data/sentence-transformers-labse.json 17 | var embeddedBytes []byte 18 | 19 | // TODO test for leaks 20 | 21 | func TestInvalidConfigPath(t *testing.T) { 22 | _, err := tokenizers.FromFile("./non-existent.json") 23 | require.Error(t, err) 24 | } 25 | 26 | func TestEmbeddingConfig(t *testing.T) { 27 | tk, err := tokenizers.FromBytes(embeddedBytes) 28 | require.NoError(t, err) 29 | defer tk.Close() 30 | 
31 | tests := []struct { 32 | name string 33 | str string 34 | addSpecial bool 35 | wantIDs []uint32 36 | wantTypeIDs []uint32 37 | wantTokens []string 38 | wantSpecialTokensMask []uint32 39 | wantAttentionMask []uint32 40 | wantOffsets []tokenizers.Offset 41 | }{ 42 | { 43 | name: "without special tokens", 44 | str: "brown fox jumps over the lazy dog", 45 | addSpecial: false, 46 | wantIDs: []uint32{0xca3f, 0x2f304, 0x5185b, 0x3c54, 0x3a89, 0x35fc3, 0x57b4}, 47 | wantTypeIDs: []uint32{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, 48 | wantTokens: []string{"brown", "fox", "jumps", "over", "the", "lazy", "dog"}, 49 | wantSpecialTokensMask: []uint32{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, 50 | wantAttentionMask: []uint32{0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1}, 51 | wantOffsets: []tokenizers.Offset{{0x0, 0x5}, {0x6, 0x9}, {0xa, 0xf}, {0x10, 0x14}, {0x15, 0x18}, {0x19, 0x1d}, {0x1e, 0x21}}, 52 | }, 53 | { 54 | name: "with special tokens", 55 | str: "brown fox jumps over the lazy dog", 56 | addSpecial: true, 57 | wantIDs: []uint32{0x65, 0xca3f, 0x2f304, 0x5185b, 0x3c54, 0x3a89, 0x35fc3, 0x57b4, 0x66}, 58 | wantTypeIDs: []uint32{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, 59 | wantTokens: []string{"[CLS]", "brown", "fox", "jumps", "over", "the", "lazy", "dog", "[SEP]"}, 60 | wantSpecialTokensMask: []uint32{0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, 61 | wantAttentionMask: []uint32{0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1}, 62 | wantOffsets: []tokenizers.Offset{{0x0, 0x0}, {0x0, 0x5}, {0x6, 0x9}, {0xa, 0xf}, {0x10, 0x14}, {0x15, 0x18}, {0x19, 0x1d}, {0x1e, 0x21}, {0x0, 0x0}}, 63 | }, 64 | } 65 | for _, tt := range tests { 66 | t.Run(tt.name, func(t *testing.T) { 67 | encoding := tk.EncodeWithOptions(tt.str, tt.addSpecial, tokenizers.WithReturnAllAttributes()) 68 | assert.Equal(t, tt.wantIDs, encoding.IDs, "wrong ids") 69 | assert.Equal(t, tt.wantTypeIDs, encoding.TypeIDs, "wrong type ids") 70 | assert.Equal(t, tt.wantTokens, encoding.Tokens, "wrong tokens") 71 | assert.Equal(t, 
tt.wantSpecialTokensMask, encoding.SpecialTokensMask, "wrong special tokens mask") 72 | assert.Equal(t, tt.wantAttentionMask, encoding.AttentionMask, "wrong attention mask") 73 | assert.Equal(t, tt.wantOffsets, encoding.Offsets, "wrong offsets") 74 | 75 | ids, tokens := tk.Encode(tt.str, tt.addSpecial) 76 | assert.Equal(t, tt.wantIDs, ids, "wrong ids") 77 | assert.Equal(t, tt.wantTokens, tokens, "wrong tokens") 78 | }) 79 | } 80 | } 81 | 82 | func TestEncodeWithAndWithoutOptions(t *testing.T) { 83 | tk, err := tokenizers.FromFile("./test/data/bert-base-uncased.json") 84 | require.NoError(t, err) 85 | defer tk.Close() 86 | tests := []struct { 87 | name string 88 | str string 89 | addSpecial bool 90 | wantIDs []uint32 91 | wantTypeIDs []uint32 92 | wantTokens []string 93 | wantSpecialTokensMask []uint32 94 | wantAttentionMask []uint32 95 | wantOffsets []tokenizers.Offset 96 | }{ 97 | { 98 | name: "without special tokens", 99 | str: "brown fox jumps over the lazy dog", 100 | addSpecial: false, 101 | wantIDs: []uint32{2829, 4419, 14523, 2058, 1996, 13971, 3899}, 102 | wantTypeIDs: []uint32{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, 103 | wantTokens: []string{"brown", "fox", "jumps", "over", "the", "lazy", "dog"}, 104 | wantSpecialTokensMask: []uint32{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, 105 | wantAttentionMask: []uint32{0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1}, 106 | wantOffsets: []tokenizers.Offset{{0x0, 0x5}, {0x6, 0x9}, {0xa, 0xf}, {0x10, 0x14}, {0x15, 0x18}, {0x19, 0x1d}, {0x1e, 0x21}}, 107 | }, 108 | { 109 | name: "with special tokens", 110 | str: "brown fox jumps over the lazy dog", 111 | addSpecial: true, 112 | wantIDs: []uint32{101, 2829, 4419, 14523, 2058, 1996, 13971, 3899, 102}, 113 | wantTypeIDs: []uint32{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, 114 | wantTokens: []string{"[CLS]", "brown", "fox", "jumps", "over", "the", "lazy", "dog", "[SEP]"}, 115 | wantSpecialTokensMask: []uint32{0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, 116 | wantAttentionMask: []uint32{0x1, 
0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1}, 117 | wantOffsets: []tokenizers.Offset{{0x0, 0x0}, {0x0, 0x5}, {0x6, 0x9}, {0xa, 0xf}, {0x10, 0x14}, {0x15, 0x18}, {0x19, 0x1d}, {0x1e, 0x21}, {0x0, 0x0}}, 118 | }, 119 | { 120 | name: "empty string", 121 | str: "", 122 | addSpecial: false, 123 | }, 124 | { 125 | name: "empty string with special tokens", 126 | str: "", 127 | addSpecial: true, 128 | wantTypeIDs: []uint32{0x0, 0x0}, 129 | wantSpecialTokensMask: []uint32{0x1, 0x1}, 130 | wantAttentionMask: []uint32{0x1, 0x1}, 131 | wantIDs: []uint32{101, 102}, 132 | wantTokens: []string{"[CLS]", "[SEP]"}, 133 | wantOffsets: []tokenizers.Offset{{0x0, 0x0}, {0x0, 0x0}}, 134 | }, 135 | { 136 | name: "invalid utf8 string", 137 | str: "\x91D", 138 | addSpecial: false, 139 | }, 140 | } 141 | for _, tt := range tests { 142 | t.Run(tt.name, func(t *testing.T) { 143 | encoding := tk.EncodeWithOptions(tt.str, tt.addSpecial, tokenizers.WithReturnAllAttributes()) 144 | assert.Equal(t, tt.wantIDs, encoding.IDs, "wrong ids") 145 | assert.Equal(t, tt.wantTypeIDs, encoding.TypeIDs, "wrong type ids") 146 | assert.Equal(t, tt.wantTokens, encoding.Tokens, "wrong tokens") 147 | assert.Equal(t, tt.wantSpecialTokensMask, encoding.SpecialTokensMask, "wrong special tokens mask") 148 | assert.Equal(t, tt.wantAttentionMask, encoding.AttentionMask, "wrong attention mask") 149 | assert.Equal(t, tt.wantOffsets, encoding.Offsets, "wrong offsets mask") 150 | 151 | ids, tokens := tk.Encode(tt.str, tt.addSpecial) 152 | assert.Equal(t, tt.wantIDs, ids, "wrong ids") 153 | assert.Equal(t, tt.wantTokens, tokens, "wrong tokens") 154 | }) 155 | } 156 | } 157 | 158 | func TestEncodeSpecialTokens(t *testing.T) { 159 | tk, err := tokenizers.FromBytes(embeddedBytes) 160 | require.NoError(t, err) 161 | // special tokens are not encoded by default, 162 | // meaning if input matches a special token, encoding will include the special token 163 | ids, _ := tk.Encode("[CLS]fox[SEP]", false) 164 | assert.Equal(t, []uint32{101, 
193284, 102}, ids) 165 | tk.Close() 166 | 167 | tk, err = tokenizers.FromBytes(embeddedBytes, tokenizers.WithEncodeSpecialTokens()) 168 | require.NoError(t, err) 169 | ids, _ = tk.Encode("[CLS]fox[SEP]", false) 170 | // assert that special tokens 101 and 102 are not present 171 | assert.Equal(t, []uint32{164, 304910, 166, 193284, 164, 211703, 166}, ids) 172 | tk.Close() 173 | } 174 | 175 | func TestEncodeOptions(t *testing.T) { 176 | tk, err := tokenizers.FromFile("./test/data/bert-base-uncased.json") 177 | require.NoError(t, err) 178 | defer tk.Close() 179 | tests := []struct { 180 | name string 181 | str string 182 | addSpecial bool 183 | wantIDs []uint32 184 | wantTypeIDs []uint32 185 | wantTokens []string 186 | wantSpecialTokensMask []uint32 187 | wantAttentionMask []uint32 188 | wantOffsets []tokenizers.Offset 189 | }{ 190 | { 191 | name: "without special tokens", 192 | str: "brown fox jumps over the lazy dog", 193 | addSpecial: false, 194 | wantIDs: []uint32{2829, 4419, 14523, 2058, 1996, 13971, 3899}, 195 | wantTypeIDs: []uint32{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, 196 | wantTokens: []string{"brown", "fox", "jumps", "over", "the", "lazy", "dog"}, 197 | wantSpecialTokensMask: []uint32{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, 198 | wantAttentionMask: []uint32{0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1}, 199 | wantOffsets: []tokenizers.Offset{{0x0, 0x5}, {0x6, 0x9}, {0xa, 0xf}, {0x10, 0x14}, {0x15, 0x18}, {0x19, 0x1d}, {0x1e, 0x21}}, 200 | }, 201 | } 202 | for _, tt := range tests { 203 | t.Run(tt.name, func(t *testing.T) { 204 | encoding := tk.EncodeWithOptions(tt.str, tt.addSpecial) 205 | assert.Equal(t, tt.wantIDs, encoding.IDs, "wrong ids") 206 | assert.Equal(t, []uint32(nil), encoding.TypeIDs, "wrong type ids") 207 | assert.Equal(t, []string(nil), encoding.Tokens, "wrong tokens") 208 | assert.Equal(t, []uint32(nil), encoding.SpecialTokensMask, "wrong special tokens mask") 209 | assert.Equal(t, []uint32(nil), encoding.AttentionMask, "wrong attention mask") 210 | 
assert.Equal(t, []tokenizers.Offset(nil), encoding.Offsets, "wrong offsets") 211 | 212 | encoding = tk.EncodeWithOptions(tt.str, tt.addSpecial, tokenizers.WithReturnTokens()) 213 | assert.Equal(t, tt.wantIDs, encoding.IDs, "wrong ids") 214 | assert.Equal(t, []uint32(nil), encoding.TypeIDs, "wrong type ids") 215 | assert.Equal(t, tt.wantTokens, encoding.Tokens, "wrong tokens") 216 | assert.Equal(t, []uint32(nil), encoding.SpecialTokensMask, "wrong special tokens mask") 217 | assert.Equal(t, []uint32(nil), encoding.AttentionMask, "wrong attention mask") 218 | assert.Equal(t, []tokenizers.Offset(nil), encoding.Offsets, "wrong offsets") 219 | 220 | encoding = tk.EncodeWithOptions(tt.str, tt.addSpecial, tokenizers.WithReturnTypeIDs()) 221 | assert.Equal(t, tt.wantIDs, encoding.IDs, "wrong ids") 222 | assert.Equal(t, tt.wantTypeIDs, encoding.TypeIDs, "wrong type ids") 223 | assert.Equal(t, []string(nil), encoding.Tokens, "wrong tokens") 224 | assert.Equal(t, []uint32(nil), encoding.SpecialTokensMask, "wrong special tokens mask") 225 | assert.Equal(t, []uint32(nil), encoding.AttentionMask, "wrong attention mask") 226 | assert.Equal(t, []tokenizers.Offset(nil), encoding.Offsets, "wrong offsets") 227 | 228 | encoding = tk.EncodeWithOptions(tt.str, tt.addSpecial, tokenizers.WithReturnSpecialTokensMask()) 229 | assert.Equal(t, tt.wantIDs, encoding.IDs, "wrong ids") 230 | assert.Equal(t, []uint32(nil), encoding.TypeIDs, "wrong type ids") 231 | assert.Equal(t, []string(nil), encoding.Tokens, "wrong tokens") 232 | assert.Equal(t, tt.wantSpecialTokensMask, encoding.SpecialTokensMask, "wrong special tokens mask") 233 | assert.Equal(t, []uint32(nil), encoding.AttentionMask, "wrong attention mask") 234 | assert.Equal(t, []tokenizers.Offset(nil), encoding.Offsets, "wrong offsets") 235 | 236 | encoding = tk.EncodeWithOptions(tt.str, tt.addSpecial, tokenizers.WithReturnAttentionMask()) 237 | assert.Equal(t, tt.wantIDs, encoding.IDs, "wrong ids") 238 | assert.Equal(t, []uint32(nil), 
encoding.TypeIDs, "wrong type ids") 239 | assert.Equal(t, []string(nil), encoding.Tokens, "wrong tokens") 240 | assert.Equal(t, []uint32(nil), encoding.SpecialTokensMask, "wrong special tokens mask") 241 | assert.Equal(t, tt.wantAttentionMask, encoding.AttentionMask, "wrong attention mask") 242 | assert.Equal(t, []tokenizers.Offset(nil), encoding.Offsets, "wrong offsets") 243 | 244 | encoding = tk.EncodeWithOptions(tt.str, tt.addSpecial, tokenizers.WithReturnOffsets()) 245 | assert.Equal(t, tt.wantIDs, encoding.IDs, "wrong ids") 246 | assert.Equal(t, []uint32(nil), encoding.TypeIDs, "wrong type ids") 247 | assert.Equal(t, []string(nil), encoding.Tokens, "wrong tokens") 248 | assert.Equal(t, []uint32(nil), encoding.SpecialTokensMask, "wrong special tokens mask") 249 | assert.Equal(t, []uint32(nil), encoding.AttentionMask, "wrong attention mask") 250 | assert.Equal(t, tt.wantOffsets, encoding.Offsets, "wrong offsets") 251 | }) 252 | } 253 | } 254 | 255 | func TestEncodeWithTruncation(t *testing.T) { 256 | tests := []struct { 257 | name string 258 | str string 259 | addSpecial bool 260 | maxLen int 261 | dir tokenizers.TruncationDirection 262 | wantIDs []uint32 263 | wantTokens []string 264 | }{ 265 | { 266 | name: "without special tokens, left truncation", 267 | str: "brown fox jumps over the lazy dog", 268 | addSpecial: false, 269 | maxLen: 5, 270 | dir: tokenizers.TruncationDirectionLeft, 271 | wantIDs: []uint32{0x5185b, 0x3c54, 0x3a89, 0x35fc3, 0x57b4}, 272 | wantTokens: []string{"jumps", "over", "the", "lazy", "dog"}, 273 | }, 274 | { 275 | name: "without special tokens, right truncation", 276 | str: "brown fox jumps over the lazy dog", 277 | addSpecial: false, 278 | maxLen: 5, 279 | dir: tokenizers.TruncationDirectionRight, 280 | wantIDs: []uint32{0xca3f, 0x2f304, 0x5185b, 0x3c54, 0x3a89}, 281 | wantTokens: []string{"brown", "fox", "jumps", "over", "the"}, 282 | }, 283 | { 284 | name: "with special tokens, left truncation", 285 | str: "brown fox jumps over the 
lazy dog", 286 | addSpecial: true, 287 | maxLen: 5, 288 | dir: tokenizers.TruncationDirectionLeft, 289 | wantIDs: []uint32{0x65, 0x3a89, 0x35fc3, 0x57b4, 0x66}, 290 | wantTokens: []string{"[CLS]", "the", "lazy", "dog", "[SEP]"}, 291 | }, 292 | { 293 | name: "with special tokens, right truncation", 294 | str: "brown fox jumps over the lazy dog", 295 | addSpecial: true, 296 | maxLen: 5, 297 | dir: tokenizers.TruncationDirectionRight, 298 | wantIDs: []uint32{0x65, 0xca3f, 0x2f304, 0x5185b, 0x66}, 299 | wantTokens: []string{"[CLS]", "brown", "fox", "jumps", "[SEP]"}, 300 | }, 301 | } 302 | for _, tt := range tests { 303 | t.Run(tt.name, func(t *testing.T) { 304 | tk, err := tokenizers.FromBytesWithTruncation(embeddedBytes, uint32(tt.maxLen), tt.dir) 305 | require.NoError(t, err) 306 | defer tk.Close() 307 | 308 | ids, tokens := tk.Encode(tt.str, tt.addSpecial) 309 | assert.Equal(t, tt.wantIDs, ids, "wrong ids") 310 | assert.Equal(t, tt.wantTokens, tokens, "wrong tokens") 311 | }) 312 | } 313 | } 314 | 315 | func TestEncodeWithPadding(t *testing.T) { 316 | tk, err := tokenizers.FromFile("./test/data/all-minilm-l6-v2.json") 317 | require.NoError(t, err) 318 | defer tk.Close() 319 | 320 | tests := []struct { 321 | name string 322 | str string 323 | addSpecial bool 324 | wantIDs []uint32 325 | wantTypeIDs []uint32 326 | wantTokens []string 327 | wantSpecialTokensMask []uint32 328 | wantAttentionMask []uint32 329 | wantOffsets []tokenizers.Offset 330 | }{ 331 | { 332 | name: "sentence with padding", 333 | str: "this short sentence", 334 | addSpecial: false, 335 | wantIDs: []uint32{0x7e7, 0x99c, 0x186b, 0x0, 0x0, 0x0, 0x0, 0x0}, 336 | wantTypeIDs: []uint32{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, 337 | wantTokens: []string{"this", "short", "sentence", "[PAD]", "[PAD]", "[PAD]", "[PAD]", "[PAD]"}, 338 | wantSpecialTokensMask: []uint32{0x0, 0x0, 0x0, 0x1, 0x1, 0x1, 0x1, 0x1}, 339 | wantAttentionMask: []uint32{0x1, 0x1, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0}, 340 | wantOffsets: 
[]tokenizers.Offset{{0x0, 0x4}, {0x5, 0xa}, {0xb, 0x13}, {0x0, 0x0}, {0x0, 0x0}, {0x0, 0x0}, {0x0, 0x0}, {0x0, 0x0}}, 341 | }, 342 | } 343 | for _, tt := range tests { 344 | t.Run(tt.name, func(t *testing.T) { 345 | encoding := tk.EncodeWithOptions(tt.str, tt.addSpecial, tokenizers.WithReturnAllAttributes()) 346 | assert.Equal(t, tt.wantIDs, encoding.IDs, "wrong ids") 347 | assert.Equal(t, tt.wantTypeIDs, encoding.TypeIDs, "wrong type ids") 348 | assert.Equal(t, tt.wantTokens, encoding.Tokens, "wrong tokens") 349 | assert.Equal(t, tt.wantSpecialTokensMask, encoding.SpecialTokensMask, "wrong special tokens mask") 350 | assert.Equal(t, tt.wantAttentionMask, encoding.AttentionMask, "wrong attention mask") 351 | assert.Equal(t, tt.wantOffsets, encoding.Offsets, "wrong offsets") 352 | 353 | ids, tokens := tk.Encode(tt.str, tt.addSpecial) 354 | assert.Equal(t, tt.wantIDs, ids, "wrong ids") 355 | assert.Equal(t, tt.wantTokens, tokens, "wrong tokens") 356 | }) 357 | } 358 | } 359 | 360 | func TestDecode(t *testing.T) { 361 | tk, err := tokenizers.FromFile("./test/data/bert-base-uncased.json") 362 | require.NoError(t, err) 363 | defer tk.Close() 364 | tests := []struct { 365 | name string 366 | tokens []uint32 367 | skipSpecial bool 368 | want string 369 | }{ 370 | { 371 | name: "without special tokens, skip special tokens", 372 | tokens: []uint32{2829, 4419, 14523, 2058, 1996, 13971, 3899}, 373 | skipSpecial: true, 374 | want: "brown fox jumps over the lazy dog", 375 | }, 376 | { 377 | name: "with special tokens, skip special tokens", 378 | tokens: []uint32{101, 2829, 4419, 14523, 2058, 1996, 13971, 3899, 102}, 379 | skipSpecial: true, 380 | want: "brown fox jumps over the lazy dog", 381 | }, 382 | { 383 | name: "without special tokens, don't skip special tokens", 384 | tokens: []uint32{2829, 4419, 14523, 2058, 1996, 13971, 3899}, 385 | skipSpecial: false, 386 | want: "brown fox jumps over the lazy dog", 387 | }, 388 | { 389 | name: "with special tokens, don't skip special 
tokens", 390 | tokens: []uint32{101, 2829, 4419, 14523, 2058, 1996, 13971, 3899, 102}, 391 | skipSpecial: false, 392 | want: "[CLS] brown fox jumps over the lazy dog [SEP]", 393 | }, 394 | { 395 | name: "no tokens", 396 | tokens: []uint32{}, 397 | skipSpecial: false, 398 | want: "", 399 | }, 400 | } 401 | for _, tt := range tests { 402 | t.Run(tt.name, func(t *testing.T) { 403 | got := tk.Decode(tt.tokens, tt.skipSpecial) 404 | assert.Equal(t, tt.want, got) 405 | }) 406 | } 407 | } 408 | 409 | func TestDecodeInvalidString(t *testing.T) { 410 | tk, err := tokenizers.FromFile("test/data/cohere-tokenizer.json") 411 | require.NoError(t, err) 412 | defer tk.Close() 413 | 414 | str := tk.Decode([]uint32{196}, true) 415 | assert.Empty(t, str) 416 | } 417 | 418 | func TestVocabSize(t *testing.T) { 419 | tk, err := tokenizers.FromFile("./test/data/bert-base-uncased.json") 420 | require.NoError(t, err) 421 | defer tk.Close() 422 | assert.Equal(t, uint32(30522), tk.VocabSize()) 423 | } 424 | 425 | func BenchmarkEncodeNTimes(b *testing.B) { 426 | tk, err := tokenizers.FromFile("./test/data/bert-base-uncased.json") 427 | require.NoError(b, err) 428 | defer tk.Close() 429 | expected := []uint32{2829, 4419, 14523, 2058, 1996, 13971, 3899} 430 | b.ResetTimer() 431 | for i := 0; i < b.N; i++ { 432 | ids, _ := tk.Encode("brown fox jumps over the lazy dog", false) 433 | assert.Equal(b, expected, ids) 434 | } 435 | } 436 | 437 | func BenchmarkEncodeNChars(b *testing.B) { 438 | tk, err := tokenizers.FromFile("./test/data/bert-base-uncased.json") 439 | require.NoError(b, err) 440 | defer tk.Close() 441 | vocabSize := tk.VocabSize() 442 | input := make([]rune, 0, b.N) 443 | for i := 0; i < b.N; i++ { 444 | input = append(input, rune(rand.Uint32()%vocabSize)) 445 | } 446 | str := string(input) 447 | b.ResetTimer() 448 | _, tokens := tk.Encode(str, false) 449 | assert.Greater(b, len(tokens), 0) 450 | } 451 | 452 | func BenchmarkDecodeNTimes(b *testing.B) { 453 | tk, err := 
tokenizers.FromFile("./test/data/bert-base-uncased.json") 454 | require.NoError(b, err) 455 | defer tk.Close() 456 | b.ResetTimer() 457 | for i := 0; i < b.N; i++ { 458 | str := tk.Decode([]uint32{2829, 4419, 14523, 2058, 1996, 13971, 3899}, true) 459 | assert.Equal(b, "brown fox jumps over the lazy dog", str) 460 | } 461 | } 462 | 463 | func BenchmarkDecodeNTokens(b *testing.B) { 464 | tk, err := tokenizers.FromFile("./test/data/bert-base-uncased.json") 465 | require.NoError(b, err) 466 | defer tk.Close() 467 | vocabSize := tk.VocabSize() 468 | input := make([]uint32, 0, b.N) 469 | for i := 0; i < b.N; i++ { 470 | input = append(input, rand.Uint32()%vocabSize) 471 | } 472 | b.ResetTimer() 473 | text := tk.Decode(input, true) 474 | // a token is one or more characters 475 | assert.Greater(b, len(text), b.N) 476 | } 477 | 478 | func TestFromPretrained(t *testing.T) { 479 | tests := []struct { 480 | name string 481 | modelID string 482 | setupOpts func(t *testing.T) ([]tokenizers.TokenizerConfigOption, string) 483 | wantErr bool 484 | expectedToken bool 485 | }{ 486 | { 487 | name: "valid public model with cache dir", 488 | modelID: "bert-base-uncased", 489 | expectedToken: true, 490 | setupOpts: func(t *testing.T) ([]tokenizers.TokenizerConfigOption, string) { 491 | tmpDir := t.TempDir() 492 | return []tokenizers.TokenizerConfigOption{ 493 | tokenizers.WithCacheDir(tmpDir), 494 | }, tmpDir 495 | }, 496 | }, 497 | { 498 | name: "valid public model without cache dir", 499 | modelID: "bert-base-uncased", 500 | expectedToken: true, 501 | setupOpts: func(t *testing.T) ([]tokenizers.TokenizerConfigOption, string) { 502 | return nil, "" 503 | }, 504 | }, 505 | { 506 | name: "private model with valid auth token", 507 | modelID: "bert-base-uncased", 508 | expectedToken: true, 509 | setupOpts: func(t *testing.T) ([]tokenizers.TokenizerConfigOption, string) { 510 | tmpDir := t.TempDir() 511 | return []tokenizers.TokenizerConfigOption{ 512 | tokenizers.WithCacheDir(tmpDir), 513 
| tokenizers.WithAuthToken("test-token"), 514 | }, tmpDir 515 | }, 516 | }, 517 | { 518 | name: "private model with invalid auth token", 519 | modelID: "private-model", 520 | wantErr: true, 521 | setupOpts: func(t *testing.T) ([]tokenizers.TokenizerConfigOption, string) { 522 | tmpDir := t.TempDir() 523 | return []tokenizers.TokenizerConfigOption{ 524 | tokenizers.WithCacheDir(tmpDir), 525 | tokenizers.WithAuthToken("invalid-token"), 526 | }, tmpDir 527 | }, 528 | }, 529 | { 530 | name: "empty model ID", 531 | modelID: "", 532 | wantErr: true, 533 | setupOpts: func(t *testing.T) ([]tokenizers.TokenizerConfigOption, string) { 534 | return nil, "" 535 | }, 536 | }, 537 | { 538 | name: "nonexistent model", 539 | modelID: "nonexistent/model", 540 | wantErr: true, 541 | setupOpts: func(t *testing.T) ([]tokenizers.TokenizerConfigOption, string) { 542 | tmpDir := t.TempDir() 543 | return []tokenizers.TokenizerConfigOption{ 544 | tokenizers.WithCacheDir(tmpDir), 545 | }, tmpDir 546 | }, 547 | }, 548 | } 549 | 550 | for _, tt := range tests { 551 | t.Run(tt.name, func(t *testing.T) { 552 | opts, cacheDir := tt.setupOpts(t) 553 | tokenizer, err := tokenizers.FromPretrained(tt.modelID, opts...) 
554 | 555 | if gotErr := err != nil; gotErr != tt.wantErr { 556 | t.Fatalf("expected error: %v, got error: %v", tt.wantErr, err) 557 | } 558 | if tt.wantErr { 559 | return 560 | } 561 | if cacheDir != "" { 562 | validateCache(t, cacheDir, tt.modelID) 563 | } 564 | if err := tokenizer.Close(); err != nil { 565 | t.Fatalf("error closing tokenizer: %v", err) 566 | } 567 | }) 568 | } 569 | } 570 | 571 | func validateCache(t *testing.T, dir string, modelID string) { 572 | t.Helper() 573 | files := []string{"tokenizer.json", "vocab.txt"} 574 | for _, file := range files { 575 | path := filepath.Join(dir, modelID, file) 576 | if _, err := os.Stat(path); err != nil { 577 | t.Errorf("expected file %s to exist in cache for model %s", file, modelID) 578 | } 579 | } 580 | } 581 | -------------------------------------------------------------------------------- /tokenizers.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | struct tokenizers_encode_options { 5 | bool add_special_token; 6 | bool return_type_ids; 7 | bool return_tokens; 8 | bool return_special_tokens_mask; 9 | bool return_attention_mask; 10 | bool return_offsets; 11 | }; 12 | 13 | struct tokenizers_options { 14 | bool encode_special_tokens; 15 | }; 16 | 17 | struct tokenizers_buffer { 18 | uint32_t *ids; 19 | uint32_t *type_ids; 20 | uint32_t *special_tokens_mask; 21 | uint32_t *attention_mask; 22 | char *tokens; 23 | size_t *offsets; 24 | uint32_t len; 25 | }; 26 | 27 | const char *tokenizers_version(); 28 | 29 | void *tokenizers_from_bytes(const uint8_t *config, uint32_t len, const struct tokenizers_options *options); 30 | 31 | void *tokenizers_from_bytes_with_truncation(const uint8_t *config, uint32_t len, uint32_t max_len, uint8_t direction); 32 | 33 | void *tokenizers_from_file(const char *config); 34 | 35 | struct tokenizers_buffer tokenizers_encode(void *ptr, const char *message, const struct tokenizers_encode_options *options); 36 | 37 | 
/* Decodes len token ids into a C string; the Go wrapper (Tokenizer.Decode) releases the result with tokenizers_free_string. */ char *tokenizers_decode(void *ptr, const uint32_t *ids, uint32_t len, bool skip_special_tokens); 38 | 39 | /* Vocabulary size of the tokenizer behind ptr (exposed in Go as Tokenizer.VocabSize). */ uint32_t tokenizers_vocab_size(void *ptr); 40 | 41 | /* NOTE(review): presumably releases a handle returned by a tokenizers_from_* constructor; the caller is not visible here, so confirm. */ void tokenizers_free_tokenizer(void *ptr); 42 | 43 | /* Releases a buffer returned by tokenizers_encode; the Go wrappers call it only when buffer.len is non-zero. */ void tokenizers_free_buffer(struct tokenizers_buffer buffer); 44 | 45 | /* Releases a string returned by tokenizers_decode. */ void tokenizers_free_string(char *string); 46 | --------------------------------------------------------------------------------