├── .npmrc ├── .czrc ├── .commitlintrc ├── .gitignore ├── .husky └── commit-msg ├── foundry.toml ├── src ├── test │ ├── Assertions.sol │ ├── SliceAssertions.sol │ └── StrSliceAssertions.sol ├── utils │ ├── PackPtrLen.sol │ ├── memascii.sol │ ├── toString.sol │ ├── utf8.sol │ ├── unicode.sol │ ├── mem.sol │ └── memchr.sol ├── SliceIter.sol ├── StrChar.sol ├── StrCharsIter.sol ├── StrSlice.sol └── Slice.sol ├── .github └── workflows │ └── ci.yml ├── test ├── Utils.t.sol ├── Examples.t.sol ├── StrSliceAssertions.t.sol ├── SliceIter.t.sol ├── StrCharsIter.t.sol ├── SliceAssertions.t.sol ├── StrSlice.t.sol ├── StrChar.t.sol └── Slice.t.sol ├── package.json ├── LICENSE ├── CHANGELOG.md └── README.md /.npmrc: -------------------------------------------------------------------------------- 1 | message="chore(release): %s" -------------------------------------------------------------------------------- /.czrc: -------------------------------------------------------------------------------- 1 | { 2 | "path": "cz-conventional-changelog" 3 | } -------------------------------------------------------------------------------- /.commitlintrc: -------------------------------------------------------------------------------- 1 | { 2 | "extends": ["@commitlint/config-conventional"] 3 | } -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | cache 3 | abi 4 | out 5 | yarn-error.log 6 | .vscode -------------------------------------------------------------------------------- /.husky/commit-msg: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | . 
"$(dirname -- "$0")/_/husky.sh" 3 | 4 | npx --no -- commitlint --edit "${1}" 5 | -------------------------------------------------------------------------------- /foundry.toml: -------------------------------------------------------------------------------- 1 | [profile.default] 2 | src = "src" 3 | out = "out" 4 | libs = ["node_modules"] 5 | include_paths = ["node_modules"] 6 | optimizer = true 7 | optimizer_runs = 1000000 8 | verbosity = 2 9 | 10 | [fuzz] 11 | runs = 4096 12 | 13 | [profile.ci] 14 | verbosity = 4 -------------------------------------------------------------------------------- /src/test/Assertions.sol: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: MIT 2 | 3 | pragma solidity ^0.8.17; 4 | 5 | import { SliceAssertions } from "./SliceAssertions.sol"; 6 | import { StrSliceAssertions } from "./StrSliceAssertions.sol"; 7 | 8 | /// @title Extension to PRBTest with Slice and StrSlice assertions. 9 | /// @dev Also provides lt,lte,gt,gte,contains for 2 native `bytes` and 2 native `string`. 
/// @dev Checks the library's `toString(uint256)` against Foundry's `vm.toString` cheatcode.
contract UtilsTest is PRBTest {
    /// @dev Exhaustive check on three ranges: small values, values around 10**77,
    /// and the two largest uint256 values.
    function testUintToString() public {
        for (uint256 small = 0; small < 10000; small++) {
            assertEq(toString(small), vm.toString(small));
        }

        // 10**77 is the largest power of ten that fits in uint256
        uint256 pivot = 10**77;
        for (uint256 delta = 0; delta < 10000; delta++) {
            assertEq(toString(pivot - delta), vm.toString(pivot - delta));
            assertEq(toString(pivot + delta), vm.toString(pivot + delta));
        }

        uint256 top = type(uint256).max;
        assertEq(toString(top - 1), vm.toString(top - 1));
        assertEq(toString(top), vm.toString(top));
    }

    /// @dev Same comparison for an arbitrary fuzzed value.
    function testUintToString__Fuzz(uint256 value) public {
        assertEq(toString(value), vm.toString(value));
    }
}
-------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@dk1a/solidity-stringutils", 3 | "version": "0.3.3", 4 | "author": "dk1a ", 5 | "license": "MIT", 6 | "description": "StrSlice & Slice library for Solidity", 7 | "keywords": [ 8 | "solidity", 9 | "library", 10 | "string", 11 | "slice", 12 | "ethereum", 13 | "smart-contracts" 14 | ], 15 | "repository": { 16 | "type": "git", 17 | "url": "https://github.com/dk1a/solidity-stringutils.git" 18 | }, 19 | "publishConfig": { 20 | "access": "public" 21 | }, 22 | "scripts": { 23 | "prepare": "husky install", 24 | "test": "forge test", 25 | "build": "forge build", 26 | "version": "conventional-changelog -p angular -i CHANGELOG.md -s && git add CHANGELOG.md" 27 | }, 28 | "files": [ 29 | "/src" 30 | ], 31 | "devDependencies": { 32 | "@commitlint/cli": "^17.4.2", 33 | "@commitlint/config-conventional": "^17.4.2", 34 | "@prb/test": "^0.2.1", 35 | "conventional-changelog-cli": "^2.2.2", 36 | "husky": "^8.0.3" 37 | }, 38 | "dependencies": {} 39 | } 40 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2022 Kirill Dmitriev 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 
/**
 * @title Pack ptr and len uint128 values into 1 uint256.
 * @dev ptr is left/MSB. len is right/LSB.
 */
library PackPtrLen {
    /// @dev Largest value that fits into either 128-bit half.
    uint256 constant MAX = type(uint128).max;

    /// @dev Selects the upper 128 bits (ptr).
    uint256 constant MASK_PTR = uint256(type(uint128).max) << 128;
    /// @dev Selects the lower 128 bits (len).
    uint256 constant MASK_LEN = uint256(type(uint128).max);

    /**
     * @dev Packs `ptr` into the upper half and `len` into the lower half.
     * Reverts with PackedPtrLen__PtrOverflow / PackedPtrLen__LenOverflow
     * if the respective value does not fit into 128 bits.
     */
    function pack(uint256 ptr, uint256 len) internal pure returns (uint256 packed) {
        if (ptr > MAX) revert PackedPtrLen__PtrOverflow();
        if (len > MAX) revert PackedPtrLen__LenOverflow();
        return (ptr << 128) | (len & MASK_LEN);
    }

    /// @dev Returns the upper 128 bits (ptr).
    function getPtr(uint256 packed) internal pure returns (uint256) {
        return packed >> 128;
    }

    /// @dev Returns the lower 128 bits (len).
    function getLen(uint256 packed) internal pure returns (uint256) {
        return packed & MASK_LEN;
    }

    /**
     * @dev Returns `packed` with its ptr half replaced by `ptr`; the len half is kept.
     * Reverts with PackedPtrLen__PtrOverflow if `ptr` does not fit into 128 bits.
     */
    function setPtr(uint256 packed, uint256 ptr) internal pure returns (uint256) {
        if (ptr > MAX) revert PackedPtrLen__PtrOverflow();
        // keep ONLY the len bits, otherwise the old ptr would be OR-ed with the new one
        return (packed & MASK_LEN) | (ptr << 128);
    }

    /**
     * @dev Returns `packed` with its len half replaced by `len`; the ptr half is kept.
     * Reverts with PackedPtrLen__LenOverflow if `len` does not fit into 128 bits.
     */
    function setLen(uint256 packed, uint256 len) internal pure returns (uint256) {
        if (len > MAX) revert PackedPtrLen__LenOverflow();
        // keep ONLY the ptr bits, otherwise the old len would be OR-ed with the new one
        return (packed & MASK_PTR) | len;
    }
}
/**
 * @dev Returns true if none of the `textLen` bytes starting at `textPtr`
 * has its highest bit set, i.e. all of them are within the ASCII range.
 * Whole 32-byte words are tested first, then the masked remainder.
 */
function memIsAscii(uint256 textPtr, uint256 textLen) pure returns (bool) {
    // number of bytes left over after the last whole 32-byte word
    uint256 remainder = textLen % 32;
    uint256 wordsEnd;
    // safe because remainder <= textLen (ptr+len is implicitly safe)
    unchecked {
        wordsEnd = textPtr + (textLen - remainder);
    }

    uint256 word;
    for (uint256 cursor = textPtr; cursor < wordsEnd; ) {
        /// @solidity memory-safe-assembly
        assembly {
            word := mload(cursor)
        }
        // any set high bit means a non-ascii byte
        if (word & ASCII_MASK != 0) {
            return false;
        }
        // safe because cursor < wordsEnd, and wordsEnd = textPtr + n*32
        unchecked {
            cursor += 32;
        }
    }

    // drop the bytes past the end of the text before testing the last partial word
    uint256 keepMask = leftMask(remainder);
    /// @solidity memory-safe-assembly
    assembly {
        word := and(mload(wordsEnd), keepMask)
    }
    return word & ASCII_MASK == 0;
}
/**
 * @dev uint256 to string (decimal).
 * WARNING: this function is very optimized for gas, it's almost pure assembly.
 * Just use OpenZeppelin's toString for safety and readability.
 *
 * (this is ~100 gas/digit, OZ is ~1000)
 *
 * Derived from https://github.com/moodlezoup/sol2string
 *
 * @param value The number to convert.
 * @return str Newly allocated string with the decimal digits of `value`.
 */
function toString(uint256 value) pure returns (string memory str) {
    if (value <= 9) {
        // very fast path for 1 digit
        /// @solidity memory-safe-assembly
        assembly {
            // allocate memory (0x20 for length, 0x20 for content)
            str := mload(0x40)
            mstore(0x40, add(str, 0x40))
            // store length
            mstore(str, 1)
            // store content: a single byte, the ascii code of the digit
            mstore8(add(str, 0x20), add(value, ASCII_DIGIT_OFFSET))
        }
        return str;
    }

    uint256 startPtr;
    uint256 slidingPtr;
    /// @solidity memory-safe-assembly
    assembly {
        // Layout of the allocation:
        //   [startPtr, startPtr + MAX_UINT256_STRING_LENGTH + 0x20)
        // slidingPtr starts at startPtr + MAX_UINT256_STRING_LENGTH.
        // `mstore(slidingPtr, x)` writes the 32 bytes [slidingPtr, slidingPtr + 32),
        // so the least significant byte of x lands on the LAST byte of the allocation.
        // The pointer then slides from right to left, each mstore filling one more LSB.

        startPtr := mload(0x40)
        // note how slidingPtr doesn't include 0x20 for length
        slidingPtr := add(startPtr, MAX_UINT256_STRING_LENGTH)
        // overallocate memory
        // 0x20 for length, MAX_UINT256_STRING_LENGTH for content
        mstore(0x40, add(0x20, slidingPtr))
    }

    // populate from right to left (lsb to msb)
    while (value != 0) {
        /// @solidity memory-safe-assembly
        assembly {
            // ascii code of the lowest decimal digit
            let char := add(
                mod(value, 10),
                ASCII_DIGIT_OFFSET
            )
            // only the last byte of this word matters: the 31 zero bytes before it
            // are overwritten by the following iterations (or by the length below)
            mstore(slidingPtr, char)
            slidingPtr := sub(slidingPtr, 1)
            value := div(value, 10)
        }
    }

    /// @solidity memory-safe-assembly
    assembly {
        // slidingPtr moved left by 1 per digit, so this is the number of digits written
        let realLen := sub(MAX_UINT256_STRING_LENGTH, sub(slidingPtr, startPtr))
        // move `str` pointer to the start of the string:
        // the first digit sits at slidingPtr + 32, right after the length word
        str := slidingPtr
        // store the real length
        mstore(str, realLen)
    }
    return str;
}
/// @dev Skips up to 2 leading UTF-8 characters of `str` and returns what is left as a StrSlice.
/// Strings shorter than 2 characters yield an empty slice.
/// Reverts on invalid UTF8.
function removeFirstTwoChars(string memory str) pure returns (StrSlice) {
    StrCharsIter memory it = str.toSlice().chars();
    uint256 skipped;
    // stop early when the string runs out of characters
    while (skipped < 2 && !it.isEmpty()) {
        it.next();
        skipped++;
    }
    return it.asStr();
}
9 | * @title Slice iterator. 10 | * @dev This struct is created by the iter method on `Slice`. 11 | * Iterates only 1 byte (uint8) at a time. 12 | */ 13 | struct SliceIter { 14 | uint256 _ptr; 15 | uint256 _len; 16 | } 17 | 18 | /*////////////////////////////////////////////////////////////////////////// 19 | CUSTOM ERRORS 20 | //////////////////////////////////////////////////////////////////////////*/ 21 | 22 | error SliceIter__StopIteration(); 23 | 24 | /*////////////////////////////////////////////////////////////////////////// 25 | STATIC FUNCTIONS 26 | //////////////////////////////////////////////////////////////////////////*/ 27 | 28 | library SliceIter__ { 29 | /** 30 | * @dev Creates a new `SliceIter` from `Slice`. 31 | * Note the `Slice` is assumed to be memory-safe. 32 | */ 33 | function from(Slice slice) internal pure returns (SliceIter memory) { 34 | return SliceIter(slice.ptr(), slice.len()); 35 | } 36 | } 37 | 38 | /*////////////////////////////////////////////////////////////////////////// 39 | GLOBAL FUNCTIONS 40 | //////////////////////////////////////////////////////////////////////////*/ 41 | 42 | using { asSlice, ptr, len, isEmpty, next, nextBack } for SliceIter global; 43 | 44 | /** 45 | * @dev Views the underlying data as a subslice of the original data. 46 | */ 47 | function asSlice(SliceIter memory self) pure returns (Slice slice) { 48 | return Slice__.fromUnchecked(self._ptr, self._len); 49 | } 50 | 51 | /** 52 | * @dev Returns the pointer to the start of an in-memory slice. 53 | */ 54 | function ptr(SliceIter memory self) pure returns (uint256) { 55 | return self._ptr; 56 | } 57 | 58 | /** 59 | * @dev Returns the length in bytes. 60 | */ 61 | function len(SliceIter memory self) pure returns (uint256) { 62 | return self._len; 63 | } 64 | 65 | /** 66 | * @dev Returns true if the iterator is empty. 
/**
 * @dev Pops the front byte: moves the iterator 1 byte forward and returns the byte it skipped.
 * Reverts with SliceIter__StopIteration if len == 0.
 */
function next(SliceIter memory self) pure returns (uint8 value) {
    uint256 remaining = self._len;
    if (remaining == 0) revert SliceIter__StopIteration();

    uint256 head = self._ptr;
    // safe because remaining != 0 (ptr+len is implicitly safe and 1<=len)
    unchecked {
        // advance the iterator
        self._ptr = head + 1;
        self._len = remaining - 1;
    }

    // read the byte at the old front position
    value = mload8(head);
}
placement ([d73c0e6](https://github.com/dk1a/solidity-stringutils/commit/d73c0e62c51a3f538ba91170a42678094aea402c)) 21 | 22 | 23 | 24 | ## [0.3.1](https://github.com/dk1a/solidity-stringutils/compare/v0.3.0...v0.3.1) (2022-12-15) 25 | 26 | 27 | ### Features 28 | 29 | * add fast uint to string conversion ([5a975fe](https://github.com/dk1a/solidity-stringutils/commit/5a975fe509ad6e5cac5f07590f28faf6b2ca65e5)) 30 | * add isAscii ([dfb9916](https://github.com/dk1a/solidity-stringutils/commit/dfb9916b4477e34c382016f2b977f24389812685)) 31 | 32 | 33 | 34 | # [0.3.0](https://github.com/dk1a/solidity-stringutils/compare/v0.2.2...v0.3.0) (2022-12-12) 35 | 36 | 37 | ### Features 38 | 39 | * add optimizations to StrCharsIter, StrChar ([06a0b55](https://github.com/dk1a/solidity-stringutils/commit/06a0b55171af1e0d31e86327f4be6dafe2a6e6fc)) 40 | * add unicode code point support and tests for StrChar ([07f2047](https://github.com/dk1a/solidity-stringutils/commit/07f2047962992ef18712103d8ac08bd856213cb0)) 41 | 42 | 43 | 44 | ## [0.2.2](https://github.com/dk1a/solidity-stringutils/compare/v0.2.1...v0.2.2) (2022-12-11) 45 | 46 | 47 | ### Bug Fixes 48 | 49 | * fix critical issues in SliceIter tests ([6887ae4](https://github.com/dk1a/solidity-stringutils/commit/6887ae48ceb59c789930f748d35432954f1453c0)) 50 | * fix critical issues with nextBack in StrCharsIter and its tests; add optimizations ([6fcb355](https://github.com/dk1a/solidity-stringutils/commit/6fcb355baef25ac11a54097a20313bfe7fe96ce0)) 51 | 52 | 53 | 54 | ## [0.2.1](https://github.com/dk1a/solidity-stringutils/compare/v0.2.0...v0.2.1) (2022-12-09) 55 | 56 | 57 | 58 | # [0.2.0](https://github.com/dk1a/solidity-stringutils/compare/v0.1.1...v0.2.0) (2022-12-09) 59 | 60 | 61 | ### Features 62 | 63 | * add replacen ([3cc586b](https://github.com/dk1a/solidity-stringutils/commit/3cc586be116be77279f2004323380ea6742709fe)) 64 | * update readme 
([b269b98](https://github.com/dk1a/solidity-stringutils/commit/b269b98a34eea64e3173721fc6d42af2107b9367)) 65 | * use memmove instead of memcpy ([8b6a6a5](https://github.com/dk1a/solidity-stringutils/commit/8b6a6a5dd009cf4e16ce8a42a4470678e4018454)) 66 | 67 | 68 | 69 | ## [0.1.1](https://github.com/dk1a/solidity-stringutils/compare/v0.1.0...v0.1.1) (2022-12-08) 70 | 71 | 72 | 73 | # 0.1.0 (2022-12-07) 74 | 75 | 76 | ### Bug Fixes 77 | 78 | * stripSuffix ([6fabda5](https://github.com/dk1a/solidity-stringutils/commit/6fabda5d7abe1617dc278304b831f1d173ae2218)) 79 | 80 | 81 | ### Features 82 | 83 | * add getAfterStrict, more tests ([2e5d62b](https://github.com/dk1a/solidity-stringutils/commit/2e5d62b87d3a889229b424f899256d827d268936)) 84 | * add splitOnce ([501dc41](https://github.com/dk1a/solidity-stringutils/commit/501dc41807f33671ce87607b63d3ea66be560802)) 85 | * add string slice, char, char iterator ([fe0a65e](https://github.com/dk1a/solidity-stringutils/commit/fe0a65e24bcbc87bf77c00ea8e1df3258d89d0b0)) 86 | * add StrSlice assertions ([f069e7e](https://github.com/dk1a/solidity-stringutils/commit/f069e7e964596c9fc269bfee6dfe83104f3d01d1)) 87 | * initial commit ([d4b2ed0](https://github.com/dk1a/solidity-stringutils/commit/d4b2ed0d63167bf98a4476b68f36fa00a0268b4f)) 88 | 89 | 90 | 91 | -------------------------------------------------------------------------------- /src/utils/utf8.sol: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: MIT 2 | 3 | pragma solidity ^0.8.17; 4 | 5 | /** 6 | * @dev Returns the byte length for a UTF-8 character with the leading byte. 7 | * Returns 0 for invalid leading bytes. 
/**
 * @dev Validates the UTF-8 character stored in the 1-4 most significant bytes of `b`.
 * Returns its byte length (1-4), or 0 if those bytes are not a valid UTF-8 character.
 * (utf8CharWidth validates ONLY the leading byte, not the whole character)
 *
 * Note if MSB is 0x00, this will return 1, since 0x00 is valid UTF-8.
 * Works faster for smaller code points.
 *
 * https://www.rfc-editor.org/rfc/rfc3629#section-4
 * UTF8-char = UTF8-1 / UTF8-2 / UTF8-3 / UTF8-4
 * UTF8-1    = %x00-7F
 * UTF8-2    = %xC2-DF UTF8-tail
 * UTF8-3    = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
 *             %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
 * UTF8-4    = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
 *             %xF4 %x80-8F 2( UTF8-tail )
 * UTF8-tail = %x80-BF
 */
function isValidUtf8(bytes32 b) pure returns (uint256) {
    // TODO you can significantly optimize comparisons with bitmasks,
    // some stuff to look at:
    // https://github.com/zwegner/faster-utf8-validator/blob/master/z_validate.c
    // https://github.com/websockets/utf-8-validate/blob/master/src/validation.c
    // https://github.com/simdutf/simdutf/blob/master/src/scalar/utf8.h

    uint8 lead = uint8(b[0]);
    // UTF8-1 = %x00-7F (fast path for ascii)
    if (lead <= 0x7F) return 1;

    uint256 width = utf8CharWidth(lead);

    if (width == 2) {
        // UTF8-2 = %xC2-DF UTF8-tail
        bool ok = 0xC2 <= lead && lead <= 0xDF && _utf8Tail(uint8(b[1]));
        return ok ? 2 : 0;
    }

    if (width == 3) {
        uint8 snd = uint8(b[1]);
        // the allowed range of the second byte depends on the leading byte
        bool headOk;
        if (lead == 0xE0) {
            // %xE0 %xA0-BF UTF8-tail
            headOk = 0xA0 <= snd && snd <= 0xBF;
        } else if (lead == 0xED) {
            // %xED %x80-9F UTF8-tail
            headOk = 0x80 <= snd && snd <= 0x9F;
        } else {
            // %xE1-EC 2( UTF8-tail ) / %xEE-EF 2( UTF8-tail )
            bool plainLead = (0xE1 <= lead && lead <= 0xEC) || (0xEE <= lead && lead <= 0xEF);
            headOk = plainLead && _utf8Tail(snd);
        }
        return (headOk && _utf8Tail(uint8(b[2]))) ? 3 : 0;
    }

    if (width == 4) {
        uint8 snd = uint8(b[1]);
        bool headOk;
        if (lead == 0xF0) {
            // %xF0 %x90-BF 2( UTF8-tail )
            headOk = 0x90 <= snd && snd <= 0xBF;
        } else if (lead == 0xF4) {
            // %xF4 %x80-8F 2( UTF8-tail )
            headOk = 0x80 <= snd && snd <= 0x8F;
        } else {
            // %xF1-F3 3( UTF8-tail )
            headOk = 0xF1 <= lead && lead <= 0xF3 && _utf8Tail(snd);
        }
        return (headOk && _utf8Tail(uint8(b[2])) && _utf8Tail(uint8(b[3]))) ? 4 : 0;
    }

    // invalid leading byte
    return 0;
}
for '€' code point = 0x20AC; whereas UTF-8 encoding = 0xE282AC. 11 | * 12 | * Only conversion to/from UTF-8 is addressed here. 13 | * Note that UTF-16 surrogate halves are invalid code points even if UTF-16 was supported. 14 | */ 15 | 16 | error Unicode__InvalidCode(); 17 | 18 | /// @dev The highest valid code point. 19 | uint256 constant MAX = 0x10FFFF; 20 | 21 | // UTF-8 ranges 22 | uint256 constant MAX_ONE_B = 0x80; 23 | uint256 constant MAX_TWO_B = 0x800; 24 | uint256 constant MAX_THREE_B = 0x10000; 25 | // and tags for encoding characters 26 | uint256 constant TAG_CONT = 0x80; 27 | uint256 constant TAG_TWO_B = 0xC0; 28 | uint256 constant TAG_THREE_B = 0xE0; 29 | uint256 constant TAG_FOUR_B = 0xF0; 30 | // and continuation byte mask 31 | uint256 constant MASK_CONT = 0x3F; 32 | 33 | /** 34 | * @dev Encodes a unicode code point as UTF-8. 35 | * Reverts if the code point is invalid. 36 | * The result is 1-4 bytes starting at MSB. 37 | */ 38 | function encodeUtf8(uint256 code) pure returns (bytes32) { 39 | if (code < MAX_ONE_B) { 40 | return bytes32( 41 | (code ) << (31 * 8) 42 | ); 43 | } else if (code < MAX_TWO_B) { 44 | return bytes32( 45 | (code >> 6 | TAG_TWO_B ) << (31 * 8) | 46 | (code & MASK_CONT | TAG_CONT ) << (30 * 8) 47 | ); 48 | } else if (code < MAX_THREE_B) { 49 | if (code & 0xF800 == 0xD800) { 50 | // equivalent to `code >= 0xD800 && code <= 0xDFFF` 51 | // U+D800–U+DFFF are invalid UTF-16 surrogate halves 52 | revert Unicode__InvalidCode(); 53 | } 54 | return bytes32( 55 | (code >> 12 | TAG_THREE_B) << (31 * 8) | 56 | (code >> 6 & MASK_CONT | TAG_CONT ) << (30 * 8) | 57 | (code & MASK_CONT | TAG_CONT ) << (29 * 8) 58 | ); 59 | } else if (code <= MAX) { 60 | return bytes32( 61 | (code >> 18 | TAG_FOUR_B ) << (31 * 8) | 62 | (code >> 12 & MASK_CONT | TAG_CONT ) << (30 * 8) | 63 | (code >> 6 & MASK_CONT | TAG_CONT ) << (29 * 8) | 64 | (code & MASK_CONT | TAG_CONT ) << (28 * 8) 65 | ); 66 | } else { 67 | revert Unicode__InvalidCode(); 68 | } 69 | } 70
/**
 * @dev Turns a UTF-8 encoded character into its unicode code point.
 * The input is 1-4 bytes starting at MSB.
 * Validates ONLY the leading byte, use `isValidCodePoint` on the result if UTF-8 wasn't validated.
 * Reverts with Unicode__InvalidCode if the leading byte is invalid.
 */
function decodeUtf8(bytes32 str) pure returns (uint256) {
    uint256 lead = uint256(uint8(str[0]));
    uint256 width = utf8CharWidth(lead);

    // single byte: the byte is the code point
    if (width == 1) return lead;

    // payload (low 6 bits) of the continuation bytes
    uint256 cont1 = uint256(uint8(str[1])) & MASK_CONT;
    if (width == 2) {
        // 0x1F = 0001_1111
        return ((lead & 0x1F) << 6) | cont1;
    }

    uint256 cont2 = uint256(uint8(str[2])) & MASK_CONT;
    if (width == 3) {
        // 0x0F = 0000_1111
        return ((lead & 0x0F) << 12) | (cont1 << 6) | cont2;
    }

    uint256 cont3 = uint256(uint8(str[3])) & MASK_CONT;
    if (width == 4) {
        // 0x07 = 0000_0111
        return ((lead & 0x07) << 18) | (cont1 << 12) | (cont2 << 6) | cont3;
    }

    revert Unicode__InvalidCode();
}
/**
 * @dev Returns true if `code` is a valid code point:
 * not greater than MAX and not a UTF-16 surrogate half (U+D800–U+DFFF).
 * WARNING: atm this function is neither used nor tested in this repo
 */
function isValidCodePoint(uint256 code) pure returns (bool) {
    bool belowSurrogates = code < 0xD800;
    bool aboveSurrogates = code > 0xDFFF && code <= MAX;
    return belowSurrogates || aboveSurrogates;
}
public { 44 | string memory b1 = unicode"こ"; 45 | string memory b2 = unicode"ん"; 46 | // compare new assertions 47 | assertNotEq(b1.toSlice(), b2.toSlice()); 48 | assertNotEq(b1.toSlice(), b2); 49 | assertNotEq(b1, b2.toSlice()); 50 | // to the existing ones 51 | assertNotEq(b1.toSlice().toString(), b2.toSlice().toString()); 52 | assertNotEq(b1.toSlice().toString(), b2); 53 | assertNotEq(b1, b2.toSlice().toString()); 54 | } 55 | 56 | function testFailNotEq() public { 57 | assertNotEq(string(unicode"こんにちは"), string(unicode"こんにちは")); 58 | } 59 | 60 | /*////////////////////////////////////////////////////////////////////////// 61 | LESS-THAN 62 | //////////////////////////////////////////////////////////////////////////*/ 63 | 64 | function testLt() public { 65 | string memory b1 = unicode"こ"; 66 | string memory b2 = unicode"ん"; 67 | 68 | assertLt(b1.toSlice(), b2.toSlice()); 69 | assertLt(b1.toSlice(), b2); 70 | assertLt(b1, b2.toSlice()); 71 | assertLt(b1, b2); 72 | 73 | assertLte(b1.toSlice(), b2.toSlice()); 74 | assertLte(b1.toSlice(), b2); 75 | assertLte(b1, b2.toSlice()); 76 | assertLte(b1, b2); 77 | } 78 | 79 | function testFailLt() public { 80 | string memory b1 = unicode"こ"; 81 | string memory b2 = unicode"ん"; 82 | 83 | assertLt(b2, b1); 84 | } 85 | 86 | function testFailLt__ForEq() public { 87 | string memory b = unicode"こ"; 88 | assertLt(b, b); 89 | } 90 | 91 | function testFailLte() public { 92 | string memory b1 = unicode"こ"; 93 | string memory b2 = unicode"ん"; 94 | 95 | assertLte(b2, b1); 96 | } 97 | 98 | /*////////////////////////////////////////////////////////////////////////// 99 | GREATER-THAN 100 | //////////////////////////////////////////////////////////////////////////*/ 101 | 102 | function testGt() public { 103 | string memory b1 = unicode"ん"; 104 | string memory b2 = unicode"こ"; 105 | 106 | assertGt(b1.toSlice(), b2.toSlice()); 107 | assertGt(b1.toSlice(), b2); 108 | assertGt(b1, b2.toSlice()); 109 | assertGt(b1, b2); 110 | 111 | 
assertGte(b1.toSlice(), b2.toSlice()); 112 | assertGte(b1.toSlice(), b2); 113 | assertGte(b1, b2.toSlice()); 114 | assertGte(b1, b2); 115 | } 116 | 117 | function testFailGt() public { 118 | string memory b1 = unicode"ん"; 119 | string memory b2 = unicode"こ"; 120 | 121 | assertGt(b2, b1); 122 | } 123 | 124 | function testFailGt__ForEq() public { 125 | string memory b = unicode"こ"; 126 | assertGt(b, b); 127 | } 128 | 129 | function testFailGte() public { 130 | string memory b1 = unicode"ん"; 131 | string memory b2 = unicode"こ"; 132 | 133 | assertGte(b2, b1); 134 | } 135 | 136 | /*////////////////////////////////////////////////////////////////////////// 137 | CONTAINS 138 | //////////////////////////////////////////////////////////////////////////*/ 139 | 140 | function testContains() public { 141 | string memory b1 = unicode"こんにちは"; 142 | string memory b2 = unicode"んにち"; 143 | 144 | assertContains(b1.toSlice(), b2.toSlice()); 145 | assertContains(b1.toSlice(), b2); 146 | assertContains(b1, b2.toSlice()); 147 | assertContains(b1, b2); 148 | } 149 | 150 | function testFailContains() public { 151 | string memory b1 = unicode"こんにちは"; 152 | string memory b2 = unicode"ここ"; 153 | 154 | assertContains(b1, b2); 155 | } 156 | } -------------------------------------------------------------------------------- /src/utils/mem.sol: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: MIT 2 | 3 | pragma solidity ^0.8.17; 4 | 5 | /* 6 | * These functions are VERY DANGEROUS! 7 | * They operate directly on memory pointers, use with caution. 8 | * 9 | * Assembly here is marked as memory-safe for optimization. 10 | * The caller MUST use pointers in a memory-safe way! 11 | * https://docs.soliditylang.org/en/latest/assembly.html#memory-safety 12 | */ 13 | 14 | /** 15 | * @dev Load 1 byte from the pointer. 16 | * The result is in the least significant byte, hence uint8. 
17 | */ 18 | function mload8(uint256 ptr) pure returns (uint8 item) { 19 | /// @solidity memory-safe-assembly 20 | assembly { 21 | item := byte(0, mload(ptr)) // byte(0, ...) selects the most significant byte of the loaded word, i.e. the byte at `ptr` 22 | } 23 | return item; 24 | } 25 | 26 | /** 27 | * @dev Copy `length` memory bytes from `ptrSrc` to `ptrDest`. 28 | * WARNING: Does not handle pointer overlap! 29 | */ 30 | function memcpy(uint256 ptrDest, uint256 ptrSrc, uint256 length) pure { 31 | // copy 32-byte chunks 32 | while (length >= 32) { 33 | /// @solidity memory-safe-assembly 34 | assembly { 35 | mstore(ptrDest, mload(ptrSrc)) 36 | } 37 | // safe because total addition will be <= length (ptr+len is implicitly safe) 38 | unchecked { 39 | ptrDest += 32; 40 | ptrSrc += 32; 41 | length -= 32; 42 | } 43 | } 44 | // copy the 0-31 length tail 45 | // (the rest is an inlined `mstoreN`) 46 | uint256 mask = leftMask(length); 47 | /// @solidity memory-safe-assembly 48 | assembly { 49 | mstore(ptrDest, 50 | or( 51 | // store the left part 52 | and(mload(ptrSrc), mask), 53 | // preserve the right part 54 | and(mload(ptrDest), not(mask)) 55 | ) 56 | ) 57 | } 58 | } 59 | 60 | /** 61 | * @dev mstore `n` bytes (left-aligned) of `data` at `ptrDest`, preserving the remaining bytes of the 32-byte word 62 | */ 63 | function mstoreN(uint256 ptrDest, bytes32 data, uint256 n) pure { 64 | uint256 mask = leftMask(n); 65 | /// @solidity memory-safe-assembly 66 | assembly { 67 | mstore(ptrDest, 68 | or( 69 | // store the left part 70 | and(data, mask), 71 | // preserve the right part 72 | and(mload(ptrDest), not(mask)) 73 | ) 74 | ) 75 | } 76 | } 77 | 78 | /** 79 | * @dev Copy `n` memory bytes using identity precompile.
80 | */ 81 | function memmove(uint256 ptrDest, uint256 ptrSrc, uint256 n) view { 82 | /// @solidity memory-safe-assembly 83 | assembly { 84 | pop( // the staticcall success flag is discarded 85 | staticcall( 86 | gas(), // gas (unused is returned) 87 | 0x04, // identity precompile address 88 | ptrSrc, // argsOffset 89 | n, // argsSize: byte size to copy 90 | ptrDest, // retOffset 91 | n // retSize: byte size to copy 92 | ) 93 | ) 94 | } 95 | } 96 | 97 | /** 98 | * @dev Compare `n` memory bytes lexicographically. 99 | * Returns 0 for equal, < 0 for less than and > 0 for greater than (the magnitude is a raw difference, not normalized to -1/1). 100 | * 101 | * https://doc.rust-lang.org/std/cmp/trait.Ord.html#lexicographical-comparison 102 | */ 103 | function memcmp(uint256 ptrSelf, uint256 ptrOther, uint256 n) pure returns (int256) { 104 | // binary search for the first inequality 105 | while (n >= 32) { 106 | // safe because total addition will be <= n (ptr+len is implicitly safe) 107 | unchecked { 108 | uint256 nHalf = n / 2; 109 | if (memeq(ptrSelf, ptrOther, nHalf)) { 110 | ptrSelf += nHalf; 111 | ptrOther += nHalf; 112 | // (can't do n /= 2 instead of nHalf, some bytes would be skipped) 113 | n -= nHalf; 114 | // an explicit continue is better for optimization here 115 | continue; 116 | } else { 117 | n -= nHalf; // the first difference lies within the first nHalf bytes; keep the pointers and shrink the window to n - nHalf bytes 118 | } 119 | } 120 | } 121 | 122 | uint256 mask = leftMask(n); 123 | int256 diff; 124 | /// @solidity memory-safe-assembly 125 | assembly { 126 | // for <32 bytes subtraction can be used for comparison, 127 | // just need to shift away from MSB 128 | diff := sub( 129 | shr(8, and(mload(ptrSelf), mask)), 130 | shr(8, and(mload(ptrOther), mask)) 131 | ) 132 | } 133 | return diff; 134 | } 135 | 136 | /** 137 | * @dev Returns true if `n` memory bytes are equal. 138 | * 139 | * It's faster (up to 4x) than memcmp, especially on medium byte lengths like 32-320. 140 | * The benefit gets smaller for larger lengths, for 10000 it's only 30% faster.
141 | */ 142 | function memeq(uint256 ptrSelf, uint256 ptrOther, uint256 n) pure returns (bool result) { 143 | /// @solidity memory-safe-assembly 144 | assembly { 145 | result := eq(keccak256(ptrSelf, n), keccak256(ptrOther, n)) // equality is decided by comparing the keccak256 hashes of the two ranges 146 | } 147 | } 148 | 149 | /** 150 | * @dev Left-aligned byte mask (e.g. for partial mload/mstore). 151 | * For length >= 32 returns type(uint256).max, provided `length * 8` does not wrap (the multiplication is unchecked, so this holds for length < 2**253) 152 | * 153 | * length 0: 0x000000...000000 154 | * length 1: 0xff0000...000000 155 | * length 2: 0xffff00...000000 156 | * ... 157 | * length 30: 0xffffff...ff0000 158 | * length 31: 0xffffff...ffff00 159 | * length 32+: 0xffffff...ffffff 160 | */ 161 | function leftMask(uint256 length) pure returns (uint256) { 162 | unchecked { 163 | return ~( 164 | type(uint256).max >> (length * 8) 165 | ); 166 | } 167 | } -------------------------------------------------------------------------------- /src/StrChar.sol: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: MIT 2 | 3 | pragma solidity ^0.8.17; 4 | 5 | import { isValidUtf8 as _isValidUtf8, utf8CharWidth } from "./utils/utf8.sol"; 6 | import { decodeUtf8, encodeUtf8 } from "./utils/unicode.sol"; 7 | import { leftMask } from "./utils/mem.sol"; 8 | 9 | /** 10 | * @title A single UTF-8 encoded character. 11 | * @dev Internally it is stored as UTF-8 encoded bytes starting from left/MSB. 12 | */ 13 | type StrChar is bytes32; 14 | 15 | /*////////////////////////////////////////////////////////////////////////// 16 | CUSTOM ERRORS 17 | //////////////////////////////////////////////////////////////////////////*/ 18 | 19 | error StrChar__InvalidUTF8(); 20 | 21 | /*////////////////////////////////////////////////////////////////////////// 22 | STATIC FUNCTIONS 23 | //////////////////////////////////////////////////////////////////////////*/ 24 | 25 | library StrChar__ { 26 | /** 27 | * @dev Converts the first 1-4 bytes of `bytes32` to a `StrChar`.
28 | * Starts from left/MSB, reverts with `StrChar__InvalidUTF8` if not valid UTF-8. 29 | * @param b UTF-8 encoded character in the most significant bytes. 30 | */ 31 | function from(bytes32 b) internal pure returns (StrChar char) { 32 | uint256 charLen = _isValidUtf8(b); 33 | if (charLen == 0) revert StrChar__InvalidUTF8(); 34 | return fromUnchecked(b, charLen); 35 | } 36 | 37 | /** 38 | * @dev Converts a unicode code point to a `StrChar`. 39 | * E.g. for '€' code point = 0x20AC; whereas UTF-8 = 0xE282AC. 40 | */ 41 | function fromCodePoint(uint256 code) internal pure returns (StrChar char) { 42 | return StrChar.wrap(encodeUtf8(code)); 43 | } 44 | 45 | /** 46 | * @dev Like `from`, but does NO validity checks. 47 | * Uses provided `_len` instead of calculating it. This allows invalid/malformed characters. Bytes past the first `_len` are masked to zero. 48 | * 49 | * MSB of `bytes32` SHOULD be valid UTF-8. 50 | * And `bytes32` SHOULD be zero-padded after the first UTF-8 character. 51 | * Primarily for internal use. 52 | */ 53 | function fromUnchecked(bytes32 b, uint256 _len) internal pure returns (StrChar char) { 54 | return StrChar.wrap(bytes32( 55 | // zero-pad after the character 56 | uint256(b) & leftMask(_len) 57 | )); 58 | } 59 | } 60 | 61 | /*////////////////////////////////////////////////////////////////////////// 62 | GLOBAL FUNCTIONS 63 | //////////////////////////////////////////////////////////////////////////*/ 64 | 65 | using { 66 | len, 67 | toBytes32, toString, toCodePoint, 68 | cmp, eq, ne, lt, lte, gt, gte, 69 | isValidUtf8, 70 | isAscii 71 | } for StrChar global; 72 | 73 | /** 74 | * @dev Returns the character's length in bytes (1-4), as reported by `utf8CharWidth` for the leading byte. 75 | * Returns 0 for some (not all!) invalid characters (e.g. due to unsafe use of fromUnchecked). 76 | */ 77 | function len(StrChar self) pure returns (uint256) { 78 | return utf8CharWidth( 79 | // extract the leading byte 80 | uint256(uint8(StrChar.unwrap(self)[0])) 81 | ); 82 | } 83 | 84 | /** 85 | * @dev Converts a `StrChar` to its underlying bytes32 value.
86 | */ 87 | function toBytes32(StrChar self) pure returns (bytes32) { 88 | return StrChar.unwrap(self); 89 | } 90 | 91 | /** 92 | * @dev Converts a `StrChar` to a newly allocated `string` whose length is `self.len()`. 93 | */ 94 | function toString(StrChar self) pure returns (string memory str) { 95 | uint256 _len = self.len(); 96 | str = new string(_len); 97 | /// @solidity memory-safe-assembly 98 | assembly { 99 | mstore(add(str, 0x20), self) // stores the whole 32-byte word at the start of the string's data; only the first `_len` bytes are within its length 100 | } 101 | return str; 102 | } 103 | 104 | /** 105 | * @dev Converts a `StrChar` to its unicode code point (aka unicode scalar value). 106 | */ 107 | function toCodePoint(StrChar self) pure returns (uint256) { 108 | return decodeUtf8(StrChar.unwrap(self)); 109 | } 110 | 111 | /** 112 | * @dev Compare characters lexicographically (the raw bytes32 values are compared as uint256). 113 | * @return result 0 for equal, < 0 for less than and > 0 for greater than (this implementation returns exactly -1, 0 or 1). 114 | */ 115 | function cmp(StrChar self, StrChar other) pure returns (int256 result) { 116 | uint256 selfUint = uint256(StrChar.unwrap(self)); 117 | uint256 otherUint = uint256(StrChar.unwrap(other)); 118 | if (selfUint > otherUint) { 119 | return 1; 120 | } else if (selfUint < otherUint) { 121 | return -1; 122 | } else { 123 | return 0; 124 | } 125 | } 126 | 127 | /// @dev `self` == `other` 128 | function eq(StrChar self, StrChar other) pure returns (bool) { 129 | return uint256(StrChar.unwrap(self)) == uint256(StrChar.unwrap(other)); 130 | } 131 | 132 | /// @dev `self` != `other` 133 | function ne(StrChar self, StrChar other) pure returns (bool) { 134 | return uint256(StrChar.unwrap(self)) != uint256(StrChar.unwrap(other)); 135 | } 136 | 137 | /// @dev `self` < `other` 138 | function lt(StrChar self, StrChar other) pure returns (bool) { 139 | return uint256(StrChar.unwrap(self)) < uint256(StrChar.unwrap(other)); 140 | } 141 | 142 | /// @dev `self` <= `other` 143 | function lte(StrChar self, StrChar other) pure returns (bool) { 144 | return uint256(StrChar.unwrap(self)) <= uint256(StrChar.unwrap(other)); 145 | } 146 | 147 | /// @dev `self` >
`other` 148 | function gt(StrChar self, StrChar other) pure returns (bool) { 149 | return uint256(StrChar.unwrap(self)) > uint256(StrChar.unwrap(other)); 150 | } 151 | 152 | /// @dev `self` >= `other` 153 | function gte(StrChar self, StrChar other) pure returns (bool) { 154 | return uint256(StrChar.unwrap(self)) >= uint256(StrChar.unwrap(other)); 155 | } 156 | 157 | /** 158 | * @dev Returns true if `StrChar` is valid UTF-8. 159 | * Can be false if it was formed with an unsafe method (fromUnchecked, wrap). 160 | */ 161 | function isValidUtf8(StrChar self) pure returns (bool) { 162 | return _isValidUtf8(StrChar.unwrap(self)) != 0; 163 | } 164 | 165 | /** 166 | * @dev Returns true if `StrChar` is within the ASCII range. 167 | */ 168 | function isAscii(StrChar self) pure returns (bool) { 169 | return StrChar.unwrap(self)[0] < 0x80; 170 | } -------------------------------------------------------------------------------- /test/SliceIter.t.sol: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: MIT 2 | 3 | pragma solidity ^0.8.17; 4 | 5 | import { PRBTest } from "@prb/test/src/PRBTest.sol"; 6 | 7 | import { Slice, toSlice } from "../src/Slice.sol"; 8 | import { SliceIter } from "../src/SliceIter.sol"; 9 | import { SliceIter__StopIteration } from "../src/SliceIter.sol"; 10 | 11 | using { toSlice } for bytes; 12 | 13 | contract SliceIterTest is PRBTest { 14 | function testLen(bytes calldata _b) public { 15 | SliceIter memory iter = _b.toSlice().iter(); 16 | assertEq(iter.len(), _b.length); 17 | } 18 | 19 | function testIsEmpty() public { 20 | assertTrue(bytes("").toSlice().iter().isEmpty()); 21 | assertFalse(new bytes(1).toSlice().iter().isEmpty()); 22 | } 23 | 24 | /*////////////////////////////////////////////////////////////////////////// 25 | NEXT 26 | //////////////////////////////////////////////////////////////////////////*/ 27 | 28 | function testNext() public { 29 | Slice s = bytes("123").toSlice(); 30 | 
SliceIter memory iter = s.iter(); 31 | 32 | assertEq(iter.next(), uint8(bytes1("1"))); 33 | assertEq(iter.asSlice().toBytes(), bytes("23")); 34 | assertEq(iter.next(), uint8(bytes1("2"))); 35 | assertEq(iter.asSlice().toBytes(), bytes("3")); 36 | assertEq(iter.next(), uint8(bytes1("3"))); 37 | assertEq(iter.asSlice().toBytes(), bytes("")); 38 | 39 | vm.expectRevert(SliceIter__StopIteration.selector); 40 | iter.next(); 41 | } 42 | 43 | function testNext__StopIteration() public { 44 | Slice s = bytes("123").toSlice(); 45 | SliceIter memory iter = s.iter(); 46 | 47 | iter.next(); 48 | iter.next(); 49 | iter.next(); 50 | 51 | vm.expectRevert(SliceIter__StopIteration.selector); 52 | iter.next(); 53 | } 54 | 55 | function testNext__Fuzz(bytes calldata _b) public { 56 | SliceIter memory iter = _b.toSlice().iter(); 57 | 58 | uint256 i; 59 | while (!iter.isEmpty()) { 60 | assertEq(iter.next(), uint8(_b[i])); 61 | assertEq(iter.asSlice().toBytes(), _b[i + 1:]); 62 | i++; 63 | } 64 | 65 | vm.expectRevert(SliceIter__StopIteration.selector); 66 | iter.next(); 67 | } 68 | 69 | function testNext__StopIteration__Fuzz(bytes calldata _b) public { 70 | SliceIter memory iter = _b.toSlice().iter(); 71 | 72 | uint256 i; 73 | while (!iter.isEmpty()) { 74 | iter.next(); 75 | i++; 76 | } 77 | 78 | vm.expectRevert(SliceIter__StopIteration.selector); 79 | iter.next(); 80 | } 81 | 82 | /*////////////////////////////////////////////////////////////////////////// 83 | NEXT_BACK 84 | //////////////////////////////////////////////////////////////////////////*/ 85 | 86 | function testNextBack() public { 87 | Slice s = bytes("123").toSlice(); 88 | SliceIter memory iter = s.iter(); 89 | 90 | assertEq(iter.nextBack(), uint8(bytes1("3"))); 91 | assertEq(iter.asSlice().toBytes(), bytes("12")); 92 | assertEq(iter.nextBack(), uint8(bytes1("2"))); 93 | assertEq(iter.asSlice().toBytes(), bytes("1")); 94 | assertEq(iter.nextBack(), uint8(bytes1("1"))); 95 | assertEq(iter.asSlice().toBytes(), bytes("")); 96 
| 97 | vm.expectRevert(SliceIter__StopIteration.selector); 98 | iter.nextBack(); 99 | } 100 | 101 | function testNextBack__StopIteration() public { 102 | Slice s = bytes("123").toSlice(); 103 | SliceIter memory iter = s.iter(); 104 | 105 | iter.nextBack(); 106 | iter.nextBack(); 107 | iter.nextBack(); 108 | 109 | vm.expectRevert(SliceIter__StopIteration.selector); 110 | iter.nextBack(); 111 | } 112 | 113 | function testNextBack__Fuzz(bytes calldata _b) public { 114 | SliceIter memory iter = _b.toSlice().iter(); 115 | 116 | uint256 i; 117 | while (!iter.isEmpty()) { 118 | assertEq(iter.nextBack(), uint8(_b[_b.length - i - 1])); 119 | assertEq(iter.asSlice().toBytes(), _b[:_b.length - i - 1]); 120 | i++; 121 | } 122 | } 123 | 124 | function testNextBack__StopIteration__Fuzz(bytes calldata _b) public { 125 | SliceIter memory iter = _b.toSlice().iter(); 126 | 127 | uint256 i; 128 | while (!iter.isEmpty()) { 129 | iter.nextBack(); 130 | i++; 131 | } 132 | 133 | vm.expectRevert(SliceIter__StopIteration.selector); 134 | iter.nextBack(); 135 | } 136 | 137 | /*////////////////////////////////////////////////////////////////////////// 138 | NEXT MIXED 139 | //////////////////////////////////////////////////////////////////////////*/ 140 | 141 | function testNextMixed() public { 142 | Slice s = bytes("12345").toSlice(); 143 | SliceIter memory iter = s.iter(); 144 | 145 | assertEq(iter.next(), uint8(bytes1("1"))); 146 | assertEq(iter.asSlice().toBytes(), bytes("2345")); 147 | assertEq(iter.nextBack(), uint8(bytes1("5"))); 148 | assertEq(iter.asSlice().toBytes(), bytes("234")); 149 | assertEq(iter.next(), uint8(bytes1("2"))); 150 | assertEq(iter.asSlice().toBytes(), bytes("34")); 151 | assertEq(iter.next(), uint8(bytes1("3"))); 152 | assertEq(iter.asSlice().toBytes(), bytes("4")); 153 | assertEq(iter.nextBack(), uint8(bytes1("4"))); 154 | assertEq(iter.asSlice().toBytes(), bytes("")); 155 | } 156 | 157 | function testNextMixed__StopIteration() public { 158 | Slice s = 
bytes("12345").toSlice(); 159 | SliceIter memory iter = s.iter(); 160 | 161 | iter.next(); 162 | iter.nextBack(); 163 | iter.next(); 164 | iter.next(); 165 | iter.nextBack(); 166 | 167 | vm.expectRevert(SliceIter__StopIteration.selector); 168 | iter.next(); 169 | } 170 | } -------------------------------------------------------------------------------- /test/StrCharsIter.t.sol: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: MIT 2 | 3 | pragma solidity ^0.8.17; 4 | 5 | import { PRBTest } from "@prb/test/src/PRBTest.sol"; 6 | 7 | import { StrSlice, toSlice, StrCharsIter } from "../src/StrSlice.sol"; 8 | import { SliceIter__StopIteration } from "../src/SliceIter.sol"; 9 | import { StrChar__InvalidUTF8 } from "../src/StrChar.sol"; 10 | 11 | using { toSlice } for string; 12 | 13 | contract StrCharsIterTest is PRBTest { 14 | function testCount() public { 15 | assertEq(toSlice("").chars().count(), 0); 16 | assertEq(toSlice("Hello, world!").chars().count(), 13); 17 | assertEq(toSlice(unicode"naïve").chars().count(), 5); 18 | assertEq(toSlice(unicode"こんにちは").chars().count(), 5); 19 | assertEq(toSlice(unicode"Z̤͔ͧ̑̓ä͖̭̈̇lͮ̒ͫǧ̗͚̚o̙̔ͮ̇͐̇Z̤͔ͧ̑̓ä͖̭̈̇lͮ̒ͫǧ̗͚̚o̙̔ͮ̇͐̇").chars().count(), 56); 20 | assertEq(toSlice(unicode"🗮🐵🌝👤👿🗉💀🉄🍨🉔🈥🔥🏅🔪🉣📷🉳🍠🈃🉌🖷👍🌐💎🋀🌙💼💮🗹🗘💬🖜🐥🖸🈰🍦💈📆🋬🏇🖒🐜👮🊊🗒🈆🗻🏁🈰🎎🊶🉠🍖🉪🌖📎🌄💵🕷🔧🍸🋗🍁🋸") 21 | .chars().count(), 64); 22 | } 23 | 24 | function testUnsafeCount() public { 25 | assertEq(toSlice("").chars().unsafeCount(), 0); 26 | assertEq(toSlice("Hello, world!").chars().unsafeCount(), 13); 27 | assertEq(toSlice(unicode"naïve").chars().unsafeCount(), 5); 28 | assertEq(toSlice(unicode"こんにちは").chars().unsafeCount(), 5); 29 | assertEq(toSlice(unicode"Z̤͔ͧ̑̓ä͖̭̈̇lͮ̒ͫǧ̗͚̚o̙̔ͮ̇͐̇Z̤͔ͧ̑̓ä͖̭̈̇lͮ̒ͫǧ̗͚̚o̙̔ͮ̇͐̇").chars().unsafeCount(), 56); 30 | assertEq(toSlice(unicode"🗮🐵🌝👤👿🗉💀🉄🍨🉔🈥🔥🏅🔪🉣📷🉳🍠🈃🉌🖷👍🌐💎🋀🌙💼💮🗹🗘💬🖜🐥🖸🈰🍦💈📆🋬🏇🖒🐜👮🊊🗒🈆🗻🏁🈰🎎🊶🉠🍖🉪🌖📎🌄💵🕷🔧🍸🋗🍁🋸") 31 | .chars().unsafeCount(), 64); 32 | } 33 | 34 | function testValidateUtf8() public { 35 | 
assertTrue(toSlice("").chars().validateUtf8()); 36 | assertTrue(toSlice("Hello, world!").chars().validateUtf8()); 37 | assertTrue(toSlice(unicode"naïve").chars().validateUtf8()); 38 | assertTrue(toSlice(unicode"こんにちは").chars().validateUtf8()); 39 | assertTrue(toSlice(unicode"Z̤͔ͧ̑̓ä͖̭̈̇lͮ̒ͫǧ̗͚̚o̙̔ͮ̇͐̇Z̤͔ͧ̑̓ä͖̭̈̇lͮ̒ͫǧ̗͚̚o̙̔ͮ̇͐̇").chars().validateUtf8()); 40 | assertTrue(toSlice(unicode"🗮🐵🌝👤👿🗉💀🉄🍨🉔🈥🔥🏅🔪🉣📷🉳🍠🈃🉌🖷👍🌐💎🋀🌙💼💮🗹🗘💬🖜🐥🖸🈰🍦💈📆🋬🏇🖒🐜👮🊊🗒🈆🗻🏁🈰🎎🊶🉠🍖🉪🌖📎🌄💵🕷🔧🍸🋗🍁🋸") 41 | .chars().validateUtf8()); 42 | } 43 | 44 | function testValidateUtf8__False() public { 45 | assertFalse(toSlice(string(bytes(hex"80"))).chars().validateUtf8()); 46 | assertFalse(toSlice(string(bytes(hex"E0"))).chars().validateUtf8()); 47 | assertFalse(toSlice(string(bytes(hex"C000"))).chars().validateUtf8()); 48 | assertFalse(toSlice(string(bytes(hex"F880808080"))).chars().validateUtf8()); 49 | assertFalse(toSlice(string(bytes(hex"E08080"))).chars().validateUtf8()); 50 | assertFalse(toSlice(string(bytes(hex"F0808080"))).chars().validateUtf8()); 51 | assertFalse(toSlice(string(abi.encodePacked(unicode"こんにちは", hex"80"))).chars().validateUtf8()); 52 | assertFalse(toSlice(string(abi.encodePacked(unicode"Z̤͔ͧ̑̓ä͖̭̈̇lͮ̒ͫǧ̗͚̚o̙̔ͮ̇͐̇Z̤͔ͧ̑̓ä͖̭̈̇lͮ̒ͫǧ̗͚̚o̙̔ͮ̇͐̇", hex"F0808080"))).chars().validateUtf8()); 53 | } 54 | 55 | function testCount__InvalidUTF8() public { 56 | vm.expectRevert(StrChar__InvalidUTF8.selector); 57 | toSlice(string(bytes(hex"FFFF"))).chars().count(); 58 | } 59 | 60 | function testNext() public { 61 | StrSlice s = string(unicode"a¡ࠀ𐀡").toSlice(); 62 | StrCharsIter memory iter = s.chars(); 63 | 64 | assertEq(iter.next().toString(), unicode"a"); 65 | assertEq(iter.asStr().toString(), unicode"¡ࠀ𐀡"); 66 | assertEq(iter.next().toString(), unicode"¡"); 67 | assertEq(iter.asStr().toString(), unicode"ࠀ𐀡"); 68 | assertEq(iter.next().toString(), unicode"ࠀ"); 69 | assertEq(iter.asStr().toString(), unicode"𐀡"); 70 | assertEq(iter.next().toString(), unicode"𐀡"); 71 | assertEq(iter.asStr().toString(), unicode""); 72 | 
} 73 | 74 | function testNext__StopIteration() public { 75 | StrSlice s = string(unicode"💀!").toSlice(); 76 | StrCharsIter memory iter = s.chars(); 77 | 78 | iter.next(); 79 | iter.next(); 80 | vm.expectRevert(SliceIter__StopIteration.selector); 81 | iter.next(); 82 | } 83 | 84 | function testNextBack() public { 85 | StrSlice s = string(unicode"a¡ࠀ𐀡").toSlice(); 86 | StrCharsIter memory iter = s.chars(); 87 | 88 | assertEq(iter.nextBack().toString(), unicode"𐀡"); 89 | assertEq(iter.asStr().toString(), unicode"a¡ࠀ"); 90 | assertEq(iter.nextBack().toString(), unicode"ࠀ"); 91 | assertEq(iter.asStr().toString(), unicode"a¡"); 92 | assertEq(iter.nextBack().toString(), unicode"¡"); 93 | assertEq(iter.asStr().toString(), unicode"a"); 94 | assertEq(iter.nextBack().toString(), unicode"a"); 95 | assertEq(iter.asStr().toString(), unicode""); 96 | } 97 | 98 | function testNextBack__StopIteration() public { 99 | StrSlice s = string(unicode"💀!").toSlice(); 100 | StrCharsIter memory iter = s.chars(); 101 | 102 | iter.nextBack(); 103 | iter.nextBack(); 104 | vm.expectRevert(SliceIter__StopIteration.selector); 105 | iter.nextBack(); 106 | } 107 | 108 | function testUnsafeNext() public { 109 | StrSlice s = string(unicode"a¡ࠀ𐀡").toSlice(); 110 | StrCharsIter memory iter = s.chars(); 111 | 112 | assertEq(iter.unsafeNext().toString(), unicode"a"); 113 | assertEq(iter.asStr().toString(), unicode"¡ࠀ𐀡"); 114 | assertEq(iter.unsafeNext().toString(), unicode"¡"); 115 | assertEq(iter.asStr().toString(), unicode"ࠀ𐀡"); 116 | assertEq(iter.unsafeNext().toString(), unicode"ࠀ"); 117 | assertEq(iter.asStr().toString(), unicode"𐀡"); 118 | assertEq(iter.unsafeNext().toString(), unicode"𐀡"); 119 | assertEq(iter.asStr().toString(), unicode""); 120 | } 121 | 122 | function testUnsafeNext__InvalidUtf8() public { 123 | StrSlice s = string(bytes(hex"00FF80")).toSlice(); 124 | StrCharsIter memory iter = s.chars(); 125 | 126 | // this works kinda weirdly for invalid chars 127 | // TODO test toBytes32 too 
(it will be non-empty here) 128 | assertEq(iter.unsafeNext().toString(), string(bytes(hex"00"))); 129 | assertEq(iter.asStr().toString(), string(bytes(hex"FF80"))); 130 | assertEq(iter.unsafeNext().toString(), ""); 131 | assertEq(iter.asStr().toString(), string(bytes(hex"80"))); 132 | assertEq(iter.unsafeNext().toString(), ""); 133 | assertEq(iter.asStr().toString(), ""); 134 | } 135 | } -------------------------------------------------------------------------------- /test/SliceAssertions.t.sol: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: MIT 2 | 3 | pragma solidity ^0.8.17; 4 | 5 | import { PRBTest } from "@prb/test/src/PRBTest.sol"; 6 | import { SliceAssertions } from "../src/test/SliceAssertions.sol"; 7 | 8 | import { Slice, toSlice } from "../src/Slice.sol"; 9 | 10 | using { toSlice } for bytes; 11 | 12 | contract SliceAssertionsTest is PRBTest, SliceAssertions { 13 | // 100 bytes 14 | bytes constant LOREM_IPSUM = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore."; 15 | 16 | /// @dev simple byte-by-byte comparison to test more complicated comparisons 17 | function naiveCmp(bytes memory b1, bytes memory b2) internal pure returns (int256) { 18 | uint256 shortest = b1.length < b2.length ? 
b1.length : b2.length; 19 | for (uint256 i; i < shortest; i++) { 20 | if (b1[i] < b2[i]) { 21 | return -1; 22 | } else if (b1[i] > b2[i]) { 23 | return 1; 24 | } 25 | } 26 | if (b1.length < b2.length) { 27 | return -1; 28 | } else if (b1.length > b2.length) { 29 | return 1; 30 | } else { 31 | return 0; 32 | } 33 | } 34 | 35 | /// @dev split calldata bytes in half 36 | function b1b2(bytes calldata b) internal pure returns (bytes memory b1, bytes memory b2) { 37 | b1 = b[:b.length / 2]; 38 | // b2 can be 1 byte longer sometimes 39 | b2 = b[b.length / 2:]; 40 | 41 | // this is useful to test a special case of initially similar sequences 42 | // TODO fix self-referential pseudorandomness 43 | uint256 random = uint256(keccak256(abi.encode(b, "randomlyAddPrefix"))) % 4; 44 | if (random == 1) { 45 | // prefix 46 | b1 = abi.encodePacked(LOREM_IPSUM, b1); 47 | b2 = abi.encodePacked(LOREM_IPSUM, b2); 48 | } else if (random == 2) { 49 | // suffix 50 | b1 = abi.encodePacked(b1, LOREM_IPSUM); 51 | b2 = abi.encodePacked(b2, LOREM_IPSUM); 52 | } else if (random == 3) { 53 | // prefix and suffix 54 | b1 = abi.encodePacked(LOREM_IPSUM, b1, LOREM_IPSUM); 55 | b2 = abi.encodePacked(LOREM_IPSUM, b2, LOREM_IPSUM); 56 | } 57 | } 58 | 59 | function testNaiveCmp() public { 60 | assertEq(naiveCmp("1", "0"), 1); 61 | assertEq(naiveCmp("1", "1"), 0); 62 | assertEq(naiveCmp("0", "1"), -1); 63 | assertEq(naiveCmp("1", ""), 1); 64 | assertEq(naiveCmp("", ""), 0); 65 | assertEq(naiveCmp("", "1"), -1); 66 | assertEq(naiveCmp("12", "1"), 1); 67 | assertEq(naiveCmp("1", "12"), -1); 68 | } 69 | 70 | /*////////////////////////////////////////////////////////////////////////// 71 | EQUALITY 72 | //////////////////////////////////////////////////////////////////////////*/ 73 | 74 | function testEq(bytes memory b) public { 75 | // compare new assertions 76 | assertEq(b.toSlice(), b.toSlice()); 77 | assertEq(b.toSlice(), b); 78 | assertEq(b, b.toSlice()); 79 | 80 | assertLte(b.toSlice(), b.toSlice()); 81 
| assertLte(b.toSlice(), b); 82 | assertLte(b, b.toSlice()); 83 | 84 | assertGte(b.toSlice(), b.toSlice()); 85 | assertGte(b.toSlice(), b); 86 | assertGte(b, b.toSlice()); 87 | // to the existing ones 88 | assertEq(b.toSlice().toBytes(), b.toSlice().toBytes()); 89 | assertEq(b.toSlice().toBytes(), b); 90 | assertEq(b, b.toSlice().toBytes()); 91 | } 92 | 93 | function testFailEq(bytes calldata _b) public { 94 | (bytes memory b1, bytes memory b2) = b1b2(_b); 95 | vm.assume(keccak256(b1) != keccak256(b2)); 96 | assertEq(b1.toSlice(), b2.toSlice()); 97 | } 98 | 99 | function testNotEq(bytes calldata _b) public { 100 | (bytes memory b1, bytes memory b2) = b1b2(_b); 101 | vm.assume(keccak256(b1) != keccak256(b2)); 102 | // compare new assertions 103 | assertNotEq(b1.toSlice(), b2.toSlice()); 104 | assertNotEq(b1.toSlice(), b2); 105 | assertNotEq(b1, b2.toSlice()); 106 | // to the existing ones 107 | assertNotEq(b1.toSlice().toBytes(), b2.toSlice().toBytes()); 108 | assertNotEq(b1.toSlice().toBytes(), b2); 109 | assertNotEq(b1, b2.toSlice().toBytes()); 110 | } 111 | 112 | function testFailNotEq(bytes memory b) public { 113 | assertNotEq(b.toSlice(), b.toSlice()); 114 | } 115 | 116 | /*////////////////////////////////////////////////////////////////////////// 117 | LESS-THAN 118 | //////////////////////////////////////////////////////////////////////////*/ 119 | 120 | function testLt(bytes calldata _b) public { 121 | (bytes memory b1, bytes memory b2) = b1b2(_b); 122 | vm.assume(naiveCmp(b1, b2) < 0); 123 | 124 | assertLt(b1.toSlice(), b2.toSlice()); 125 | assertLt(b1.toSlice(), b2); 126 | assertLt(b1, b2.toSlice()); 127 | assertLt(b1, b2); 128 | 129 | assertLte(b1.toSlice(), b2.toSlice()); 130 | assertLte(b1.toSlice(), b2); 131 | assertLte(b1, b2.toSlice()); 132 | assertLte(b1, b2); 133 | } 134 | 135 | function testFailLt(bytes calldata _b) public { 136 | (bytes memory b1, bytes memory b2) = b1b2(_b); 137 | vm.assume(naiveCmp(b1, b2) > 0); 138 | 139 | 
assertLt(b1.toSlice(), b2.toSlice()); 140 | } 141 | 142 | function testFailLt__ForEq(bytes memory b) public { 143 | assertLt(b.toSlice(), b.toSlice()); 144 | } 145 | 146 | function testFailLte(bytes calldata _b) public { 147 | (bytes memory b1, bytes memory b2) = b1b2(_b); 148 | vm.assume(naiveCmp(b1, b2) > 0); 149 | 150 | assertLte(b1.toSlice(), b2.toSlice()); 151 | } 152 | 153 | /*////////////////////////////////////////////////////////////////////////// 154 | GREATER-THAN 155 | //////////////////////////////////////////////////////////////////////////*/ 156 | 157 | function testGt(bytes calldata _b) public { 158 | (bytes memory b1, bytes memory b2) = b1b2(_b); 159 | vm.assume(naiveCmp(b1, b2) > 0); 160 | 161 | assertGt(b1.toSlice(), b2.toSlice()); 162 | assertGt(b1.toSlice(), b2); 163 | assertGt(b1, b2.toSlice()); 164 | assertGt(b1, b2); 165 | 166 | assertGte(b1.toSlice(), b2.toSlice()); 167 | assertGte(b1.toSlice(), b2); 168 | assertGte(b1, b2.toSlice()); 169 | assertGte(b1, b2); 170 | } 171 | 172 | function testFailGt(bytes calldata _b) public { 173 | (bytes memory b1, bytes memory b2) = b1b2(_b); 174 | vm.assume(naiveCmp(b1, b2) < 0); 175 | 176 | assertGt(b1.toSlice(), b2.toSlice()); 177 | } 178 | 179 | function testFailGt__ForEq(bytes memory b) public { 180 | assertGt(b.toSlice(), b.toSlice()); 181 | } 182 | 183 | function testFailGte(bytes calldata _b) public { 184 | (bytes memory b1, bytes memory b2) = b1b2(_b); 185 | vm.assume(naiveCmp(b1, b2) < 0); 186 | 187 | assertGte(b1.toSlice(), b2.toSlice()); 188 | } 189 | 190 | /*////////////////////////////////////////////////////////////////////////// 191 | CONTAINS 192 | //////////////////////////////////////////////////////////////////////////*/ 193 | 194 | function testContains(bytes calldata _b) public { 195 | bytes memory b1 = _b; 196 | bytes memory b2 = _b[_b.length / 3 : _b.length * 2 / 3]; 197 | 198 | assertContains(b1.toSlice(), b2.toSlice()); 199 | assertContains(b1.toSlice(), b2); 200 | 
assertContains(b1, b2.toSlice()); 201 | assertContains(b1, b2); 202 | } 203 | 204 | function testFailContains(bytes calldata _b) public { 205 | bytes memory b1 = _b; 206 | bytes memory b2 = _b; 207 | // change 1 byte 208 | b2[0] = bytes1(uint8(b2[0]) ^ uint8(0x01)); 209 | 210 | assertContains(b1.toSlice(), b2.toSlice()); 211 | } 212 | 213 | function testFailContains__1Byte(bytes calldata _b) public { 214 | bytes1 pat = bytes1(keccak256(abi.encode(_b, "1Byte"))); 215 | 216 | bytes memory b1 = _b; 217 | bytes memory b2 = new bytes(1); 218 | b2[0] = pat; 219 | // replace all pat 220 | for (uint256 i; i < b1.length; i++) { 221 | if (b1[i] == pat) { 222 | b1[i] = ~pat; 223 | } 224 | } 225 | 226 | assertContains(b1.toSlice(), b2.toSlice()); 227 | } 228 | } -------------------------------------------------------------------------------- /src/test/SliceAssertions.sol: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: MIT 2 | 3 | pragma solidity ^0.8.17; 4 | 5 | import { PRBTest } from "@prb/test/src/PRBTest.sol"; 6 | 7 | import { Slice, toSlice } from "../Slice.sol"; 8 | 9 | using { toSlice } for bytes; 10 | 11 | /// @title Extension to PRBTest with Slice assertions. 12 | /// @dev Also provides lt,lte,gt,gte,contains for 2 native `bytes`. 
contract SliceAssertions is PRBTest {
    // Eq
    //
    // Slice operands are copied to `bytes` with `toBytes()`; the comparison itself
    // is performed by the `bytes`/`bytes` overload, which is not defined in this
    // contract (it is inherited).

    function assertEq(Slice a, Slice b) internal {
        assertEq(a.toBytes(), b.toBytes());
    }

    function assertEq(Slice a, Slice b, string memory err) internal {
        assertEq(a.toBytes(), b.toBytes(), err);
    }

    function assertEq(Slice a, bytes memory b) internal {
        assertEq(a.toBytes(), b);
    }

    function assertEq(Slice a, bytes memory b, string memory err) internal {
        assertEq(a.toBytes(), b, err);
    }

    function assertEq(bytes memory a, Slice b) internal {
        assertEq(a, b.toBytes());
    }

    function assertEq(bytes memory a, Slice b, string memory err) internal {
        assertEq(a, b.toBytes(), err);
    }

    // NotEq
    //
    // Same delegation scheme as Eq: convert with `toBytes()`, then defer to the
    // inherited `bytes`/`bytes` overload.

    function assertNotEq(Slice a, Slice b) internal {
        assertNotEq(a.toBytes(), b.toBytes());
    }

    function assertNotEq(Slice a, Slice b, string memory err) internal {
        assertNotEq(a.toBytes(), b.toBytes(), err);
    }

    function assertNotEq(Slice a, bytes memory b) internal {
        assertNotEq(a.toBytes(), b);
    }

    function assertNotEq(Slice a, bytes memory b, string memory err) internal {
        assertNotEq(a.toBytes(), b, err);
    }

    function assertNotEq(bytes memory a, Slice b) internal {
        assertNotEq(a, b.toBytes());
    }

    function assertNotEq(bytes memory a, Slice b, string memory err) internal {
        assertNotEq(a, b.toBytes(), err);
    }

    // Lt
    //
    // For Lt/Lte/Gt/Gte/Contains the `Slice`/`Slice` overload does the actual check;
    // every `bytes` overload only wraps its operand(s) with `toSlice()` and forwards.

    /// @dev Calls `fail()` unless `a.lt(b)`; both operands are logged on failure.
    function assertLt(Slice a, Slice b) internal virtual {
        if (!a.lt(b)) {
            emit Log("Error: a < b not satisfied [bytes]");
            emit LogNamedBytes(" Value a", a.toBytes());
            emit LogNamedBytes(" Value b", b.toBytes());
            fail();
        }
    }

    /// @dev Same check as above; on failure `err` is logged first, then the
    /// message-less overload re-runs the check and reports the operands.
    function assertLt(Slice a, Slice b, string memory err) internal virtual {
        if (!a.lt(b)) {
            emit LogNamedString("Error", err);
            assertLt(a, b);
        }
    }

    function assertLt(Slice a, bytes memory b) internal virtual {
        assertLt(a, b.toSlice());
    }

    function assertLt(Slice a, bytes memory b, string memory err) internal virtual {
        assertLt(a, b.toSlice(), err);
    }

    function assertLt(bytes memory a, Slice b) internal virtual {
        assertLt(a.toSlice(), b);
    }

    function assertLt(bytes memory a, Slice b, string memory err) internal virtual {
        assertLt(a.toSlice(), b, err);
    }

    function assertLt(bytes memory a, bytes memory b) internal virtual {
        assertLt(a.toSlice(), b.toSlice());
    }

    function assertLt(bytes memory a, bytes memory b, string memory err) internal virtual {
        assertLt(a.toSlice(), b.toSlice(), err);
    }

    // Lte

    /// @dev Calls `fail()` unless `a.lte(b)`; both operands are logged on failure.
    function assertLte(Slice a, Slice b) internal virtual {
        if (!a.lte(b)) {
            emit Log("Error: a <= b not satisfied [bytes]");
            emit LogNamedBytes(" Value a", a.toBytes());
            emit LogNamedBytes(" Value b", b.toBytes());
            fail();
        }
    }

    /// @dev Same check as above; on failure `err` is logged first, then the
    /// message-less overload re-runs the check and reports the operands.
    function assertLte(Slice a, Slice b, string memory err) internal virtual {
        if (!a.lte(b)) {
            emit LogNamedString("Error", err);
            assertLte(a, b);
        }
    }

    function assertLte(Slice a, bytes memory b) internal virtual {
        assertLte(a, b.toSlice());
    }

    function assertLte(Slice a, bytes memory b, string memory err) internal virtual {
        assertLte(a, b.toSlice(), err);
    }

    function assertLte(bytes memory a, Slice b) internal virtual {
        assertLte(a.toSlice(), b);
    }

    function assertLte(bytes memory a, Slice b, string memory err) internal virtual {
        assertLte(a.toSlice(), b, err);
    }

    function assertLte(bytes memory a, bytes memory b) internal virtual {
        assertLte(a.toSlice(), b.toSlice());
    }

    function assertLte(bytes memory a, bytes memory b, string memory err) internal virtual {
        assertLte(a.toSlice(), b.toSlice(), err);
    }

    // Gt

    /// @dev Calls `fail()` unless `a.gt(b)`; both operands are logged on failure.
    function assertGt(Slice a, Slice b) internal virtual {
        if (!a.gt(b)) {
            emit Log("Error: a > b not satisfied [bytes]");
            emit LogNamedBytes(" Value a", a.toBytes());
            emit LogNamedBytes(" Value b", b.toBytes());
            fail();
        }
    }

    /// @dev Same check as above; on failure `err` is logged first, then the
    /// message-less overload re-runs the check and reports the operands.
    function assertGt(Slice a, Slice b, string memory err) internal virtual {
        if (!a.gt(b)) {
            emit LogNamedString("Error", err);
            assertGt(a, b);
        }
    }

    function assertGt(Slice a, bytes memory b) internal virtual {
        assertGt(a, b.toSlice());
    }

    function assertGt(Slice a, bytes memory b, string memory err) internal virtual {
        assertGt(a, b.toSlice(), err);
    }

    function assertGt(bytes memory a, Slice b) internal virtual {
        assertGt(a.toSlice(), b);
    }

    function assertGt(bytes memory a, Slice b, string memory err) internal virtual {
        assertGt(a.toSlice(), b, err);
    }

    function assertGt(bytes memory a, bytes memory b) internal virtual {
        assertGt(a.toSlice(), b.toSlice());
    }

    function assertGt(bytes memory a, bytes memory b, string memory err) internal virtual {
        assertGt(a.toSlice(), b.toSlice(), err);
    }

    // Gte

    /// @dev Calls `fail()` unless `a.gte(b)`; both operands are logged on failure.
    function assertGte(Slice a, Slice b) internal virtual {
        if (!a.gte(b)) {
            emit Log("Error: a >= b not satisfied [bytes]");
            emit LogNamedBytes(" Value a", a.toBytes());
            emit LogNamedBytes(" Value b", b.toBytes());
            fail();
        }
    }

    /// @dev Same check as above; on failure `err` is logged first, then the
    /// message-less overload re-runs the check and reports the operands.
    function assertGte(Slice a, Slice b, string memory err) internal virtual {
        if (!a.gte(b)) {
            emit LogNamedString("Error", err);
            assertGte(a, b);
        }
    }

    function assertGte(Slice a, bytes memory b) internal virtual {
        assertGte(a, b.toSlice());
    }

    function assertGte(Slice a, bytes memory b, string memory err) internal virtual {
        assertGte(a, b.toSlice(), err);
    }

    function assertGte(bytes memory a, Slice b) internal virtual {
        assertGte(a.toSlice(), b);
    }

    function assertGte(bytes memory a, Slice b, string memory err) internal virtual {
        assertGte(a.toSlice(), b, err);
    }

    function assertGte(bytes memory a, bytes memory b) internal virtual {
        assertGte(a.toSlice(), b.toSlice());
    }

    function assertGte(bytes memory a, bytes memory b, string memory err) internal virtual {
        assertGte(a.toSlice(), b.toSlice(), err);
    }

    // Contains

    /// @dev Calls `fail()` unless `a.contains(b)`; both operands are logged on failure.
    function assertContains(Slice a, Slice b) internal virtual {
        if (!a.contains(b)) {
            emit Log("Error: a does not contain b [bytes]");
            emit LogNamedBytes(" Bytes a", a.toBytes());
            emit LogNamedBytes(" Bytes b", b.toBytes());
            fail();
        }
    }

    /// @dev Same check as above; on failure `err` is logged first, then the
    /// message-less overload re-runs the check and reports the operands.
    function assertContains(Slice a, Slice b, string memory err) internal virtual {
        if (!a.contains(b)) {
            emit LogNamedString("Error", err);
            assertContains(a, b);
        }
    }

    function assertContains(Slice a, bytes memory b) internal virtual {
        assertContains(a, b.toSlice());
    }

    function assertContains(Slice a, bytes memory b, string memory err) internal virtual {
        assertContains(a, b.toSlice(), err);
    }

    function assertContains(bytes memory a, Slice b) internal virtual {
        assertContains(a.toSlice(), b);
    }

    function assertContains(bytes memory a, Slice b, string memory err) internal virtual {
        assertContains(a.toSlice(), b, err);
    }

    function assertContains(bytes memory a, bytes memory b) internal virtual {
        assertContains(a.toSlice(), b.toSlice());
    }

    function assertContains(bytes memory a, bytes memory b, string memory err) internal virtual {
        assertContains(a.toSlice(), b.toSlice(), err);
    }
}
--------------------------------------------------------------------------------
/src/test/StrSliceAssertions.sol:
--------------------------------------------------------------------------------
// SPDX-License-Identifier: MIT

pragma solidity ^0.8.17;

import { PRBTest } from "@prb/test/src/PRBTest.sol";

import { StrSlice, toSlice } from "../StrSlice.sol";

using { toSlice } for string;

/// @title Extension to PRBTest with StrSlice assertions.
/// @dev Also provides lt,lte,gt,gte,contains for 2 native `string`.
contract StrSliceAssertions is PRBTest {
    // Eq
    //
    // StrSlice operands are copied to `string` with `toString()`; the comparison
    // itself is performed by the `string`/`string` overload, which is not defined
    // in this contract (it is inherited).

    function assertEq(StrSlice a, StrSlice b) internal {
        assertEq(a.toString(), b.toString());
    }

    function assertEq(StrSlice a, StrSlice b, string memory err) internal {
        assertEq(a.toString(), b.toString(), err);
    }

    function assertEq(StrSlice a, string memory b) internal {
        assertEq(a.toString(), b);
    }

    function assertEq(StrSlice a, string memory b, string memory err) internal {
        assertEq(a.toString(), b, err);
    }

    function assertEq(string memory a, StrSlice b) internal {
        assertEq(a, b.toString());
    }

    function assertEq(string memory a, StrSlice b, string memory err) internal {
        assertEq(a, b.toString(), err);
    }

    // NotEq
    //
    // Same delegation scheme as Eq: convert with `toString()`, then defer to the
    // inherited `string`/`string` overload.

    function assertNotEq(StrSlice a, StrSlice b) internal {
        assertNotEq(a.toString(), b.toString());
    }

    function assertNotEq(StrSlice a, StrSlice b, string memory err) internal {
        assertNotEq(a.toString(), b.toString(), err);
    }

    function assertNotEq(StrSlice a, string memory b) internal {
        assertNotEq(a.toString(), b);
    }

    function assertNotEq(StrSlice a, string memory b, string memory err) internal {
        assertNotEq(a.toString(), b, err);
    }

    function assertNotEq(string memory a, StrSlice b) internal {
        assertNotEq(a, b.toString());
    }

    function assertNotEq(string memory a, StrSlice b, string memory err) internal {
        assertNotEq(a, b.toString(), err);
    }

    // Lt
    //
    // For Lt/Lte/Gt/Gte/Contains the `StrSlice`/`StrSlice` overload does the actual
    // check; every `string` overload only wraps its operand(s) with `toSlice()` and
    // forwards.

    /// @dev Calls `fail()` unless `a.lt(b)`; both operands are logged on failure.
    function assertLt(StrSlice a, StrSlice b) internal virtual {
        if (!a.lt(b)) {
            emit Log("Error: a < b not satisfied [string]");
            emit LogNamedString(" Value a", a.toString());
            emit LogNamedString(" Value b", b.toString());
            fail();
        }
    }

    /// @dev Same check as above; on failure `err` is logged first, then the
    /// message-less overload re-runs the check and reports the operands.
    function assertLt(StrSlice a, StrSlice b, string memory err) internal virtual {
        if (!a.lt(b)) {
            emit LogNamedString("Error", err);
            assertLt(a, b);
        }
    }

    function assertLt(StrSlice a, string memory b) internal virtual {
        assertLt(a, b.toSlice());
    }

    function assertLt(StrSlice a, string memory b, string memory err) internal virtual {
        assertLt(a, b.toSlice(), err);
    }

    function assertLt(string memory a, StrSlice b) internal virtual {
        assertLt(a.toSlice(), b);
    }

    function assertLt(string memory a, StrSlice b, string memory err) internal virtual {
        assertLt(a.toSlice(), b, err);
    }

    function assertLt(string memory a, string memory b) internal virtual {
        assertLt(a.toSlice(), b.toSlice());
    }

    function assertLt(string memory a, string memory b, string memory err) internal virtual {
        assertLt(a.toSlice(), b.toSlice(), err);
    }

    // Lte

    /// @dev Calls `fail()` unless `a.lte(b)`; both operands are logged on failure.
    function assertLte(StrSlice a, StrSlice b) internal virtual {
        if (!a.lte(b)) {
            emit Log("Error: a <= b not satisfied [string]");
            emit LogNamedString(" Value a", a.toString());
            emit LogNamedString(" Value b", b.toString());
            fail();
        }
    }

    /// @dev Same check as above; on failure `err` is logged first, then the
    /// message-less overload re-runs the check and reports the operands.
    function assertLte(StrSlice a, StrSlice b, string memory err) internal virtual {
        if (!a.lte(b)) {
            emit LogNamedString("Error", err);
            assertLte(a, b);
        }
    }

    function assertLte(StrSlice a, string memory b) internal virtual {
        assertLte(a, b.toSlice());
    }

    function assertLte(StrSlice a, string memory b, string memory err) internal virtual {
        assertLte(a, b.toSlice(), err);
    }

    function assertLte(string memory a, StrSlice b) internal virtual {
        assertLte(a.toSlice(), b);
    }

    function assertLte(string memory a, StrSlice b, string memory err) internal virtual {
        assertLte(a.toSlice(), b, err);
    }

    function assertLte(string memory a, string memory b) internal virtual {
        assertLte(a.toSlice(), b.toSlice());
    }

    function assertLte(string memory a, string memory b, string memory err) internal virtual {
        assertLte(a.toSlice(), b.toSlice(), err);
    }

    // Gt

    /// @dev Calls `fail()` unless `a.gt(b)`; both operands are logged on failure.
    function assertGt(StrSlice a, StrSlice b) internal virtual {
        if (!a.gt(b)) {
            emit Log("Error: a > b not satisfied [string]");
            emit LogNamedString(" Value a", a.toString());
            emit LogNamedString(" Value b", b.toString());
            fail();
        }
    }

    /// @dev Same check as above; on failure `err` is logged first, then the
    /// message-less overload re-runs the check and reports the operands.
    function assertGt(StrSlice a, StrSlice b, string memory err) internal virtual {
        if (!a.gt(b)) {
            emit LogNamedString("Error", err);
            assertGt(a, b);
        }
    }

    function assertGt(StrSlice a, string memory b) internal virtual {
        assertGt(a, b.toSlice());
    }

    function assertGt(StrSlice a, string memory b, string memory err) internal virtual {
        assertGt(a, b.toSlice(), err);
    }

    function assertGt(string memory a, StrSlice b) internal virtual {
        assertGt(a.toSlice(), b);
    }

    function assertGt(string memory a, StrSlice b, string memory err) internal virtual {
        assertGt(a.toSlice(), b, err);
    }

    function assertGt(string memory a, string memory b) internal virtual {
        assertGt(a.toSlice(), b.toSlice());
    }

    function assertGt(string memory a, string memory b, string memory err) internal virtual {
        assertGt(a.toSlice(), b.toSlice(), err);
    }

    // Gte

    /// @dev Calls `fail()` unless `a.gte(b)`; both operands are logged on failure.
    function assertGte(StrSlice a, StrSlice b) internal virtual {
        if (!a.gte(b)) {
            emit Log("Error: a >= b not satisfied [string]");
            emit LogNamedString(" Value a", a.toString());
            emit LogNamedString(" Value b", b.toString());
            fail();
        }
    }

    /// @dev Same check as above; on failure `err` is logged first, then the
    /// message-less overload re-runs the check and reports the operands.
    function assertGte(StrSlice a, StrSlice b, string memory err) internal virtual {
        if (!a.gte(b)) {
            emit LogNamedString("Error", err);
            assertGte(a, b);
        }
    }

    function assertGte(StrSlice a, string memory b) internal virtual {
        assertGte(a, b.toSlice());
    }

    function assertGte(StrSlice a, string memory b, string memory err) internal virtual {
        assertGte(a, b.toSlice(), err);
    }

    function assertGte(string memory a, StrSlice b) internal virtual {
        assertGte(a.toSlice(), b);
    }

    function assertGte(string memory a, StrSlice b, string memory err) internal virtual {
        assertGte(a.toSlice(), b, err);
    }

    function assertGte(string memory a, string memory b) internal virtual {
        assertGte(a.toSlice(), b.toSlice());
    }

    function assertGte(string memory a, string memory b, string memory err) internal virtual {
        assertGte(a.toSlice(), b.toSlice(), err);
    }

    // Contains

    /// @dev Calls `fail()` unless `a.contains(b)`; both operands are logged on failure.
    function assertContains(StrSlice a, StrSlice b) internal virtual {
        if (!a.contains(b)) {
            emit Log("Error: a does not contain b [string]");
            emit LogNamedString(" String a", a.toString());
            emit LogNamedString(" String b", b.toString());
            fail();
        }
    }

    /// @dev Same check as above; on failure `err` is logged first, then the
    /// message-less overload re-runs the check and reports the operands.
    function assertContains(StrSlice a, StrSlice b, string memory err) internal virtual {
        if (!a.contains(b)) {
            emit LogNamedString("Error", err);
            assertContains(a, b);
        }
    }

    function assertContains(StrSlice a, string memory b) internal virtual {
        assertContains(a, b.toSlice());
    }

    function assertContains(StrSlice a, string memory b, string memory err) internal virtual {
        assertContains(a, b.toSlice(), err);
    }

    function assertContains(string memory a, StrSlice b) internal virtual {
        assertContains(a.toSlice(), b);
    }

    function assertContains(string memory a, StrSlice b, string memory err) internal virtual {
        assertContains(a.toSlice(), b, err);
    }

    function assertContains(string memory a, string memory b) internal virtual {
        assertContains(a.toSlice(), b.toSlice());
    }

    function assertContains(string memory a, string memory b, string memory err) internal virtual {
        assertContains(a.toSlice(), b.toSlice(), err);
    }
}
--------------------------------------------------------------------------------
/src/utils/memchr.sol:
--------------------------------------------------------------------------------
// SPDX-License-Identifier: MIT

pragma solidity ^0.8.17;

/*
 * These functions are VERY DANGEROUS!
 * They operate directly on memory pointers, use with caution.
 *
 * Assembly here is marked as memory-safe for optimization.
 * The caller MUST use pointers in a memory-safe way!
 * https://docs.soliditylang.org/en/latest/assembly.html#memory-safety
 *
 * Loosely based on https://doc.rust-lang.org/1.65.0/core/slice/memchr/
 */

/**
 * @dev Returns the first index matching the byte `x` in text;
 * or type(uint256).max if not found.
 */
function memchr(uint256 ptrText, uint256 lenText, uint8 x) pure returns (uint256 index) {
    if (lenText <= 32) {
        // Fast path for small slices.
        return memchrWord(ptrText, lenText, x);
    }

    uint256 ptrStart = ptrText;
    uint256 lenTail;
    uint256 ptrEnd;
    // safe because lenTail <= lenText (ptr+len is implicitly safe)
    unchecked {
        // (unchecked % saves a little gas)
        lenTail = lenText % 32;
        ptrEnd = ptrText + (lenText - lenTail);
    }
    uint256 repeatedX = repeatByte(x);
    while (ptrText < ptrEnd) {
        // any bytes equal to `x` become zeros
        // (this helps find `x` faster, values of non-zero bytes don't matter)
        uint256 chunkXZero;
        /// @solidity memory-safe-assembly
        assembly {
            chunkXZero := xor(mload(ptrText), repeatedX)
        }
        // break if there is a matching byte
        if (nonZeroIfXcontainsZeroByte(chunkXZero) != 0) {
            // - is safe because ptrText >= ptrStart (ptrText = ptrStart + 32*n)
            // + is safe because index + offsetLen < lenText
            // (ptr+len is implicitly safe)
            unchecked {
                return
                    // index
                    memchrWord(ptrText, 32, x)
                    // + offsetLen
                    + (ptrText - ptrStart);
            }
        }

        // safe because ptrText < ptrEnd, and ptrEnd = ptrText + n*32 (see lenTail)
        unchecked {
            ptrText += 32;
        }
    }

    if (lenTail == 0) return type(uint256).max;

    index = memchrWord(ptrEnd, lenTail, x);
    if (index == type(uint256).max) {
        return type(uint256).max;
    } else {
        // - is safe because ptrEnd >= ptrStart (ptrEnd = ptrStart + lenText - lenTail)
        // + is safe because index + offsetLen < lenText
        // (ptr+len is implicitly safe)
        unchecked {
            return index
                // + offsetLen
                + (ptrEnd - ptrStart);
        }
    }
}

/**
 * @dev Returns the last index matching the byte `x` in text;
 * or type(uint256).max if not found.
 */
function memrchr(uint256 ptrText, uint256 lenText, uint8 x) pure returns (uint256) {
    if (lenText <= 32) {
        // Fast path for small slices.
        return memrchrWord(ptrText, lenText, x);
    }

    uint256 lenTail;
    uint256 offsetPtr;
    // safe because pointers are guaranteed to be valid by the caller
    unchecked {
        // (unchecked % saves a little gas)
        lenTail = lenText % 32;
        offsetPtr = ptrText + lenText;
    }

    if (lenTail != 0) {
        // remove tail length
        // - is safe because lenTail <= lenText <= offsetPtr
        unchecked {
            offsetPtr -= lenTail;
        }
        // return if there is a matching byte
        uint256 index = memrchrWord(offsetPtr, lenTail, x);
        if (index != type(uint256).max) {
            // - is safe because offsetPtr > ptrText (offsetPtr = ptrText + lenText - lenTail)
            // + is safe because index + offsetLen < lenText
            unchecked {
                return index
                    // + offsetLen
                    + (offsetPtr - ptrText);
            }
        }
    }

    uint256 repeatedX = repeatByte(x);
    while (offsetPtr > ptrText) {
        // - is safe because 32 <= lenText <= offsetPtr
        unchecked {
            offsetPtr -= 32;
        }

        // any bytes equal to `x` become zeros
        // (this helps find `x` faster, values of non-zero bytes don't matter)
        uint256 chunkXZero;
        /// @solidity memory-safe-assembly
        assembly {
            chunkXZero := xor(mload(offsetPtr), repeatedX)
        }
        // break if there is a matching byte
        if (nonZeroIfXcontainsZeroByte(chunkXZero) != 0) {
            // - is safe because offsetPtr > ptrText (see the while condition)
            // + is safe because index + offsetLen < lenText
            unchecked {
                return
                    // index
                    memrchrWord(offsetPtr, 32, x)
                    // + offsetLen
                    + (offsetPtr - ptrText);
            }
        }
    }
    // not found
    return type(uint256).max;
}

/**
 * @dev Returns the first index matching the byte `x` in text;
 * or type(uint256).max if not found.
 *
 * WARNING: it works ONLY for length 32 or less.
 * (a larger `lenText` is clamped to 32, so only the first word is searched)
 * This is for use by memchr after its chunk search.
 */
function memchrWord(uint256 ptrText, uint256 lenText, uint8 x) pure returns (uint256) {
    uint256 chunk;
    /// @solidity memory-safe-assembly
    assembly {
        chunk := mload(ptrText)
    }

    uint256 i;
    if (lenText > 32) {
        lenText = 32;
    }

    ////////binary search start
    // Some manual binary searches, cost ~50gas, could save up to ~1500
    // (comment them out and the function will work fine)
    // Each probe ORs the bytes that are NOT being inspected with 0xFF so they can
    // never look like a match; a zero result means the inspected leading bytes
    // hold no `x`, so the linear scan below may start after them.
    if (lenText >= 16 + 2) {
        // bytes equal to `x` become zero
        uint256 chunkXZero = chunk ^ repeatByte(x);

        if (nonZeroIfXcontainsZeroByte(chunkXZero | type(uint128).max) == 0) {
            i = 16;

            if (lenText >= 24 + 2) {
                if (nonZeroIfXcontainsZeroByte(chunkXZero | type(uint64).max) == 0) {
                    i = 24;
                }
            }
        } else if (nonZeroIfXcontainsZeroByte(chunkXZero | type(uint192).max) == 0) {
            i = 8;
        }
    } else if (lenText >= 8 + 2) {
        // bytes equal to `x` become zero
        uint256 chunkXZero = chunk ^ repeatByte(x);

        if (nonZeroIfXcontainsZeroByte(chunkXZero | type(uint192).max) == 0) {
            i = 8;
        }
    }
    ////////binary search end

    // ++ is safe because lenText <= 32
    unchecked {
        for (i; i < lenText; i++) {
            uint8 b;
            assembly {
                b := byte(i, chunk)
            }
            if (b == x) return i;
        }
    }
    // not found
    return type(uint256).max;
}

/**
 * @dev Returns the last index matching the byte `x` in text;
 * or type(uint256).max if not found.
 *
 * WARNING: it works ONLY for length 32 or less.
 * This is for use by memrchr after its chunk search.
 */
function memrchrWord(uint256 ptrText, uint256 lenText, uint8 x) pure returns (uint256) {
    if (lenText > 32) {
        lenText = 32;
    }
    uint256 chunk;
    /// @solidity memory-safe-assembly
    assembly {
        chunk := mload(ptrText)
    }

    while (lenText > 0) {
        // -- is safe because lenText > 0
        unchecked {
            lenText--;
        }
        uint8 b;
        assembly {
            b := byte(lenText, chunk)
        }
        if (b == x) return lenText;
    }
    // not found
    return type(uint256).max;
}

/// @dev repeating low bit for containsZeroByte
uint256 constant LO_U256 = 0x0101010101010101010101010101010101010101010101010101010101010101;
/// @dev repeating high bit for containsZeroByte
uint256 constant HI_U256 = 0x8080808080808080808080808080808080808080808080808080808080808080;

/**
 * @dev Returns a non-zero value if `x` contains any zero byte.
 * (returning a bool would be less efficient)
 *
 * From *Matters Computational*, J. Arndt:
 *
 * "The idea is to subtract one from each of the bytes and then look for
 * bytes where the borrow propagated all the way to the most significant bit."
 */
function nonZeroIfXcontainsZeroByte(uint256 x) pure returns (uint256) {
    unchecked {
        return (x - LO_U256) & (~x) & HI_U256;
    }
    /*
     * An example of how it works:
     *                                              here is 00
     * x    0x0101010101010101010101010101010101010101010101000101010101010101
     * x-LO 0xffffffffffffffffffffffffffffffffffffffffffffffff0000000000000000
     * ~x   0xfefefefefefefefefefefefefefefefefefefefefefefefffefefefefefefefe
     * &1   0xfefefefefefefefefefefefefefefefefefefefefefefeff0000000000000000
     * &2   0x8080808080808080808080808080808080808080808080800000000000000000
     */
}

/// @dev Repeat byte `b` 32 times
function repeatByte(uint8 b) pure returns (uint256) {
    // safe because uint8 can't cause overflow:
    // e.g. 0x5A * 0x010101..010101 = 0x5A5A5A..5A5A5A
    // and  0xFF * 0x010101..010101 = 0xFFFFFF..FFFFFF
    unchecked {
        return b * (type(uint256).max / type(uint8).max);
    }
}
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # StrSlice & Slice library for Solidity 2 | 3 | - Types: [StrSlice](src/StrSlice.sol) for strings, [Slice](src/Slice.sol) for bytes, [StrChar](src/StrChar.sol) for characters 4 | - [Gas efficient](https://github.com/dk1a/solidity-stringutils-gas) 5 | - Versioned releases, available for both foundry and hardhat 6 | - Simple imports, you only need e.g.
`StrSlice` and `toSlice` 7 | - `StrSlice` enforces UTF-8 character boundaries; `StrChar` validates character encoding 8 | - Clean, well-documented and thoroughly-tested source code 9 | - Optional [PRBTest](https://github.com/paulrberg/prb-test) extension with assertions like `assertContains` and `assertLt` for both slices and native `bytes`, `string` 10 | - `Slice` and `StrSlice` are value types, not structs 11 | - Low-level functions like [memchr](src/utils/memchr.sol), [memcmp, memmove etc](src/utils/mem.sol) 12 | 13 | ## Install 14 | 15 | ### Node 16 | ```sh 17 | yarn add @dk1a/solidity-stringutils 18 | ``` 19 | 20 | ### Forge 21 | ```sh 22 | forge install --no-commit dk1a/solidity-stringutils 23 | ``` 24 | 25 | ## StrSlice 26 | 27 | ```solidity 28 | import { StrSlice, toSlice } from "@dk1a/solidity-stringutils/src/StrSlice.sol"; 29 | 30 | using { toSlice } for string; 31 | 32 | /// @dev Returns the content of brackets, or empty string if not found 33 | function extractFromBrackets(string memory stuffInBrackets) pure returns (StrSlice extracted) { 34 | StrSlice s = stuffInBrackets.toSlice(); 35 | bool found; 36 | 37 | (found, , s) = s.splitOnce(toSlice("(")); 38 | if (!found) return toSlice(""); 39 | 40 | (found, s, ) = s.rsplitOnce(toSlice(")")); 41 | if (!found) return toSlice(""); 42 | 43 | return s; 44 | } 45 | /* 46 | assertEq( 47 | extractFromBrackets("((1 + 2) + 3) + 4"), 48 | toSlice("(1 + 2) + 3") 49 | ); 50 | */ 51 | ``` 52 | 53 | See [ExamplesTest](test/Examples.t.sol). 54 | 55 | Internally `StrSlice` uses `Slice` and extends it with logic for multibyte UTF-8 where necessary. 
56 | 57 | | Method | Description | 58 | | ---------------- | ------------------------------------------------ | 59 | | `len` | length in **bytes** | 60 | | `isEmpty` | true if len == 0 | 61 | | `toString` | copy slice contents to a **new** string | 62 | | `keccak` | equal to `keccak256(s.toString())`, but cheaper | 63 | **concatenate** 64 | | `add` | Concatenate 2 slices into a **new** string | 65 | | `join` | Join slice array on `self` as separator | 66 | **compare** 67 | | `cmp` | 0 for eq, < 0 for lt, > 0 for gt | 68 | | `eq`,`ne` | ==, != (more efficient than cmp) | 69 | | `lt`,`lte` | <, <= | 70 | | `gt`,`gte` | >, >= | 71 | **index** 72 | | `isCharBoundary` | true if given index is an allowed boundary | 73 | | `get` | get 1 UTF-8 character at given index | 74 | | `splitAt` | (slice[:index], slice[index:]) | 75 | | `getSubslice` | slice[start:end] | 76 | **search** 77 | | `find` | index of the start of the **first** match | 78 | | `rfind` | index of the start of the **last** match | 79 | | | *return `type(uint256).max` for no matches* | 80 | | `contains` | true if a match is found | 81 | | `startsWith` | true if starts with pattern | 82 | | `endsWith` | true if ends with pattern | 83 | **modify** 84 | | `stripPrefix` | returns subslice without the prefix | 85 | | `stripSuffix` | returns subslice without the suffix | 86 | | `splitOnce` | split into 2 subslices on the **first** match | 87 | | `rsplitOnce` | split into 2 subslices on the **last** match | 88 | | `replacen` | *experimental* replace `n` matches | 89 | | | *replacen requires 0 < pattern.len() <= to.len()*| 90 | **iterate** 91 | | `chars` | character iterator over the slice | 92 | **ascii** 93 | | `isAscii` | true if all chars are ASCII | 94 | **dangerous** 95 | | `asSlice` | get underlying Slice | 96 | | `ptr` | get memory pointer | 97 | 98 | Indexes are in **bytes**, not characters. Indexing methods revert if `isCharBoundary` is false. 
99 | 100 | ## StrCharsIter 101 | 102 | *Returned by `chars` method of `StrSlice`* 103 | 104 | ```solidity 105 | import { StrSlice, toSlice, StrCharsIter } from "@dk1a/solidity-stringutils/src/StrSlice.sol"; 106 | 107 | using { toSlice } for string; 108 | 109 | /// @dev Returns a StrSlice of `str` with the 2 first UTF-8 characters removed 110 | /// reverts on invalid UTF8 111 | function removeFirstTwoChars(string memory str) pure returns (StrSlice) { 112 | StrCharsIter memory chars = str.toSlice().chars(); 113 | for (uint256 i; i < 2; i++) { 114 | if (chars.isEmpty()) break; 115 | chars.next(); 116 | } 117 | return chars.asStr(); 118 | } 119 | /* 120 | assertEq(removeFirstTwoChars(unicode"📎!こんにちは"), unicode"こんにちは"); 121 | */ 122 | ``` 123 | 124 | | Method | Description | 125 | | ---------------- | ------------------------------------------------ | 126 | | `asStr` | get underlying StrSlice of the remainder | 127 | | `len` | remainder length in **bytes** | 128 | | `isEmpty` | true if len == 0 | 129 | | `next` | advance the iterator, return the next StrChar | 130 | | `nextBack` | advance from the back, return the next StrChar | 131 | | `count` | returns the number of UTF-8 characters | 132 | | `validateUtf8` | returns true if the sequence is valid UTF-8 | 133 | **dangerous** 134 | | `unsafeNext` | advance unsafely, return the next StrChar | 135 | | `unsafeCount` | unsafely count chars, read the source for caveats| 136 | | `ptr` | get memory pointer | 137 | 138 | `count`, `validateUtf8`, `unsafeCount` consume the iterator in O(n). 139 | 140 | Safe methods revert on an invalid UTF-8 byte sequence. 141 | 142 | `unsafeNext` does NOT check if the iterator is empty, may underflow! Does not revert on invalid UTF-8. If returned `StrChar` is invalid, it will have length 0. Otherwise length 1-4. 143 | 144 | Internally `next`, `unsafeNext`, `count` all use `_nextRaw`. It's very efficient, but very unsafe and complicated. Read the source and import it separately if you need it. 
145 | 146 | ## StrChar 147 | 148 | Represents a single UTF-8 encoded character. 149 | Internally it's bytes32 with leading byte at MSB. 150 | 151 | It's returned by some methods of `StrSlice` and `StrCharsIter`. 152 | 153 | | Method | Description | 154 | | ---------------- | ------------------------------------------------ | 155 | | `len` | character length in bytes | 156 | | `toBytes32` | returns the underlying `bytes32` value | 157 | | `toString` | copy the character to a new string | 158 | | `toCodePoint` | returns the unicode code point (`ord` in python) | 159 | | `cmp` | 0 for eq, < 0 for lt, > 0 for gt | 160 | | `eq`,`ne` | ==, != | 161 | | `lt`,`lte` | <, <= | 162 | | `gt`,`gte` | >, >= | 163 | | `isValidUtf8` | usually true | 164 | | `isAscii` | true if the char is ASCII | 165 | 166 | Import `StrChar__` (static function lib) to use `StrChar__.fromCodePoint` for code point to `StrChar` conversion. 167 | 168 | `len` can return `0` *only* for invalid UTF-8 characters. But some invalid chars *may* have non-zero len! (use `isValidUtf8` to check validity). Note that `0x00` is a valid 1-byte UTF-8 character, its len is 1. 169 | 170 | `isValidUtf8` can be false if the character was formed with an unsafe method (fromUnchecked, wrap). 171 | 172 | ## Slice 173 | 174 | ```solidity 175 | import { Slice, toSlice } from "@dk1a/solidity-stringutils/src/Slice.sol"; 176 | 177 | using { toSlice } for bytes; 178 | 179 | function findZeroByte(bytes memory b) pure returns (uint256 index) { 180 | return b.toSlice().find( 181 | bytes(hex"00").toSlice() 182 | ); 183 | } 184 | ``` 185 | 186 | See `using {...} for Slice global` in the source for a function summary. Many are shared between `Slice` and `StrSlice`, but there are differences. 187 | 188 | Internally Slice has very minimal assembly, instead using `memcpy`, `memchr`, `memcmp` and others; if you need the low-level functions, see `src/utils/`. 
189 | 190 | ## Assertions (PRBTest extension) 191 | 192 | ```solidity 193 | import { PRBTest } from "@prb/test/src/PRBTest.sol"; 194 | import { Assertions } from "@dk1a/solidity-stringutils/src/test/Assertions.sol"; 195 | 196 | contract StrSliceTest is PRBTest, Assertions { 197 | function testContains() public { 198 | bytes memory b1 = "12345"; 199 | bytes memory b2 = "3"; 200 | assertContains(b1, b2); 201 | } 202 | 203 | function testLt() public { 204 | string memory s1 = "123"; 205 | string memory s2 = "124"; 206 | assertLt(s1, s2); 207 | } 208 | } 209 | ``` 210 | 211 | You can completely ignore slices if all you want is e.g. `assertContains` for native `bytes`/`string`. 212 | 213 | ## Acknowledgements 214 | - [Arachnid/solidity-stringutils](https://github.com/Arachnid/solidity-stringutils) - I basically wanted to make an updated version of solidity-stringutils 215 | - [rust](https://doc.rust-lang.org/core/index.html) - most similarities are in names and general structure; the implementation can't really be similar (solidity doesn't even have generics) 216 | - [paulrberg/prb-math](https://github.com/paulrberg/prb-math) - good template for solidity data structure libraries with `using {...} for ... 
global` 217 | - [brockelmore/memmove](https://github.com/brockelmore/memmove) - good assembly memory management examples -------------------------------------------------------------------------------- /test/StrSlice.t.sol: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: MIT 2 | 3 | pragma solidity ^0.8.17; 4 | 5 | import { PRBTest } from "@prb/test/src/PRBTest.sol"; 6 | import { StrSliceAssertions } from "../src/test/StrSliceAssertions.sol"; 7 | 8 | import { StrSlice, toSlice, StrSlice__InvalidCharBoundary } from "../src/StrSlice.sol"; 9 | 10 | using { toSlice } for string; 11 | 12 | contract StrSliceTest is PRBTest, StrSliceAssertions { 13 | function testToString() public { 14 | string memory _s = unicode"Hello, world!"; 15 | assertEq(_s, _s.toSlice().toString()); 16 | } 17 | 18 | function testLen() public { 19 | string memory _s = unicode"こんにちは"; 20 | assertEq(bytes(_s).length, _s.toSlice().len()); 21 | } 22 | 23 | function testIsEmpty() public { 24 | assertTrue(string("").toSlice().isEmpty()); 25 | assertFalse(new string(1).toSlice().isEmpty()); 26 | } 27 | 28 | /*////////////////////////////////////////////////////////////////////////// 29 | CONCATENATION 30 | //////////////////////////////////////////////////////////////////////////*/ 31 | 32 | function testAdd() public { 33 | assertEq(unicode"こんにちは", toSlice(unicode"こん").add(toSlice(unicode"にちは"))); 34 | } 35 | 36 | function testJoin() public { 37 | StrSlice[] memory sliceArr = new StrSlice[](3); 38 | sliceArr[0] = toSlice("Hello"); 39 | sliceArr[1] = toSlice(unicode"こんにちは"); 40 | sliceArr[2] = toSlice(""); 41 | assertEq( 42 | toSlice(unicode"📎!").join(sliceArr), 43 | unicode"Hello📎!こんにちは📎!" 
44 | ); 45 | } 46 | 47 | /*////////////////////////////////////////////////////////////////////////// 48 | INDEX 49 | //////////////////////////////////////////////////////////////////////////*/ 50 | 51 | function testIsCharBoundary() public { 52 | string memory _s = unicode"こ"; 53 | // start 54 | assertTrue(toSlice(_s).isCharBoundary(0)); 55 | // mid 56 | assertFalse(toSlice(_s).isCharBoundary(1)); 57 | assertFalse(toSlice(_s).isCharBoundary(2)); 58 | // end (isn't a valid index, but is a valid boundary) 59 | assertTrue(toSlice(_s).isCharBoundary(3)); 60 | // out of bounds 61 | assertFalse(toSlice(_s).isCharBoundary(4)); 62 | } 63 | 64 | function testGet() public { 65 | string memory _s = unicode"こんにちは"; 66 | assertEq(_s.toSlice().get(3).toString(), unicode"ん"); 67 | } 68 | 69 | function testGet__InvalidCharBoundary() public { 70 | string memory _s = unicode"こんにちは"; 71 | vm.expectRevert(StrSlice__InvalidCharBoundary.selector); 72 | _s.toSlice().get(1); 73 | } 74 | 75 | function testSplitAt() public { 76 | string memory _s = unicode"こんにちは"; 77 | (StrSlice s1, StrSlice s2) = _s.toSlice().splitAt(3); 78 | assertEq(s1.toString(), unicode"こ"); 79 | assertEq(s2.toString(), unicode"んにちは"); 80 | } 81 | 82 | function testSplitAt__InvalidCharBoundary() public { 83 | string memory _s = unicode"こんにちは"; 84 | vm.expectRevert(StrSlice__InvalidCharBoundary.selector); 85 | _s.toSlice().splitAt(1); 86 | } 87 | 88 | function testGetSubslice() public { 89 | string memory _s = unicode"こんにちは"; 90 | assertEq(_s.toSlice().getSubslice(3, 9).toString(), unicode"んに"); 91 | } 92 | 93 | function testGetSubslice__InvalidCharBoundary() public { 94 | string memory _s = unicode"こんにちは"; 95 | vm.expectRevert(StrSlice__InvalidCharBoundary.selector); 96 | _s.toSlice().getSubslice(3, 8); 97 | } 98 | 99 | /*////////////////////////////////////////////////////////////////////////// 100 | SEARCH 101 | //////////////////////////////////////////////////////////////////////////*/ 102 | 103 | function 
testFind() public { 104 | string memory s1 = unicode"012こんにちはこんにちは34"; 105 | string memory s2 = unicode"んに"; 106 | uint256 index = s1.toSlice().find(s2.toSlice()); 107 | assertEq(index, 6); 108 | (, StrSlice rSlice) = s1.toSlice().splitAt(index); 109 | assertEq(rSlice, unicode"んにちはこんにちは34"); 110 | } 111 | 112 | function testRfind() public { 113 | string memory s1 = unicode"012こんにちはこんにちは34"; 114 | string memory s2 = unicode"んに"; 115 | uint256 index = s1.toSlice().rfind(s2.toSlice()); 116 | assertEq(index, 21); 117 | (, StrSlice rSlice) = s1.toSlice().splitAt(index); 118 | assertEq(rSlice, unicode"んにちは34"); 119 | } 120 | 121 | function testContains() public { 122 | string memory s1 = unicode"「lorem ipsum」の典型的なテキストのほかにも、原典からの距離の様々なバリエーションが存在する。他のバージョンでは、ラテン語にはあまり登場しないか存在しない"; 123 | string memory s2 = unicode"登場"; 124 | assertTrue(s1.toSlice().contains(s2.toSlice())); 125 | } 126 | 127 | function testNotContains() public { 128 | string memory s1 = unicode"「lorem ipsum」の典型的なテキストのほかにも、原典からの距離の様々なバリエーションが存在する。他のバージョンでは、ラテン語にはあまり登場しないか存在しない"; 129 | string memory s2 = unicode"0"; 130 | assertFalse(s1.toSlice().contains(s2.toSlice())); 131 | } 132 | 133 | /*////////////////////////////////////////////////////////////////////////// 134 | MODIFY 135 | //////////////////////////////////////////////////////////////////////////*/ 136 | 137 | function testStripPrefix() public { 138 | StrSlice slice = string(unicode"こんにちは").toSlice(); 139 | assertEq(slice.stripPrefix(string(unicode"こん").toSlice()), string(unicode"にちは")); 140 | assertEq(slice.stripPrefix(slice), ""); 141 | assertEq(slice.stripPrefix(string("").toSlice()), slice); 142 | assertEq(slice.stripPrefix(string(unicode"は").toSlice()), slice); 143 | assertEq(slice.stripPrefix(string(unicode"こんにちはは").toSlice()), slice); 144 | } 145 | 146 | function testStripPrefix__FromEmpty() public { 147 | StrSlice slice = string("").toSlice(); 148 | assertEq(slice.stripPrefix(string(unicode"こ").toSlice()), slice); 149 | 
assertEq(slice.stripPrefix(string("").toSlice()), slice); 150 | } 151 | 152 | function testStripSuffix() public { 153 | StrSlice slice = string(unicode"こんにちは").toSlice(); 154 | assertEq(slice.stripSuffix(string(unicode"ちは").toSlice()), string(unicode"こんに")); 155 | assertEq(slice.stripSuffix(slice), ""); 156 | assertEq(slice.stripSuffix(string("").toSlice()), slice); 157 | assertEq(slice.stripSuffix(string(unicode"こ").toSlice()), slice); 158 | assertEq(slice.stripSuffix(string(unicode"ここんにちは").toSlice()), slice); 159 | } 160 | 161 | function testStripSuffix__FromEmpty() public { 162 | StrSlice slice = string("").toSlice(); 163 | assertEq(slice.stripSuffix(string(unicode"こ").toSlice()), slice); 164 | assertEq(slice.stripSuffix(string("").toSlice()), slice); 165 | } 166 | 167 | function testSplitOnce() public { 168 | StrSlice slice = string(unicode"こんにちはこんにちは").toSlice(); 169 | StrSlice pat = string(unicode"に").toSlice(); 170 | (bool found, StrSlice prefix, StrSlice suffix) = slice.splitOnce(pat); 171 | assertTrue(found); 172 | assertEq(prefix, unicode"こん"); 173 | assertEq(suffix, unicode"ちはこんにちは"); 174 | } 175 | 176 | function testSplitOnce__NotFound() public { 177 | StrSlice slice = string(unicode"こんにちはこんにちは").toSlice(); 178 | StrSlice pat = string(unicode"こに").toSlice(); 179 | (bool found, StrSlice prefix, StrSlice suffix) = slice.splitOnce(pat); 180 | assertFalse(found); 181 | assertEq(prefix, unicode"こんにちはこんにちは"); 182 | assertEq(suffix, unicode""); 183 | } 184 | 185 | function testRsplitOnce() public { 186 | StrSlice slice = string(unicode"こんにちはこんにちは").toSlice(); 187 | StrSlice pat = string(unicode"に").toSlice(); 188 | (bool found, StrSlice prefix, StrSlice suffix) = slice.rsplitOnce(pat); 189 | assertTrue(found); 190 | assertEq(prefix, unicode"こんにちはこん"); 191 | assertEq(suffix, unicode"ちは"); 192 | } 193 | 194 | function testRsplitOnce__NotFound() public { 195 | StrSlice slice = string(unicode"こんにちはこんにちは").toSlice(); 196 | StrSlice pat = 
string(unicode"こに").toSlice(); 197 | (bool found, StrSlice prefix, StrSlice suffix) = slice.rsplitOnce(pat); 198 | assertFalse(found); 199 | assertEq(prefix, unicode""); 200 | assertEq(suffix, unicode"こんにちはこんにちは"); 201 | } 202 | 203 | // TODO both replacen and its tests are rather unfinished 204 | function testReplacen() public { 205 | string memory s = unicode"0110110110110"; 206 | string memory pat = unicode"11"; 207 | string memory to = unicode"__"; 208 | string memory result = unicode"0__0__0__0__0"; 209 | assertEq(s.toSlice().replacen(pat.toSlice(), to.toSlice(), 4), result); 210 | } 211 | 212 | function testReplacen__Unicode() public { 213 | string memory s = unicode"012こんにちはこんにちはこんにちは34"; 214 | string memory pat = unicode"んに"; 215 | string memory to = unicode"📎"; 216 | string memory result = unicode"012こ📎ちはこ📎ちはこんにちは34"; 217 | assertEq(s.toSlice().replacen(pat.toSlice(), to.toSlice(), 2), result); 218 | } 219 | 220 | // TODO more tests 221 | 222 | /*////////////////////////////////////////////////////////////////////////// 223 | ASCII 224 | //////////////////////////////////////////////////////////////////////////*/ 225 | 226 | function testIsAscii() public { 227 | string memory ascii = hex"000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f"; 228 | assertTrue(toSlice("").isAscii()); 229 | assertTrue(toSlice("a").isAscii()); 230 | assertTrue(toSlice(ascii).isAscii()); 231 | assertTrue(toSlice(string(abi.encodePacked(ascii, ascii, ascii, ascii))).isAscii()); 232 | assertFalse(toSlice(unicode"📎").isAscii()); 233 | assertFalse(toSlice(unicode"012こ").isAscii()); 234 | assertFalse(toSlice(string(bytes(hex"FF"))).isAscii()); 235 | assertFalse(toSlice(string(abi.encodePacked(hex"80", ascii))).isAscii()); 236 | assertFalse(toSlice(string(abi.encodePacked(ascii, 
hex"80"))).isAscii()); 237 | assertFalse(toSlice(string(abi.encodePacked(ascii, unicode"📎"))).isAscii()); 238 | } 239 | } -------------------------------------------------------------------------------- /src/StrCharsIter.sol: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: MIT 2 | 3 | pragma solidity ^0.8.17; 4 | 5 | import { Slice, Slice__ } from "./Slice.sol"; 6 | import { StrSlice } from "./StrSlice.sol"; 7 | import { SliceIter, SliceIter__, SliceIter__StopIteration } from "./SliceIter.sol"; 8 | import { StrChar, StrChar__, StrChar__InvalidUTF8 } from "./StrChar.sol"; 9 | import { isValidUtf8, utf8CharWidth } from "./utils/utf8.sol"; 10 | import { leftMask } from "./utils/mem.sol"; 11 | 12 | /** 13 | * @title String chars iterator. 14 | * @dev This struct is created by the `chars` method on `StrSlice`. 15 | * Iterates 1 UTF-8 encoded character at a time (which may have 1-4 bytes). 16 | * 17 | * Note StrCharsIter iterates over UTF-8 encoded codepoints, not unicode scalar values. 18 | * This is mostly done for simplicity, since solidity doesn't care about unicode anyways. 19 | * 20 | * TODO think about actually adding char and unicode awareness? 21 | * https://github.com/devstein/unicode-eth attempts something like that 22 | */ 23 | struct StrCharsIter { 24 | uint256 _ptr; 25 | uint256 _len; 26 | } 27 | 28 | /*////////////////////////////////////////////////////////////////////////// 29 | STATIC FUNCTIONS 30 | //////////////////////////////////////////////////////////////////////////*/ 31 | 32 | library StrCharsIter__ { 33 | /** 34 | * @dev Creates a new `StrCharsIter` from `StrSlice`. 35 | * Note the `StrSlice` is assumed to be memory-safe. 
36 | */ 37 | function from(StrSlice slice) internal pure returns (StrCharsIter memory) { 38 | return StrCharsIter(slice.ptr(), slice.len()); 39 | 40 | // TODO I'm curious about gas differences 41 | // return StrCharsIter(SliceIter__.from(str.asSlice())); 42 | } 43 | } 44 | 45 | /*////////////////////////////////////////////////////////////////////////// 46 | GLOBAL FUNCTIONS 47 | //////////////////////////////////////////////////////////////////////////*/ 48 | 49 | using { 50 | asStr, 51 | ptr, len, isEmpty, 52 | next, nextBack, unsafeNext, 53 | count, validateUtf8, unsafeCount 54 | } for StrCharsIter global; 55 | 56 | /** 57 | * @dev Views the underlying data as a subslice of the original data. 58 | */ 59 | function asStr(StrCharsIter memory self) pure returns (StrSlice slice) { 60 | return StrSlice.wrap(Slice.unwrap( 61 | self.asSlice() 62 | )); 63 | } 64 | 65 | /** 66 | * @dev Returns the pointer to the start of an in-memory string slice. 67 | * This method is primarily for internal use. 68 | */ 69 | function ptr(StrCharsIter memory self) pure returns (uint256) { 70 | return self._ptr; 71 | } 72 | 73 | /** 74 | * @dev Returns the length in bytes, not codepoints. 75 | */ 76 | function len(StrCharsIter memory self) pure returns (uint256) { 77 | return self._len; 78 | } 79 | 80 | /** 81 | * @dev Returns true if the iterator is empty. 82 | */ 83 | function isEmpty(StrCharsIter memory self) pure returns (bool) { 84 | return self._len == 0; 85 | } 86 | 87 | /** 88 | * @dev Advances the iterator and returns the next character. 89 | * Reverts if len == 0. 90 | * Reverts on invalid UTF-8. 91 | */ 92 | function next(StrCharsIter memory self) pure returns (StrChar) { 93 | if (self._len == 0) revert SliceIter__StopIteration(); 94 | (bytes32 b, uint256 charLen) = self._nextRaw(true); 95 | // safe because _nextRaw guarantees charLen <= selfLen as long as selfLen != 0. 
96 | unchecked { 97 | // charLen > 0 because of `revertOnInvalid` flag 98 | self._len -= charLen; 99 | } 100 | // safe because _nextRaw reverts on invalid UTF-8 101 | return StrChar__.fromUnchecked(b, charLen); 102 | } 103 | 104 | /** 105 | * @dev Advances the iterator from the back and returns the next character. 106 | * Reverts if len == 0. 107 | * Reverts on invalid UTF-8. 108 | */ 109 | function nextBack(StrCharsIter memory self) pure returns (StrChar char) { 110 | if (self._len == 0) revert SliceIter__StopIteration(); 111 | 112 | // _self shares memory with self! 113 | SliceIter memory _self = self._sliceIter(); 114 | 115 | bool isValid; 116 | uint256 b; 117 | for (uint256 i; i < 4; i++) { 118 | // an example of what's going on in the loop: 119 | // b = 0x0000000000..00 120 | // nextBack = 0x80 121 | // b = 0x8000000000..00 (not valid UTF-8) 122 | // nextBack = 0x92 123 | // b = 0x9280000000..00 (not valid UTF-8) 124 | // nextBack = 0x9F 125 | // b = 0x9F92800000..00 (not valid UTF-8) 126 | // nextBack = 0xF0 127 | // b = 0xF09F928000..00 (valid UTF-8, break) 128 | 129 | // safe because i < 4 130 | unchecked { 131 | // free the space in MSB 132 | b = (b >> 8) | ( 133 | // get 1 byte in LSB 134 | uint256(_self.nextBack()) 135 | // flip it to MSB 136 | << (31 * 8) 137 | ); 138 | } 139 | // break if the char is valid 140 | if (isValidUtf8(bytes32(b)) != 0) { 141 | isValid = true; 142 | break; 143 | } 144 | } 145 | if (!isValid) revert StrChar__InvalidUTF8(); 146 | 147 | // construct the character; 148 | // wrap is safe, because UTF-8 was validated, 149 | // and the trailing bytes are 0 (since the loop went byte-by-byte) 150 | char = StrChar.wrap(bytes32(b)); 151 | // the iterator was already advanced by `_self.nextBack()` 152 | return char; 153 | } 154 | 155 | /** 156 | * @dev Advances the iterator and returns the next character. 157 | * Does NOT validate iterator length. It could underflow! 158 | * Does NOT revert on invalid UTF-8. 
159 | * WARNING: for invalid UTF-8 bytes, advances by 1 and returns an invalid `StrChar` with len 0! 160 | */ 161 | function unsafeNext(StrCharsIter memory self) pure returns (StrChar char) { 162 | // _nextRaw guarantees charLen <= selfLen IF selfLen != 0 163 | (bytes32 b, uint256 charLen) = self._nextRaw(false); 164 | if (charLen > 0) { 165 | // safe IF the caller ensures that self._len != 0 166 | unchecked { 167 | self._len -= charLen; 168 | } 169 | // ALWAYS produces a valid character 170 | return StrChar__.fromUnchecked(b, charLen); 171 | } else { 172 | // safe IF the caller ensures that self._len != 0 173 | unchecked { 174 | self._len -= 1; 175 | } 176 | // NEVER produces a valid character (this is always a single 0x80-0xFF byte) 177 | return StrChar__.fromUnchecked(b, 1); 178 | } 179 | } 180 | 181 | /** 182 | * @dev Consumes the iterator, counting the number of UTF-8 characters. 183 | * Note O(n) time! 184 | * Reverts on invalid UTF-8. 185 | */ 186 | function count(StrCharsIter memory self) pure returns (uint256 result) { 187 | uint256 endPtr; 188 | // (ptr+len is implicitly safe) 189 | unchecked { 190 | endPtr = self._ptr + self._len; 191 | } 192 | while (self._ptr < endPtr) { 193 | self._nextRaw(true); 194 | // +1 is safe because 2**256 cycles are impossible 195 | unchecked { 196 | result += 1; 197 | } 198 | } 199 | // _nextRaw does NOT modify len to allow optimizations like setting it once at the end 200 | self._len = 0; 201 | return result; 202 | } 203 | 204 | /** 205 | * @dev Consumes the iterator, validating UTF-8 characters. 206 | * Note O(n) time! 207 | * Returns true if all are valid; otherwise false on the first invalid UTF-8 character. 
208 | */ 209 | function validateUtf8(StrCharsIter memory self) pure returns (bool) { 210 | uint256 endPtr; 211 | // (ptr+len is implicitly safe) 212 | unchecked { 213 | endPtr = self._ptr + self._len; 214 | } 215 | while (self._ptr < endPtr) { 216 | (, uint256 charLen) = self._nextRaw(false); 217 | if (charLen == 0) return false; 218 | } 219 | return true; 220 | } 221 | 222 | /** 223 | * @dev VERY UNSAFE - a single invalid UTF-8 character can severely alter the result! 224 | * Consumes the iterator, counting the number of UTF-8 characters. 225 | * Significantly faster than safe `count`, especially for long multibyte strings. 226 | * 227 | * Note `count` is actually a bit more efficient than `validateUtf8`. 228 | * `count` is much more efficient than calling `validateUtf8` and `unsafeCount` together. 229 | * Use `unsafeCount` only when you are already certain that UTF-8 is valid. 230 | * If you want speed and no validation, just use byte length, it's faster and more predictably wrong. 231 | * 232 | * Some gas usage metrics: 233 | * 1 ascii char: 234 | * count: 571 gas 235 | * unsafeCount: 423 gas 236 | * 100 ascii chars: 237 | * count: 27406 gas 238 | * unsafeCount: 12900 gas 239 | * 1000 chinese chars (3000 bytes): 240 | * count: 799305 gas 241 | * unsafeCount: 178301 gas 242 | */ 243 | function unsafeCount(StrCharsIter memory self) pure returns (uint256 result) { 244 | uint256 endPtr; 245 | // (ptr+len is implicitly safe) 246 | unchecked { 247 | endPtr = self._ptr + self._len; 248 | } 249 | while (self._ptr < endPtr) { 250 | uint256 leadingByte; 251 | // unchecked mload 252 | // (unsafe, the last character could move the pointer past the boundary, but only once) 253 | /// @solidity memory-safe-assembly 254 | assembly { 255 | leadingByte := byte(0, mload( 256 | // load self._ptr (this is an optimization trick, since it's 1st in the struct) 257 | mload(self) 258 | )) 259 | } 260 | unchecked { 261 | // this is a very unsafe version of `utf8CharWidth`, 262 | // 
basically 1 invalid UTF-8 character can severely change the count result 263 | // (no real infinite loop risks, only one potential corrupt memory read) 264 | if (leadingByte < 0x80) { 265 | self._ptr += 1; 266 | } else if (leadingByte < 0xE0) { 267 | self._ptr += 2; 268 | } else if (leadingByte < 0xF0) { 269 | self._ptr += 3; 270 | } else { 271 | self._ptr += 4; 272 | } 273 | // +1 is safe because 2**256 cycles are impossible 274 | result += 1; 275 | } 276 | } 277 | self._len = 0; 278 | 279 | return result; 280 | } 281 | 282 | /*////////////////////////////////////////////////////////////////////////// 283 | FILE-LEVEL FUNCTIONS 284 | //////////////////////////////////////////////////////////////////////////*/ 285 | 286 | using { asSlice, _nextRaw, _sliceIter } for StrCharsIter; 287 | 288 | /** 289 | * @dev Views the underlying data as a `bytes` subslice of the original data. 290 | */ 291 | function asSlice(StrCharsIter memory self) pure returns (Slice slice) { 292 | return Slice__.fromUnchecked(self._ptr, self._len); 293 | } 294 | 295 | /** 296 | * @dev Used internally to efficiently reuse iteration logic. Has a lot of caveats. 297 | * NEITHER checks NOR modifies iterator length. 298 | * (Caller MUST guarantee that len != 0. Caller MUST modify len correctly themselves.) 299 | * Does NOT form the character properly, and returns raw unmasked bytes and length. 300 | * Does advance the iterator pointer. 301 | * 302 | * Validates UTF-8. 303 | * For valid chars advances the pointer by charLen. 304 | * For invalid chars behaviour depends on `revertOnInvalid`: 305 | * revertOnInvalid == true: revert. 306 | * revertOnInvalid == false: advance the pointer by 1, but return charLen 0. 307 | * 308 | * @return b raw unmasked bytes; if not discarded, then charLen SHOULD be used to mask it. 309 | * @return charLen length of a valid UTF-8 char; 0 for invalid chars. 
310 | * Guarantees that charLen <= self._len (as long as self._len != 0, which is the caller's guarantee) 311 | */ 312 | function _nextRaw(StrCharsIter memory self, bool revertOnInvalid) 313 | pure 314 | returns (bytes32 b, uint256 charLen) 315 | { 316 | // unchecked mload 317 | // (isValidUtf8 only checks the 1st character, which exists since caller guarantees len != 0) 318 | /// @solidity memory-safe-assembly 319 | assembly { 320 | b := mload( 321 | // load self._ptr (this is an optimization trick, since it's 1st in the struct) 322 | mload(self) 323 | ) 324 | } 325 | // validate character (0 => invalid; 1-4 => valid) 326 | charLen = isValidUtf8(b); 327 | 328 | if (charLen > self._len) { 329 | // mload didn't check bounds, 330 | // so a character that goes out of bounds could've been seen as valid. 331 | if (revertOnInvalid) revert StrChar__InvalidUTF8(); 332 | // safe because caller guarantees _len != 0 333 | unchecked { 334 | self._ptr += 1; 335 | } 336 | // invalid 337 | return (b, 0); 338 | } else if (charLen == 0) { 339 | if (revertOnInvalid) revert StrChar__InvalidUTF8(); 340 | // safe because caller guarantees _len != 0 341 | unchecked { 342 | self._ptr += 1; 343 | } 344 | // invalid 345 | return (b, 0); 346 | } else { 347 | // safe because of the `charLen > self._len` check earlier 348 | unchecked { 349 | self._ptr += charLen; 350 | } 351 | // valid 352 | return (b, charLen); 353 | } 354 | } 355 | 356 | /** 357 | * @dev Returns the underlying `SliceIter`. 358 | * AVOID USING THIS EXTERNALLY! 359 | * Advancing the underlying slice could lead to invalid UTF-8 for StrCharsIter. 
360 | */ 361 | function _sliceIter(StrCharsIter memory self) pure returns (SliceIter memory result) { 362 | assembly { 363 | result := self 364 | } 365 | } -------------------------------------------------------------------------------- /test/StrChar.t.sol: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: MIT 2 | 3 | pragma solidity ^0.8.17; 4 | 5 | import { PRBTest } from "@prb/test/src/PRBTest.sol"; 6 | 7 | import { StrChar, StrChar__, StrChar__InvalidUTF8 } from "../src/StrChar.sol"; 8 | import { Unicode__InvalidCode } from "../src/utils/unicode.sol"; 9 | 10 | contract StrCharTest is PRBTest { 11 | StrCharRevertHelper revertHelper; 12 | 13 | function setUp() public { 14 | revertHelper = new StrCharRevertHelper(); 15 | } 16 | 17 | function testCmp(uint32 _a, uint32 _b) public { 18 | vm.assume( 19 | !(0xD800 <= _a && _a <= 0xDFFF) && _a <= 0x10FFFF 20 | && !(0xD800 <= _b && _b <= 0xDFFF) && _b <= 0x10FFFF 21 | ); 22 | StrChar a = StrChar__.fromCodePoint(_a); 23 | StrChar b = StrChar__.fromCodePoint(_b); 24 | 25 | if (_a < _b) { 26 | assertTrue(a.cmp(b) < 0); 27 | assertFalse(a.eq(b)); 28 | assertTrue(a.ne(b)); 29 | assertTrue(a.lt(b)); 30 | assertTrue(a.lte(b)); 31 | assertFalse(a.gt(b)); 32 | assertFalse(a.gte(b)); 33 | } else if (_a > _b) { 34 | assertTrue(a.cmp(b) > 0); 35 | assertFalse(a.eq(b)); 36 | assertTrue(a.ne(b)); 37 | assertFalse(a.lt(b)); 38 | assertFalse(a.lte(b)); 39 | assertTrue(a.gt(b)); 40 | assertTrue(a.gte(b)); 41 | } else if (_a == _b) { 42 | assertTrue(a.cmp(b) == 0); 43 | assertTrue(a.eq(b)); 44 | assertFalse(a.ne(b)); 45 | assertFalse(a.lt(b)); 46 | assertTrue(a.lte(b)); 47 | assertFalse(a.gt(b)); 48 | assertTrue(a.gte(b)); 49 | } 50 | } 51 | 52 | function testCmp__Manual() public { 53 | StrChar a = StrChar__.fromCodePoint(0x00); 54 | StrChar b = StrChar__.fromCodePoint(0x01); 55 | assertTrue(a.cmp(b) < 0); 56 | assertFalse(a.eq(b)); 57 | assertTrue(a.ne(b)); 58 | 
assertTrue(a.lt(b)); 59 | assertTrue(a.lte(b)); 60 | assertFalse(a.gt(b)); 61 | assertFalse(a.gte(b)); 62 | 63 | a = StrChar__.fromCodePoint(0x757); 64 | b = StrChar__.fromCodePoint(0x7); 65 | assertTrue(a.cmp(b) > 0); 66 | assertFalse(a.eq(b)); 67 | assertTrue(a.ne(b)); 68 | assertFalse(a.lt(b)); 69 | assertFalse(a.lte(b)); 70 | assertTrue(a.gt(b)); 71 | assertTrue(a.gte(b)); 72 | 73 | a = StrChar__.fromCodePoint(0x10FFFF); 74 | b = StrChar__.fromCodePoint(0x10FFFF); 75 | assertTrue(a.cmp(b) == 0); 76 | assertTrue(a.eq(b)); 77 | assertFalse(a.ne(b)); 78 | assertFalse(a.lt(b)); 79 | assertTrue(a.lte(b)); 80 | assertFalse(a.gt(b)); 81 | assertTrue(a.gte(b)); 82 | } 83 | 84 | /*////////////////////////////////////////////////////////////////////////// 85 | 1 BYTE 86 | //////////////////////////////////////////////////////////////////////////*/ 87 | 88 | function testOneByte() public { 89 | for (uint256 i; i < 0x80; i++) { 90 | StrChar char = StrChar__.fromCodePoint(i); 91 | assertTrue(char.isValidUtf8()); 92 | assertEq(char.len(), 1); 93 | assertEq(char.toCodePoint(), i); 94 | assertEq(uint256(uint8(char.toBytes32()[0])), i); 95 | assertEq(uint256(uint8(bytes(char.toString())[0])), i); 96 | } 97 | } 98 | 99 | function testOneByte__Invalid() public { 100 | for (uint256 i = 0x80; i < 0x100; i++) { 101 | vm.expectRevert(StrChar__InvalidUTF8.selector); 102 | revertHelper.from(bytes32(i << 248)); 103 | } 104 | } 105 | 106 | // anything after a valid UTF-8 character is ignored 107 | function testOneByte__Trailing() public { 108 | assertEq(StrChar__.from(bytes32(hex"0080")).toCodePoint(), 0); 109 | assertEq(StrChar__.from(bytes32(hex"0011111111")).toCodePoint(), 0); 110 | } 111 | 112 | /*////////////////////////////////////////////////////////////////////////// 113 | 2 BYTES 114 | //////////////////////////////////////////////////////////////////////////*/ 115 | 116 | function testTwoByte() public { 117 | for (uint256 i = 0x80; i < 0x800; i++) { 118 | StrChar char = 
StrChar__.fromCodePoint(i); 119 | assertTrue(char.isValidUtf8()); 120 | assertEq(char.len(), 2); 121 | assertEq(char.toCodePoint(), i); 122 | } 123 | } 124 | 125 | // testing against solidity's own encoder 126 | function testTwoByte__Manual() public { 127 | assertEq(StrChar__.fromCodePoint(0x80).toBytes32(), bytes32("\u0080")); 128 | assertEq(StrChar__.fromCodePoint(0x80).toString(), string("\u0080")); 129 | assertEq(StrChar__.fromCodePoint(0x81).toBytes32(), bytes32("\u0081")); 130 | assertEq(StrChar__.fromCodePoint(0x81).toString(), string("\u0081")); 131 | assertEq(StrChar__.fromCodePoint(0x100).toBytes32(), bytes32("\u0100")); 132 | assertEq(StrChar__.fromCodePoint(0x100).toString(), string("\u0100")); 133 | assertEq(StrChar__.fromCodePoint(0x101).toBytes32(), bytes32("\u0101")); 134 | assertEq(StrChar__.fromCodePoint(0x101).toString(), string("\u0101")); 135 | assertEq(StrChar__.fromCodePoint(0x256).toBytes32(), bytes32("\u0256")); 136 | assertEq(StrChar__.fromCodePoint(0x256).toString(), string("\u0256")); 137 | assertEq(StrChar__.fromCodePoint(0x600).toBytes32(), bytes32("\u0600")); 138 | assertEq(StrChar__.fromCodePoint(0x600).toString(), string("\u0600")); 139 | assertEq(StrChar__.fromCodePoint(0x799).toBytes32(), bytes32("\u0799")); 140 | assertEq(StrChar__.fromCodePoint(0x799).toString(), string("\u0799")); 141 | } 142 | 143 | function testTwoByte__Invalid() public { 144 | vm.expectRevert(StrChar__InvalidUTF8.selector); 145 | revertHelper.from(bytes32(hex"E000")); 146 | vm.expectRevert(StrChar__InvalidUTF8.selector); 147 | revertHelper.from(bytes32(hex"E555")); 148 | vm.expectRevert(StrChar__InvalidUTF8.selector); 149 | revertHelper.from(bytes32(hex"FFFF")); 150 | vm.expectRevert(StrChar__InvalidUTF8.selector); 151 | revertHelper.from(bytes32(hex"C000")); 152 | vm.expectRevert(StrChar__InvalidUTF8.selector); 153 | revertHelper.from(bytes32(hex"C080")); 154 | vm.expectRevert(StrChar__InvalidUTF8.selector); 155 | revertHelper.from(bytes32(hex"C0C0")); 156 
| vm.expectRevert(StrChar__InvalidUTF8.selector); 157 | revertHelper.from(bytes32(hex"C190")); 158 | } 159 | 160 | function testTwoByte__Trailing() public { 161 | assertEq(StrChar__.from(bytes32(hex"C280111111")).toCodePoint(), 0x80); 162 | assertEq(StrChar__.from(bytes32(hex"C28000FFFF")).toCodePoint(), 0x80); 163 | } 164 | 165 | /*////////////////////////////////////////////////////////////////////////// 166 | 3 BYTES 167 | //////////////////////////////////////////////////////////////////////////*/ 168 | 169 | function testThreeByte() public { 170 | for (uint256 i = 0x800; i < 0x10000; i++) { 171 | if (0xD800 <= i && i <= 0xDFFF) { 172 | // skip surrogate halves 173 | continue; 174 | } 175 | StrChar char = StrChar__.fromCodePoint(i); 176 | assertTrue(char.isValidUtf8()); 177 | assertEq(char.len(), 3); 178 | assertEq(char.toCodePoint(), i); 179 | } 180 | } 181 | 182 | function testThreeByte__InvalidSurrogateHalf() public { 183 | for (uint256 i = 0xD800; i <= 0xDFFF; i++) { 184 | vm.expectRevert(Unicode__InvalidCode.selector); 185 | revertHelper.fromCodePoint(i); 186 | } 187 | } 188 | 189 | function testThreeByte__Manual() public { 190 | assertEq(StrChar__.fromCodePoint(0x800).toBytes32(), bytes32("\u0800")); 191 | assertEq(StrChar__.fromCodePoint(0x800).toString(), string("\u0800")); 192 | assertEq(StrChar__.fromCodePoint(0x801).toBytes32(), bytes32("\u0801")); 193 | assertEq(StrChar__.fromCodePoint(0x801).toString(), string("\u0801")); 194 | assertEq(StrChar__.fromCodePoint(0x999).toBytes32(), bytes32("\u0999")); 195 | assertEq(StrChar__.fromCodePoint(0x999).toString(), string("\u0999")); 196 | assertEq(StrChar__.fromCodePoint(0xFFF).toBytes32(), bytes32("\u0FFF")); 197 | assertEq(StrChar__.fromCodePoint(0xFFF).toString(), string("\u0FFF")); 198 | assertEq(StrChar__.fromCodePoint(0x1000).toBytes32(), bytes32("\u1000")); 199 | assertEq(StrChar__.fromCodePoint(0x1000).toString(), string("\u1000")); 200 | assertEq(StrChar__.fromCodePoint(0x1001).toBytes32(), 
// Each of these byte sequences must be rejected by `StrChar__.from`.
function testThreeByte__Invalid() public {
    bytes32[8] memory invalid = [
        bytes32(hex"F00000"),
        bytes32(hex"F08080"),
        bytes32(hex"FFFFFF"),
        bytes32(hex"E08080"),
        bytes32(hex"E09F80"),
        bytes32(hex"E0C080"),
        bytes32(hex"E0A07F"),
        bytes32(hex"EDA080")
    ];

    for (uint256 i; i < invalid.length; i++) {
        // the cheatcode applies to the next call only, so re-arm it every iteration
        vm.expectRevert(StrChar__InvalidUTF8.selector);
        revertHelper.from(invalid[i]);
    }
}

// Bytes after a complete 3-byte character must not affect decoding.
function testThreeByte__Trailing() public {
    bytes32 withJunk = bytes32(hex"E0A0801111");
    bytes32 withZeroThenJunk = bytes32(hex"E0A08000FF");

    assertEq(StrChar__.from(withJunk).toCodePoint(), 0x800);
    assertEq(StrChar__.from(withZeroThenJunk).toCodePoint(), 0x800);
}
/*//////////////////////////////////////////////////////////////////////////
                                4 BYTES
//////////////////////////////////////////////////////////////////////////*/

// The 4-byte range holds about a million code points; looping over all of them
// takes around 15 seconds, so only 65k from each end are round-tripped.
function testFourByte() public {
    // inclusive [first, last] windows at the bottom and the top of the range
    uint256[2] memory firsts = [uint256(0x10000), 0x100000];
    uint256[2] memory lasts = [uint256(0x1FFFF), 0x10FFFF];

    for (uint256 window; window < 2; window++) {
        for (uint256 code = firsts[window]; code <= lasts[window]; code++) {
            StrChar char = StrChar__.fromCodePoint(code);
            assertTrue(char.isValidUtf8());
            assertEq(char.len(), 4);
            assertEq(char.toCodePoint(), code);
        }
    }
}
// Each of these byte sequences must be rejected by `StrChar__.from`.
function testFourByte__Invalid() public {
    bytes32[8] memory invalid = [
        bytes32(hex"F0000000"),
        bytes32(hex"F0808080"),
        bytes32(hex"FFFFFFFF"),
        bytes32(hex"F08F8080"),
        bytes32(hex"F0C08080"),
        bytes32(hex"F17F8080"),
        bytes32(hex"F4908080"),
        bytes32(hex"F4BF8080")
    ];

    for (uint256 i; i < invalid.length; i++) {
        // the cheatcode applies to the next call only, so re-arm it every iteration
        vm.expectRevert(StrChar__InvalidUTF8.selector);
        revertHelper.from(invalid[i]);
    }
}

// Bytes after a complete 4-byte character must not affect decoding.
function testFourByte__Trailing() public {
    bytes32 withJunk = bytes32(hex"F09080801111");
    bytes32 withZeroThenJunk = bytes32(hex"F090808000FF");

    assertEq(StrChar__.from(withJunk).toCodePoint(), 0x10000);
    assertEq(StrChar__.from(withZeroThenJunk).toCodePoint(), 0x10000);
}

/*//////////////////////////////////////////////////////////////////////////
                                ASCII
//////////////////////////////////////////////////////////////////////////*/

// Code points below 0x80 are ASCII; everything sampled above is not.
function testIsAscii() public {
    uint256 code;

    for (; code < 0x80; code++) {
        assertTrue(StrChar__.fromCodePoint(code).isAscii());
    }

    // `code` continues from 0x80 here
    for (; code < 0x20000; code++) {
        // surrogate halves cannot be encoded, so they are skipped
        bool isSurrogateHalf = code >= 0xD800 && code <= 0xDFFF;
        if (!isSurrogateHalf) {
            assertFalse(StrChar__.fromCodePoint(code).isAscii());
        }
    }

    assertFalse(StrChar__.fromCodePoint(0x10FFFF).isAscii());
}
/**
 * @title A string slice.
 * @dev String slices must always be valid UTF-8.
 * `StrSlice` is built on `Slice` and only adds UTF-8 related logic on top;
 * conversions between the two in this file are plain wrap/unwrap calls.
 */
type StrSlice is uint256;

/*//////////////////////////////////////////////////////////////////////////
                                CUSTOM ERRORS
//////////////////////////////////////////////////////////////////////////*/

/// @dev Thrown when an index does not point at the start of a valid UTF-8 character.
error StrSlice__InvalidCharBoundary();

/*//////////////////////////////////////////////////////////////////////////
                              STATIC FUNCTIONS
//////////////////////////////////////////////////////////////////////////*/

library StrSlice__ {
    /**
     * @dev Wraps a `string` into a `StrSlice` without copying it.
     * The slice points at the string's data, i.e. 0x20 bytes past `str`
     * (right after the length word), and spans `bytes(str).length` bytes.
     */
    function from(string memory str) internal pure returns (StrSlice slice) {
        uint256 dataPtr;
        assembly {
            dataPtr := add(str, 0x20)
        }
        slice = fromRawParts(dataPtr, bytes(str).length);
    }

    /**
     * @dev Builds a `StrSlice` from a raw memory pointer and a byte length
     * by delegating to `Slice__.fromRawParts`.
     * The caller MUST guarantee memory-safety.
     * This method is primarily for internal use.
     */
    function fromRawParts(uint256 _ptr, uint256 _len) internal pure returns (StrSlice slice) {
        Slice raw = Slice__.fromRawParts(_ptr, _len);
        slice = StrSlice.wrap(Slice.unwrap(raw));
    }

    /**
     * @dev Returns true if the byte slice starts with a valid UTF-8 character.
     * Only what `toBytes32` yields is inspected; the rest of the slice is
     * not validated.
     */
    function isBoundaryStart(Slice slice) internal pure returns (bool) {
        // `isValidUtf8` returns 0 when the leading bytes are not a valid character
        return isValidUtf8(slice.toBytes32()) != 0;
    }
}

/**
 * @dev Free-function alternative to `StrSlice__.from()`.
 * `using for ... global` only works for user-defined types, so attach it
 * in your own file:
 * ```
 * using { toSlice } for string;
 * ```
 */
function toSlice(string memory str) pure returns (StrSlice slice) {
    slice = StrSlice__.from(str);
}

/*//////////////////////////////////////////////////////////////////////////
                              GLOBAL FUNCTIONS
//////////////////////////////////////////////////////////////////////////*/

using {
    asSlice,
    ptr, len, isEmpty,
    // conversion
    toString,
    keccak,
    // concatenation
    add, join,
    // compare
    cmp, eq, ne, lt, lte, gt, gte,
    // index
    isCharBoundary,
    get,
    splitAt, getSubslice,
    // search
    find, rfind, contains,
    startsWith, endsWith,
    // modify
    stripPrefix, stripSuffix,
    splitOnce, rsplitOnce,
    replacen,
    // iteration
    chars,
    // ascii
    isAscii
} for StrSlice global;
/**
 * @dev Reinterprets the `StrSlice` as its underlying `Slice` (no copy).
 * WARNING: manipulating `Slice`s can break UTF-8 for related `StrSlice`s!
 */
function asSlice(StrSlice self) pure returns (Slice) {
    uint256 packed = StrSlice.unwrap(self);
    return Slice.wrap(packed);
}

/**
 * @dev Returns the pointer to the start of an in-memory string slice.
 * The pointer is stored in the bits above the low 128 of the packed value.
 * This method is primarily for internal use.
 */
function ptr(StrSlice self) pure returns (uint256) {
    uint256 packed = StrSlice.unwrap(self);
    return packed >> 128;
}

/**
 * @dev Returns the length in bytes, not codepoints.
 * The length is the part of the packed value selected by `PackPtrLen.MASK_LEN`.
 */
function len(StrSlice self) pure returns (uint256) {
    uint256 packed = StrSlice.unwrap(self);
    return packed & PackPtrLen.MASK_LEN;
}

/**
 * @dev Returns true if the slice has a length of 0.
 */
function isEmpty(StrSlice self) pure returns (bool) {
    // explicit parentheses: mask first, then compare
    return (StrSlice.unwrap(self) & PackPtrLen.MASK_LEN) == 0;
}

/**
 * @dev Copies the slice's bytes into a newly allocated string.
 * The `StrSlice` keeps pointing at the original memory, NOT at the new string.
 */
function toString(StrSlice self) view returns (string memory) {
    bytes memory copied = self.asSlice().toBytes();
    return string(copied);
}

/**
 * @dev Returns keccak256 of all the bytes of `StrSlice`.
 * Note that for any `string memory b`, keccak256(b) == b.toSlice().keccak()
 * (keccak256 does not include the length word).
 */
function keccak(StrSlice self) pure returns (bytes32 result) {
    result = self.asSlice().keccak();
}

/**
 * @dev Concatenates `self` and `other` into a newly allocated string.
 */
function add(StrSlice self, StrSlice other) view returns (string memory) {
    bytes memory joined = self.asSlice().add(other.asSlice());
    return string(joined);
}
/**
 * @dev Flattens an array of `StrSlice`s into a single newly allocated string,
 * placing `self` as the separator between each.
 */
function join(StrSlice self, StrSlice[] memory strs) view returns (string memory) {
    // reinterpret the array in place as Slice[]; no copy is made
    Slice[] memory rawSlices;
    assembly {
        rawSlices := strs
    }
    bytes memory joined = self.asSlice().join(rawSlices);
    return string(joined);
}

/**
 * @dev Compare string slices lexicographically.
 * @return result 0 for equal, < 0 for less than and > 0 for greater than.
 */
function cmp(StrSlice self, StrSlice other) pure returns (int256 result) {
    result = self.asSlice().cmp(other.asSlice());
}

/// @dev `self` == `other`
/// Note more efficient than cmp
function eq(StrSlice self, StrSlice other) pure returns (bool) {
    Slice lhs = self.asSlice();
    return lhs.eq(other.asSlice());
}

/// @dev `self` != `other`
/// Note more efficient than cmp
function ne(StrSlice self, StrSlice other) pure returns (bool) {
    Slice lhs = self.asSlice();
    return lhs.ne(other.asSlice());
}

/// @dev `self` < `other`
function lt(StrSlice self, StrSlice other) pure returns (bool) {
    int256 order = self.cmp(other);
    return order < 0;
}

/// @dev `self` <= `other`
function lte(StrSlice self, StrSlice other) pure returns (bool) {
    int256 order = self.cmp(other);
    return order <= 0;
}

/// @dev `self` > `other`
function gt(StrSlice self, StrSlice other) pure returns (bool) {
    int256 order = self.cmp(other);
    return order > 0;
}

/// @dev `self` >= `other`
function gte(StrSlice self, StrSlice other) pure returns (bool) {
    int256 order = self.cmp(other);
    return order >= 0;
}
/**
 * @dev Checks that `index`-th byte is safe to split on.
 * The start and end of the string (when index == self.len()) are considered to be boundaries.
 * Returns false if index is greater than self.len().
 */
function isCharBoundary(StrSlice self, uint256 index) pure returns (bool) {
    uint256 selfLen = self.len();
    if (index > selfLen) return false;
    if (index == selfLen) return true;
    // inside the slice: a boundary is wherever a valid UTF-8 character starts
    bytes32 head = self.asSlice().getAfter(index).toBytes32();
    return isValidUtf8(head) != 0;
}

/**
 * @dev Returns the character at `index` (in bytes).
 * Reverts if index is out of bounds (via `getAfterStrict`), and with
 * `StrSlice__InvalidCharBoundary` if no valid UTF-8 character starts at `index`.
 */
function get(StrSlice self, uint256 index) pure returns (StrChar char) {
    bytes32 head = self.asSlice().getAfterStrict(index).toBytes32();
    // 0 means the leading bytes are not a valid character, otherwise its byte length
    uint256 byteLen = isValidUtf8(head);
    if (byteLen == 0) {
        revert StrSlice__InvalidCharBoundary();
    }
    char = StrChar__.fromUnchecked(head, byteLen);
}

/**
 * @dev Divides one string slice into two at an index.
 * Reverts when splitting on a non-boundary (use isCharBoundary).
 */
function splitAt(StrSlice self, uint256 mid) pure returns (StrSlice, StrSlice) {
    (Slice left, Slice right) = self.asSlice().splitAt(mid);
    // both halves have to begin with a valid character
    if (!(StrSlice__.isBoundaryStart(left) && StrSlice__.isBoundaryStart(right))) {
        revert StrSlice__InvalidCharBoundary();
    }
    return (
        StrSlice.wrap(Slice.unwrap(left)),
        StrSlice.wrap(Slice.unwrap(right))
    );
}
/**
 * @dev Returns a subslice [start..end) of `self`.
 * Reverts when slicing a non-boundary (use isCharBoundary).
 */
function getSubslice(StrSlice self, uint256 start, uint256 end) pure returns (StrSlice) {
    Slice whole = self.asSlice();
    Slice picked = whole.getSubslice(start, end);

    // `start` must be where a valid character begins
    if (!StrSlice__.isBoundaryStart(picked)) {
        revert StrSlice__InvalidCharBoundary();
    }
    // unless the subslice runs to the very end, whatever follows `end`
    // must begin with a valid character as well
    if (end != self.len()) {
        (, Slice remainder) = whole.splitAt(end);
        if (!StrSlice__.isBoundaryStart(remainder)) {
            revert StrSlice__InvalidCharBoundary();
        }
    }
    return StrSlice.wrap(Slice.unwrap(picked));
}

/**
 * @dev Returns the byte index of the first slice of `self` that matches `pattern`.
 * Returns type(uint256).max if the `pattern` does not match.
 */
function find(StrSlice self, StrSlice pattern) pure returns (uint256) {
    Slice haystack = self.asSlice();
    return haystack.find(pattern.asSlice());
}

/**
 * @dev Returns the byte index of the last slice of `self` that matches `pattern`.
 * Returns type(uint256).max if the `pattern` does not match.
 */
function rfind(StrSlice self, StrSlice pattern) pure returns (uint256) {
    Slice haystack = self.asSlice();
    return haystack.rfind(pattern.asSlice());
}

/**
 * @dev Returns true if the given pattern matches a sub-slice of this string slice.
 */
function contains(StrSlice self, StrSlice pattern) pure returns (bool) {
    Slice haystack = self.asSlice();
    return haystack.contains(pattern.asSlice());
}

/**
 * @dev Returns true if the given pattern matches a prefix of this string slice.
 */
function startsWith(StrSlice self, StrSlice pattern) pure returns (bool) {
    Slice haystack = self.asSlice();
    return haystack.startsWith(pattern.asSlice());
}

/**
 * @dev Returns true if the given pattern matches a suffix of this string slice.
 */
function endsWith(StrSlice self, StrSlice pattern) pure returns (bool) {
    Slice haystack = self.asSlice();
    return haystack.endsWith(pattern.asSlice());
}
/**
 * @dev Returns a subslice with the prefix removed.
 * If `self` does not start with `pattern`, returns `self` unmodified.
 */
function stripPrefix(StrSlice self, StrSlice pattern) pure returns (StrSlice result) {
    Slice stripped = self.asSlice().stripPrefix(pattern.asSlice());
    result = StrSlice.wrap(Slice.unwrap(stripped));
}

/**
 * @dev Returns a subslice with the suffix removed.
 * If `self` does not end with `pattern`, returns `self` unmodified.
 */
function stripSuffix(StrSlice self, StrSlice pattern) pure returns (StrSlice result) {
    Slice stripped = self.asSlice().stripSuffix(pattern.asSlice());
    result = StrSlice.wrap(Slice.unwrap(stripped));
}

/**
 * @dev Splits a slice into 2 on the first match of `pattern`.
 * If found == true, `prefix` and `suffix` will be strictly before and after the match.
 * If found == false, `prefix` will be the entire string and `suffix` will be empty.
 */
function splitOnce(StrSlice self, StrSlice pattern)
    pure
    returns (bool found, StrSlice prefix, StrSlice suffix)
{
    uint256 matchIndex = self.asSlice().find(pattern.asSlice());
    // `find` reports "no match" as type(uint256).max
    if (matchIndex != type(uint256).max) {
        return _splitFound(self, matchIndex, pattern.len());
    }
    // not found: everything stays in `prefix`, `suffix` is the zero (empty) slice
    return (false, self, StrSlice.wrap(0));
}
/**
 * @dev Splits a slice into 2 on the last match of `pattern`.
 * If found == true, `prefix` and `suffix` will be strictly before and after the match.
 * If found == false, `prefix` will be empty and `suffix` will be the entire string.
 */
function rsplitOnce(StrSlice self, StrSlice pattern)
    pure
    returns (bool found, StrSlice prefix, StrSlice suffix)
{
    uint256 index = self.asSlice().rfind(pattern.asSlice());
    if (index == type(uint256).max) {
        // not found
        return (false, StrSlice.wrap(0), self);
    } else {
        // found
        return self._splitFound(index, pattern.len());
    }
}

/**
 * *EXPERIMENTAL*
 * @dev Replaces first `n` matches of a pattern with another string slice.
 * Returns the result in a newly allocated string.
 * Note this does not modify the string `self` is a slice of.
 * WARNING: Requires pattern.len() > 0 and to.len() <= pattern.len()
 * (both are enforced with `assert`, so a violation panics).
 * The result buffer is allocated with `self.len()` bytes and can only be
 * shortened afterwards, which is why the replacement may not be longer
 * than the pattern.
 */
function replacen(
    StrSlice self,
    StrSlice pattern,
    StrSlice to,
    uint256 n
) view returns (string memory str) {
    uint256 patLen = pattern.len();
    uint256 toLen = to.len();
    // TODO dynamic string; atm length can be reduced but not increased
    assert(patLen >= toLen);
    assert(patLen > 0);

    // worst case (no match or toLen == patLen) the result is as long as `self`
    str = new string(self.len());
    // `iterSlice` is the not-yet-processed input, `resultSlice` the not-yet-written output
    Slice iterSlice = self.asSlice();
    Slice resultSlice = Slice__.from(bytes(str));

    uint256 matchNum;
    while (matchNum < n) {
        uint256 index = iterSlice.find(pattern.asSlice());
        // break if no more matches
        if (index == type(uint256).max) break;
        // copy prefix
        if (index > 0) {
            resultSlice
                .getBefore(index)
                .copyFromSlice(
                    iterSlice.getBefore(index)
                );
        }

        uint256 indexToEnd;
        // TODO this is fine atm only because toLen <= patLen
        unchecked {
            indexToEnd = index + toLen;
        }

        // copy replacement
        resultSlice
            .getSubslice(index, indexToEnd)
            .copyFromSlice(to.asSlice());

        // advance slices past the match
        // (input skips the pattern, output skips the replacement just written)
        iterSlice = iterSlice.getAfter(index + patLen);
        resultSlice = resultSlice.getAfter(indexToEnd);

        // break if iterSlice is done
        if (iterSlice.len() == 0) {
            break;
        }
        // safe because of `while` condition
        unchecked {
            matchNum++;
        }
    }

    // bytes written so far = how far the output cursor moved from the start of `str`'s data
    uint256 realLen = resultSlice.ptr() - StrSlice__.from(str).ptr();
    // copy suffix
    uint256 iterLen = iterSlice.len();
    if (iterLen > 0) {
        resultSlice
            .getBefore(iterLen)
            .copyFromSlice(iterSlice);
        realLen += iterLen;
    }
    // remove extra length
    if (bytes(str).length != realLen) {
        // TODO atm only accepting toLen <= patLen
        assert(realLen <= bytes(str).length);
        // overwrite the string's length word in place to shrink it
        /// @solidity memory-safe-assembly
        assembly {
            mstore(str, realLen)
        }
    }
    return str;
}

/**
 * @dev Returns a character iterator over the slice.
 * The iterator is initialized with the slice's pointer and byte length.
 * The iterator yields items from either side.
 */
function chars(StrSlice self) pure returns (StrCharsIter memory) {
    return StrCharsIter(self.ptr(), self.len());
}

/**
 * @dev Checks if all characters are within the ASCII range.
 *
 * Note this does NOT explicitly validate UTF-8.
 * Whereas ASCII certainly is valid UTF-8, non-ASCII *could* be invalid UTF-8.
 * Use `StrCharsIter` for explicit validation.
 */
function isAscii(StrSlice self) pure returns (bool) {
    return memIsAscii(self.ptr(), self.len());
}

/*//////////////////////////////////////////////////////////////////////////
                              FILE FUNCTIONS
//////////////////////////////////////////////////////////////////////////*/

// not `global`: `_splitFound` is attached to `StrSlice` for this file only
using { _splitFound } for StrSlice;
/**
 * @dev Splits a slice into [:index] and [index+patLen:].
 * CALLER GUARANTEE: `index` < self.len()
 * Reverts with `Slice__OutOfBounds` if index + patLen exceeds self.len().
 * The first return value is always true.
 * For internal use by split/rsplit.
 *
 * This is mostly just a faster alternative to `getBefore`+`getAfter`.
 */
function _splitFound(StrSlice self, uint256 index, uint256 patLen)
    pure
    returns (bool, StrSlice prefix, StrSlice suffix)
{
    uint256 basePtr = self.ptr();
    uint256 totalLen = self.len();

    // Unchecked arithmetic throughout: the caller guarantees index < totalLen,
    // and the bounds check below ensures the subtraction cannot underflow.
    unchecked {
        uint256 afterPattern = index + patLen;
        if (afterPattern > totalLen) {
            revert Slice__OutOfBounds();
        }

        // [:index] (inlined `getBefore`)
        Slice head = Slice__.fromUnchecked(basePtr, index);
        // [(index+patLen):] (inlined `getAfter`)
        Slice tail = Slice__.fromUnchecked(basePtr + afterPattern, totalLen - afterPattern);

        prefix = StrSlice.wrap(Slice.unwrap(head));
        suffix = StrSlice.wrap(Slice.unwrap(tail));
    }
    return (true, prefix, suffix);
}
// toBytes must give back exactly the bytes the slice was created from.
function testToBytes(bytes calldata _b) public {
    bytes memory roundTripped = _b.toSlice().toBytes();
    assertEq(_b, roundTripped);
}

// toBytes32 must equal the first data word of `_b`, or zero for empty input.
function testToBytes32(bytes memory _b) public {
    bytes32 expected;
    if (_b.length != 0) {
        /// @solidity memory-safe-assembly
        assembly {
            expected := mload(add(_b, 0x20))
        }
    }
    assertEq(expected, _b.toSlice().toBytes32());
}

// Two copies of the same bytes hash identically, and like native keccak256.
function testKeccak__Eq(bytes calldata _b) public {
    bytes memory first = _b;
    bytes memory second = _b;

    assertEq(first.toSlice().keccak(), second.toSlice().keccak());
    assertEq(keccak256(first), keccak256(second));
    assertEq(first.toSlice().keccak(), keccak256(first));
}

// Flipping one bit at a pseudorandom position must change the hash.
function testKeccak__NotEq(bytes calldata _b) public {
    vm.assume(_b.length > 0);
    bytes memory flipped = _b;
    bytes memory pristine = _b;

    uint256 flipIndex = uint256(keccak256(abi.encode(_b, "i"))) % _b.length;
    flipped[flipIndex] ^= 0x01;
    assertEq(flipped.toSlice().keccak(), keccak256(flipped));
    assertNotEq(flipped.toSlice().keccak(), pristine.toSlice().keccak());
    assertNotEq(keccak256(flipped), keccak256(pristine));
}

/*//////////////////////////////////////////////////////////////////////////
                                COMPARE
//////////////////////////////////////////////////////////////////////////*/

// don't use slice assertions here, since that'd be testing them with themselves
function testCmp() public {
    // greater than
    assertGt(toSlice("1").cmp(toSlice("0")), 0);
    assertGt(toSlice("1").cmp(toSlice("")), 0);
    assertGt(toSlice("12").cmp(toSlice("1")), 0);
    // equal
    assertEq(toSlice("1").cmp(toSlice("1")), 0);
    assertEq(toSlice("").cmp(toSlice("")), 0);
    // less than
    assertLt(toSlice("0").cmp(toSlice("1")), 0);
    assertLt(toSlice("").cmp(toSlice("1")), 0);
    assertLt(toSlice("1").cmp(toSlice("12")), 0);
}

// Same cases as testCmp, with a long common prefix before the differing tail.
function testCmp__Long() public {
    bytes memory b0 = "1234567890______________________________________________________0";
    bytes memory b1 = "1234567890______________________________________________________1";
    bytes memory b12 = "1234567890______________________________________________________12";
    bytes memory bn = "1234567890______________________________________________________";

    // greater than
    assertGt(toSlice(b1).cmp(toSlice(b0)), 0);
    assertGt(toSlice(b1).cmp(toSlice(bn)), 0);
    assertGt(toSlice(b12).cmp(toSlice(b1)), 0);
    // equal
    assertEq(toSlice(b1).cmp(toSlice(b1)), 0);
    assertEq(toSlice(bn).cmp(toSlice(bn)), 0);
    // less than
    assertLt(toSlice(b0).cmp(toSlice(b1)), 0);
    assertLt(toSlice(bn).cmp(toSlice(b1)), 0);
    assertLt(toSlice(b1).cmp(toSlice(b12)), 0);
}

// TODO more comparison tests for specialized funcs

/*//////////////////////////////////////////////////////////////////////////
                                    COPY
//////////////////////////////////////////////////////////////////////////*/

// Allocates `length` bytes and fills them via copyFromValue.
function _copyFromValue(uint256 length, bytes32 value) internal pure returns (Slice slice) {
    slice = (new bytes(length)).toSlice();
    slice.copyFromValue(value, length);
}

// Allocates `length` bytes and fills them via copyFromValueRightAligned.
function _copyFromValueRightAligned(uint256 length, bytes32 value) internal pure returns (Slice slice) {
    slice = (new bytes(length)).toSlice();
    slice.copyFromValueRightAligned(value, length);
}

// Copying into an equally sized destination must reproduce the source.
function testCopyFromSlice(bytes calldata _b) public {
    Slice source = _b.toSlice();
    Slice destination = (new bytes(_b.length)).toSlice();

    destination.copyFromSlice(source);

    assertEq(destination, source);
}

// A full 32-byte copy must match the packed encoding of the value.
function testCopyFromValue__Fuzz(bytes32 value) public {
    Slice target = (new bytes(32)).toSlice();

    target.copyFromValue(value, 32);

    assertEq(target, abi.encodePacked(value));
}
// Right-aligned values (integers widened to 32 bytes) must be copied so that
// the result equals the packed encoding of the original narrower integer.
function testCopyFromValue__RightAligned() public {
    uint8 oneByte = 1;
    uint16 twoBytes = 1000;
    uint128 sixteenBytes = 2**15 + 1;
    uint200 twentyFiveBytes = 123;
    uint256 fullWord = type(uint256).max;

    assertEq(_copyFromValueRightAligned(1, bytes32(uint256(oneByte))), abi.encodePacked(oneByte));
    assertEq(_copyFromValueRightAligned(2, bytes32(uint256(twoBytes))), abi.encodePacked(twoBytes));
    assertEq(_copyFromValueRightAligned(16, bytes32(uint256(sixteenBytes))), abi.encodePacked(sixteenBytes));
    assertEq(_copyFromValueRightAligned(25, bytes32(uint256(twentyFiveBytes))), abi.encodePacked(twentyFiveBytes));
    assertEq(_copyFromValueRightAligned(32, bytes32(uint256(fullWord))), abi.encodePacked(fullWord));
}
/*//////////////////////////////////////////////////////////////////////////
                            CONCATENATION
//////////////////////////////////////////////////////////////////////////*/

// Adding the two halves of the input must give back the input.
function testAdd(bytes calldata _b) public {
    uint256 mid = _b.length / 2;
    bytes memory head = _b[:mid];
    bytes memory tail = _b[mid:];

    assertEq(head.toSlice().add(tail.toSlice()), _b);
}

// Joining the two halves with an empty separator must give back the input.
function testJoin__EmptySeparator(bytes calldata _b) public {
    uint256 mid = _b.length / 2;
    bytes memory head = _b[:mid];
    bytes memory tail = _b[mid:];

    bytes memory sep;
    Slice[] memory parts = new Slice[](2);
    parts[0] = head.toSlice();
    parts[1] = tail.toSlice();

    assertEq(sep.toSlice().join(parts), _b);
}

// The input is cut into quarters: three parts plus the separator.
function testJoin__RandomSeparator(bytes calldata _b) public {
    uint256 q1 = _b.length * 1/4;
    uint256 q2 = _b.length * 2/4;
    uint256 q3 = _b.length * 3/4;

    bytes memory b1 = _b[:q1];
    bytes memory b2 = _b[q1:q2];
    bytes memory b3 = _b[q2:q3];
    bytes memory sep = _b[q3:];

    Slice[] memory parts = new Slice[](3);
    parts[0] = b1.toSlice();
    parts[1] = b2.toSlice();
    parts[2] = b3.toSlice();

    assertEq(sep.toSlice().join(parts), abi.encodePacked(b1, sep, b2, sep, b3));
}
abi.encodePacked(b1)); 256 | } 257 | 258 | function testJoin__ArrayLen0() public { 259 | bytes memory sep = hex'ABCD'; 260 | 261 | Slice[] memory slices; 262 | 263 | assertEq(sep.toSlice().join(slices), ''); 264 | } 265 | 266 | /*////////////////////////////////////////////////////////////////////////// 267 | INDEX 268 | //////////////////////////////////////////////////////////////////////////*/ 269 | 270 | function testGet(bytes calldata _b) public { 271 | Slice slice = _b.toSlice(); 272 | for (uint256 i; i < _b.length; i++) { 273 | assertEq(slice.get(i), uint8(_b[i])); 274 | } 275 | } 276 | 277 | function testGet__RevertOutOfBounds(bytes calldata _b) public { 278 | Slice slice = _b.toSlice(); 279 | vm.expectRevert(Slice__OutOfBounds.selector); 280 | slice.get(_b.length); 281 | } 282 | 283 | function testFirstLast(bytes calldata _b) public { 284 | vm.assume(_b.length > 0); 285 | Slice slice = _b.toSlice(); 286 | assertEq(slice.first(), uint8(_b[0])); 287 | assertEq(slice.last(), uint8(_b[_b.length - 1])); 288 | } 289 | 290 | function testSplitAt(bytes calldata _b) public { 291 | Slice slice = _b.toSlice(); 292 | (Slice s1, Slice s2) = slice.splitAt(_b.length / 2); 293 | assertEq( 294 | abi.encodePacked( 295 | s1.toBytes(), s2.toBytes() 296 | ), 297 | _b 298 | ); 299 | } 300 | 301 | function testSplitAt__0(bytes calldata _b) public { 302 | Slice slice = _b.toSlice(); 303 | (Slice s1, Slice s2) = slice.splitAt(0); 304 | assertEq(s2.toBytes(), _b); 305 | assertEq(s1.len(), 0); 306 | } 307 | 308 | function testSplitAt__Length(bytes calldata _b) public { 309 | Slice slice = _b.toSlice(); 310 | (Slice s1, Slice s2) = slice.splitAt(_b.length); 311 | assertEq(s1.toBytes(), _b); 312 | assertEq(s2.len(), 0); 313 | } 314 | 315 | function testGetSubslice(bytes calldata _b) public { 316 | // TODO fix self-referential pseudorandomness 317 | uint256 start = _b.length == 0 ? 0 : uint256(keccak256(abi.encode(_b, "start"))) % _b.length; 318 | uint256 end = _b.length == 0 ? 
0 : uint256(keccak256(abi.encode(_b, "end"))) % _b.length; 319 | vm.assume(start <= end); 320 | Slice subslice = _b.toSlice().getSubslice(start, end); 321 | assertEq(subslice.toBytes(), _b[start:end]); 322 | } 323 | 324 | function testGetSubslice__RevertStartAfterEnd(bytes calldata _b) public { 325 | // TODO fix self-referential pseudorandomness 326 | uint256 start = _b.length == 0 ? 0 : uint256(keccak256(abi.encode(_b, "start"))) % _b.length; 327 | uint256 end = _b.length == 0 ? 0 : uint256(keccak256(abi.encode(_b, "end"))) % _b.length; 328 | vm.assume(start > end); 329 | vm.expectRevert(Slice__OutOfBounds.selector); 330 | _b.toSlice().getSubslice(start, end); 331 | } 332 | 333 | function testGetBefore(bytes calldata _b) public { 334 | Slice s1 = _b.toSlice().getBefore(_b.length / 2); 335 | assertEq(s1, _b[:_b.length / 2]); 336 | } 337 | 338 | function testGetBefore_RevertOutOfBounds() public { 339 | bytes memory _b; 340 | vm.expectRevert(Slice__OutOfBounds.selector); 341 | _b.toSlice().getBefore(1); 342 | } 343 | 344 | function testGetAfter(bytes calldata _b) public { 345 | Slice s1 = _b.toSlice().getAfter(_b.length / 2); 346 | assertEq(s1, _b[_b.length / 2:]); 347 | } 348 | 349 | function testGetAfter_RevertOutOfBounds() public { 350 | bytes memory _b; 351 | vm.expectRevert(Slice__OutOfBounds.selector); 352 | _b.toSlice().getAfter(1); 353 | } 354 | 355 | function testGetAfterStrict(bytes calldata _b) public { 356 | vm.assume(_b.length > 0); 357 | Slice s1 = _b.toSlice().getAfterStrict(_b.length / 2); 358 | assertEq(s1, _b[_b.length / 2:]); 359 | } 360 | 361 | function testGetAfterStrict_RevertOutOfBounds() public { 362 | bytes memory _b; 363 | vm.expectRevert(Slice__OutOfBounds.selector); 364 | _b.toSlice().getAfterStrict(0); 365 | } 366 | 367 | /*////////////////////////////////////////////////////////////////////////// 368 | FIND 369 | //////////////////////////////////////////////////////////////////////////*/ 370 | 371 | function testFind(bytes calldata _b) 
public { 372 | bytes memory b1 = _b; 373 | bytes memory b2 = _b[_b.length / 8 : _b.length * 3 / 8]; 374 | vm.assume(b2.length > 0); 375 | 376 | uint256 offset = b1.toSlice().find(b2.toSlice()); 377 | // don't use assertContains here, since that'd be testing find with find itself 378 | checkOffset(b1, b2, offset); 379 | } 380 | 381 | function testFindEmpty(bytes calldata _b) public { 382 | bytes memory b1 = _b; 383 | bytes memory b2; 384 | 385 | uint256 offset = b1.toSlice().find(b2.toSlice()); 386 | assertEq(offset, 0); 387 | } 388 | 389 | function testFindEmptyInEmpty() public { 390 | bytes memory b1; 391 | bytes memory b2; 392 | 393 | uint256 offset = b1.toSlice().find(b2.toSlice()); 394 | assertEq(offset, 0); 395 | } 396 | 397 | function testFindNotEmptyInEmpty(bytes calldata _b) public { 398 | vm.assume(_b.length > 0); 399 | bytes memory b1; 400 | bytes memory b2 = _b; 401 | 402 | uint256 offset = b1.toSlice().find(b2.toSlice()); 403 | assertEq(offset, type(uint256).max); 404 | } 405 | 406 | /*////////////////////////////////////////////////////////////////////////// 407 | RFIND 408 | //////////////////////////////////////////////////////////////////////////*/ 409 | 410 | function testRfind(bytes calldata _b) public { 411 | bytes memory b1 = _b; 412 | bytes memory b2 = _b[_b.length * 5 / 8 : _b.length * 7 / 8]; 413 | vm.assume(b2.length > 0); 414 | 415 | uint256 offset = b1.toSlice().rfind(b2.toSlice()); 416 | checkOffset(b1, b2, offset); 417 | } 418 | 419 | function testRfindEmpty(bytes calldata _b) public { 420 | bytes memory b1 = _b; 421 | bytes memory b2; 422 | 423 | uint256 offset = b1.toSlice().rfind(b2.toSlice()); 424 | assertEq(offset, 0); 425 | } 426 | 427 | function testRfindEmptyInEmpty() public { 428 | bytes memory b1; 429 | bytes memory b2; 430 | 431 | uint256 offset = b1.toSlice().rfind(b2.toSlice()); 432 | assertEq(offset, 0); 433 | } 434 | 435 | function testRfindNotEmptyInEmpty(bytes calldata _b) public { 436 | vm.assume(_b.length > 0); 437 | 
bytes memory b1; 438 | bytes memory b2 = _b; 439 | 440 | uint256 offset = b1.toSlice().rfind(b2.toSlice()); 441 | assertEq(offset, type(uint256).max); 442 | } 443 | 444 | /*////////////////////////////////////////////////////////////////////////// 445 | SEARCH 446 | //////////////////////////////////////////////////////////////////////////*/ 447 | 448 | function testContains(bytes calldata _b) public { 449 | vm.assume(_b.length > 0); 450 | bytes memory pat = _b[_b.length / 2:_b.length / 2 + 1]; 451 | assertTrue(_b.toSlice().contains(pat.toSlice())); 452 | } 453 | 454 | function testContains__NotFound() public { 455 | bytes memory _b = "123456789"; 456 | bytes memory pat = "0"; 457 | assertFalse(_b.toSlice().contains(pat.toSlice())); 458 | } 459 | 460 | function testContains__EmptySelf() public { 461 | bytes memory _b = ""; 462 | bytes memory pat = "0"; 463 | assertFalse(_b.toSlice().contains(pat.toSlice())); 464 | } 465 | 466 | function testContains__EmptyPat() public { 467 | bytes memory _b = "123456789"; 468 | bytes memory pat = ""; 469 | assertTrue(_b.toSlice().contains(pat.toSlice())); 470 | } 471 | 472 | function testContains__EmptyBoth() public { 473 | bytes memory _b = ""; 474 | bytes memory pat = ""; 475 | assertTrue(_b.toSlice().contains(pat.toSlice())); 476 | } 477 | 478 | function testStartsWith(bytes calldata _b) public { 479 | uint256 i = _b.length == 0 ? 0 : uint256(keccak256(abi.encode(_b, "i"))) % _b.length; 480 | bytes memory pat = _b[:i]; 481 | assertTrue(_b.toSlice().startsWith(pat.toSlice())); 482 | } 483 | 484 | function testStartsWith__False() public { 485 | bytes memory _b = "123456789"; 486 | assertFalse(_b.toSlice().startsWith(bytes("2").toSlice())); 487 | assertFalse(_b.toSlice().startsWith(bytes("9").toSlice())); 488 | } 489 | 490 | function testEndsWith(bytes calldata _b) public { 491 | uint256 i = _b.length == 0 ? 
0 : uint256(keccak256(abi.encode(_b, "i"))) % _b.length; 492 | bytes memory pat = _b[i:]; 493 | assertTrue(_b.toSlice().endsWith(pat.toSlice())); 494 | } 495 | 496 | function testEndsWith__False() public { 497 | bytes memory _b = "123456789"; 498 | assertFalse(_b.toSlice().endsWith(bytes("1").toSlice())); 499 | assertFalse(_b.toSlice().endsWith(bytes("8").toSlice())); 500 | } 501 | 502 | /*////////////////////////////////////////////////////////////////////////// 503 | MODIFY 504 | //////////////////////////////////////////////////////////////////////////*/ 505 | 506 | function testStripPrefix() public { 507 | bytes memory _b = "12345"; 508 | assertEq(_b.toSlice().stripPrefix(bytes("123").toSlice()), bytes("45")); 509 | assertEq(_b.toSlice().stripPrefix(_b.toSlice()), bytes("")); 510 | assertEq(_b.toSlice().stripPrefix(bytes("").toSlice()), _b); 511 | assertEq(_b.toSlice().stripPrefix(bytes("5").toSlice()), _b); 512 | assertEq(_b.toSlice().stripPrefix(bytes("123456").toSlice()), _b); 513 | } 514 | 515 | function testStripPrefix__FromEmpty() public { 516 | bytes memory _b; 517 | assertEq(_b.toSlice().stripPrefix(bytes("1").toSlice()), _b); 518 | assertEq(_b.toSlice().stripPrefix(bytes("").toSlice()), _b); 519 | } 520 | 521 | function testStripSuffix() public { 522 | bytes memory _b = "12345"; 523 | assertEq(_b.toSlice().stripSuffix(bytes("345").toSlice()), bytes("12")); 524 | assertEq(_b.toSlice().stripSuffix(_b.toSlice()), bytes("")); 525 | assertEq(_b.toSlice().stripSuffix(bytes("").toSlice()), _b); 526 | assertEq(_b.toSlice().stripSuffix(bytes("1").toSlice()), _b); 527 | assertEq(_b.toSlice().stripSuffix(bytes("123456").toSlice()), _b); 528 | } 529 | 530 | function testStripSuffix__FromEmpty() public { 531 | bytes memory _b; 532 | assertEq(_b.toSlice().stripSuffix(bytes("1").toSlice()), _b); 533 | assertEq(_b.toSlice().stripSuffix(bytes("").toSlice()), _b); 534 | } 535 | } -------------------------------------------------------------------------------- 
/src/Slice.sol:
--------------------------------------------------------------------------------
// SPDX-License-Identifier: MIT

pragma solidity ^0.8.17;

import { mload8, memmove, memcmp, memeq, mstoreN, leftMask } from "./utils/mem.sol";
import { memchr, memrchr } from "./utils/memchr.sol";
import { PackPtrLen } from "./utils/PackPtrLen.sol";

import { SliceIter, SliceIter__ } from "./SliceIter.sol";

/**
 * @title A view into a contiguous sequence of 1-byte items.
 */
// Layout of the wrapped word: the memory pointer sits above bit 128,
// the length is whatever `PackPtrLen.MASK_LEN` selects.
type Slice is uint256;

/*//////////////////////////////////////////////////////////////////////////
                                CUSTOM ERRORS
//////////////////////////////////////////////////////////////////////////*/

// An index, range or byte count does not fit inside the slice.
error Slice__OutOfBounds();
// Two slices that must have equal lengths do not.
error Slice__LengthMismatch();

/*//////////////////////////////////////////////////////////////////////////
                              STATIC FUNCTIONS
//////////////////////////////////////////////////////////////////////////*/

library Slice__ {
    /**
     * @dev Builds a `Slice` over an existing `bytes` value without copying it.
     * The slice starts right after the length word of `b` and covers `b.length` bytes.
     */
    function from(bytes memory b) internal pure returns (Slice slice) {
        uint256 dataPtr;
        assembly {
            // skip the 32-byte length word
            dataPtr := add(b, 0x20)
        }
        slice = fromRawParts(dataPtr, b.length);
    }

    /**
     * @dev Builds a `Slice` from a raw memory pointer and a length,
     * packing them through `PackPtrLen.pack`.
     * The caller MUST guarantee memory-safety.
     * Mostly meant for internal use.
     */
    function fromRawParts(uint256 _ptr, uint256 _len) internal pure returns (Slice slice) {
        uint256 packed = PackPtrLen.pack(_ptr, _len);
        slice = Slice.wrap(packed);
    }

    /**
     * @dev Same purpose as `fromRawParts`, but packs by hand and validates nothing.
     * `_ptr` and `_len` MUST fit into uint128.
     * The caller MUST guarantee memory-safety.
     * Mostly meant for internal use.
     */
    function fromUnchecked(uint256 _ptr, uint256 _len) internal pure returns (Slice slice) {
        uint256 high = _ptr << 128;
        uint256 low = _len & PackPtrLen.MASK_LEN;
        slice = Slice.wrap(high | low);
    }
}

/**
 * @dev Free-function spelling of `Slice__.from()`.
 * To call it as a method, add this to your file
 * (`using ... for ... global` only works for user-defined types):
 * ```
 * using { toSlice } for bytes;
 * ```
 */
function toSlice(bytes memory b) pure returns (Slice slice) {
    slice = Slice__.from(b);
}

/*//////////////////////////////////////////////////////////////////////////
                              GLOBAL FUNCTIONS
//////////////////////////////////////////////////////////////////////////*/

using {
    ptr,
    len,
    isEmpty,
    // conversion
    toBytes,
    toBytes32,
    keccak,
    // concatenation
    add,
    join,
    // copy
    copyFromSlice,
    copyFromValue,
    copyFromValueRightAligned,
    // compare
    cmp,
    eq,
    ne,
    lt,
    lte,
    gt,
    gte,
    // index
    get,
    first,
    last,
    splitAt,
    getSubslice,
    getBefore,
    getAfter,
    getAfterStrict,
    // search
    find,
    rfind,
    contains,
    startsWith,
    endsWith,
    // modify
    stripPrefix,
    stripSuffix,
    // iteration
    iter
} for Slice global;

/**
 * @dev Memory pointer to the first byte of the slice.
 */
function ptr(Slice self) pure returns (uint256) {
    uint256 packed = Slice.unwrap(self);
    return packed >> 128;
}

/**
 * @dev Number of bytes covered by the slice.
 */
function len(Slice self) pure returns (uint256) {
    uint256 packed = Slice.unwrap(self);
    return packed & PackPtrLen.MASK_LEN;
}

/**
 * @dev True when the slice covers zero bytes.
 */
function isEmpty(Slice self) pure returns (bool) {
    return (Slice.unwrap(self) & PackPtrLen.MASK_LEN) == 0;
}

/**
 * @dev Allocates a fresh `bytes` and copies the slice contents into it.
 * `self` keeps pointing at the original memory, not at the copy.
 */
function toBytes(Slice self) view returns (bytes memory b) {
    uint256 selfLen = self.len();
    b = new bytes(selfLen);

    uint256 dst;
    assembly {
        dst := add(b, 0x20)
    }
    memmove(dst, self.ptr(), selfLen);
}

/**
 * @dev Loads up to the first 32 bytes of the slice into a `bytes32` value,
 * filled from left (MSB) to right (LSB).
 * When the slice is shorter than 32 bytes the remaining bytes are zero.
 */
function toBytes32(Slice self) pure returns (bytes32 b) {
    uint256 src = self.ptr();
    // the mask clears whatever the full-word load picked up past the slice end
    uint256 keep = leftMask(self.len());

    /// @solidity memory-safe-assembly
    assembly {
        b := and(mload(src), keep)
    }
}

/**
 * @dev keccak256 over exactly the bytes of the slice.
 * For any `bytes memory b`, keccak256(b) == b.toSlice().keccak()
 * (the length word of `b` is not part of the hashed data).
 */
function keccak(Slice self) pure returns (bytes32 result) {
    uint256 src = self.ptr();
    uint256 size = self.len();
    /// @solidity memory-safe-assembly
    assembly {
        result := keccak256(src, size)
    }
}

/**
 * @dev Returns a newly allocated `bytes` holding `self` followed by `other`.
 */
function add(Slice self, Slice other) view returns (bytes memory b) {
    uint256 leftLen = self.len();
    uint256 rightLen = other.len();
    b = new bytes(leftLen + rightLen);

    uint256 dst;
    assembly {
        dst := add(b, 0x20)
    }
    memmove(dst, self.ptr(), leftLen);
    memmove(dst + leftLen, other.ptr(), rightLen);
}

/**
 * @dev Concatenates `slices` into a newly allocated `bytes`,
 * inserting `self` as the separator between neighbouring elements.
 * An empty array yields empty bytes.
 *
 * TODO this is the wrong place for this method, but there are no other places atm
 * (since there's no proper chaining/reducers/anything)
 */
function join(Slice self, Slice[] memory slices) view returns (bytes memory b) {
    uint256 count = slices.length;
    if (count == 0) return "";

    uint256 sepLen = self.len();
    uint256 gaps;
    // cannot underflow: count != 0 was established above
    unchecked {
        gaps = count - 1;
    }

    // output size = all separator occurrences + all slices
    uint256 total = sepLen * gaps;
    for (uint256 i; i < count; i++) {
        total += slices[i].len();
    }

    b = new bytes(total);
    uint256 dst;
    assembly {
        dst := add(b, 0x20)
    }
    for (uint256 i; i < count; i++) {
        // a separator goes in front of every element except the first
        if (i != 0) {
            memmove(dst, self.ptr(), sepLen);
            dst += sepLen;
        }
        Slice piece = slices[i];
        uint256 pieceLen = piece.len();
        memmove(dst, piece.ptr(), pieceLen);
        dst += pieceLen;
    }
}

/**
 * @dev Overwrites the bytes of `self` with the bytes of `src`.
 * Reverts with `Slice__LengthMismatch` unless both have the same length.
 */
function copyFromSlice(Slice self, Slice src) view {
    uint256 size = self.len();
    if (src.len() != size) revert Slice__LengthMismatch();

    memmove(self.ptr(), src.ptr(), size);
}

/**
 * @dev Writes the first `length` bytes of `value` (counted from the MSB) to the start of `self`.
 * Reverts with `Slice__OutOfBounds` if `length` exceeds the slice length or 32.
 */
function copyFromValue(Slice self, bytes32 value, uint256 length) pure {
    bool fits = length <= self.len() && length <= 32;
    if (!fits) {
        revert Slice__OutOfBounds();
    }

    mstoreN(self.ptr(), value, length);
}

/**
 * @dev Variant of `copyFromValue` for right-aligned values such as uint32:
 * `value` is first shifted towards the MSB by (32 - `length`) bytes,
 * then its first `length` bytes are written to the start of `self`.
 * Reverts with `Slice__OutOfBounds` if `length` exceeds the slice length or 32.
 */
function copyFromValueRightAligned(Slice self, bytes32 value, uint256 length) pure {
    bool fits = length <= self.len() && length <= 32;
    if (!fits) {
        revert Slice__OutOfBounds();
    }

    bytes32 aligned = value;
    if (length < 32) {
        // cannot underflow: length < 32
        unchecked {
            aligned = value << ((32 - length) * 8);
        }
    }

    mstoreN(self.ptr(), aligned, length);
}

/**
 * @dev Lexicographic comparison of two slices.
 * @return result 0 for equal, < 0 for less than and > 0 for greater than.
 */
function cmp(Slice self, Slice other) pure returns (int256 result) {
    uint256 selfLen = self.len();
    uint256 otherLen = other.len();
    uint256 shared = otherLen < selfLen ? otherLen : selfLen;

    result = memcmp(self.ptr(), other.ptr(), shared);
    // the common part already differs: its verdict is final
    if (result != 0) return result;

    // equal common part: the longer slice is the greater one
    // (lengths take only 16 bytes, so the signed subtraction cannot overflow)
    unchecked {
        return int256(selfLen) - int256(otherLen);
    }
}

/// @dev `self` == `other`
/// Note more efficient than cmp
function eq(Slice self, Slice other) pure returns (bool) {
    uint256 size = self.len();
    return size == other.len() && memeq(self.ptr(), other.ptr(), size);
}

/// @dev `self` != `other`
/// Note more efficient than cmp
function ne(Slice self, Slice other) pure returns (bool) {
    uint256 size = self.len();
    return size != other.len() || !memeq(self.ptr(), other.ptr(), size);
}

/// @dev `self` < `other`
function lt(Slice self, Slice other) pure returns (bool) {
    int256 order = self.cmp(other);
    return order < 0;
}

/// @dev `self` <= `other`
function lte(Slice self, Slice other) pure returns (bool) {
    int256 order = self.cmp(other);
    return order <= 0;
}

/// @dev `self` > `other`
function gt(Slice self, Slice other) pure returns (bool) {
    int256 order = self.cmp(other);
    return order > 0;
}

/// @dev `self` >= `other`
function gte(Slice self, Slice other) pure returns (bool) {
    int256 order = self.cmp(other);
    return order >= 0;
}

/**
 * @dev Reads the byte at position `index`.
 * Reverts with `Slice__OutOfBounds` when `index` is not below the length.
 */
function get(Slice self, uint256 index) pure returns (uint8 item) {
    if (!(index < self.len())) revert Slice__OutOfBounds();

    // ptr and len are uint128 (because PackPtrLen) and index < len, so the sum cannot overflow
    unchecked {
        item = mload8(self.ptr() + index);
    }
}

/**
 * @dev Reads the first byte.
 * Reverts with `Slice__OutOfBounds` on an empty slice.
 */
function first(Slice self) pure returns (uint8 item) {
    if (self.isEmpty()) revert Slice__OutOfBounds();
    item = mload8(self.ptr());
}

/**
 * @dev Reads the last byte.
 * Reverts with `Slice__OutOfBounds` on an empty slice.
 */
function last(Slice self) pure returns (uint8 item) {
    uint256 size = self.len();
    if (size == 0) revert Slice__OutOfBounds();
    // size > 0, so size - 1 cannot underflow (ptr+len is implicitly safe)
    unchecked {
        item = mload8(self.ptr() + (size - 1));
    }
}

/**
 * @dev Splits the slice into `[:mid]` and `[mid:]`.
 * Reverts with `Slice__OutOfBounds` when `mid` exceeds the length.
 */
function splitAt(Slice self, uint256 mid) pure returns (Slice, Slice) {
    uint256 base = self.ptr();
    uint256 size = self.len();
    if (mid > size) revert Slice__OutOfBounds();

    Slice left = Slice__.fromUnchecked(base, mid);
    Slice right = Slice__.fromUnchecked(base + mid, size - mid);
    return (left, right);
}

/**
 * @dev Returns the subslice [start:end] of `self`.
 * Reverts with `Slice__OutOfBounds` unless start <= end <= length.
 */
function getSubslice(Slice self, uint256 start, uint256 end) pure returns (Slice) {
    if (start > end || end > self.len()) revert Slice__OutOfBounds();
    // ptr + start cannot overflow because start <= length (pointers are implicitly safe)
    // end - start cannot underflow because start <= end
    unchecked {
        return Slice__.fromUnchecked(self.ptr() + start, end - start);
    }
}

/**
 * @dev Returns the subslice [:index] of `self`.
 * Reverts with `Slice__OutOfBounds` when `index` exceeds the length.
 */
function getBefore(Slice self, uint256 index) pure returns (Slice) {
    if (index > self.len()) revert Slice__OutOfBounds();
    return Slice__.fromUnchecked(self.ptr(), index);
}

/**
 * @dev Returns the subslice [index:] of `self`.
 * Reverts with `Slice__OutOfBounds` when `index` exceeds the length.
 */
function getAfter(Slice self, uint256 index) pure returns (Slice) {
    uint256 size = self.len();
    if (index > size) revert Slice__OutOfBounds();
    // index <= size, so neither operation can wrap (ptr+len is implicitly safe)
    unchecked {
        return Slice__.fromUnchecked(self.ptr() + index, size - index);
    }
}

/**
 * @dev Returns the non-empty subslice [index:] of `self`.
 * Reverts with `Slice__OutOfBounds` when `index` is not below the length.
 */
function getAfterStrict(Slice self, uint256 index) pure returns (Slice) {
    uint256 size = self.len();
    if (index >= size) revert Slice__OutOfBounds();
    // index < size, so neither operation can wrap (ptr+len is implicitly safe)
    unchecked {
        return Slice__.fromUnchecked(self.ptr() + index, size - index);
    }
}

/**
 * @dev Byte index of the first occurrence of `pattern` inside `self`.
 * An empty `pattern` is reported at index 0.
 * Returns type(uint256).max when `pattern` does not occur.
 */
function find(Slice self, Slice pattern) pure returns (uint256) {
    uint256 notFound = type(uint256).max;

    // `remaining` is the length of the part of `self` not yet ruled out
    uint256 remaining = self.len();
    uint256 patLen = pattern.len();
    if (patLen == 0) return 0;
    // patLen >= 1 from here on, so this also rejects an empty `self`
    if (patLen > remaining) return notFound;

    uint256 selfPtr = self.ptr();
    uint256 cursor = selfPtr;
    uint256 patPtr = pattern.ptr();
    // low-level alternative to `first()` (safe because patLen != 0)
    uint8 patFirst = mload8(patPtr);

    while (true) {
        // jump to the next candidate: a byte equal to the pattern's first byte
        uint256 hop = memchr(cursor, remaining, patFirst);
        if (hop == notFound) return notFound;

        // hop < remaining, so neither update can wrap (ptr+len is implicitly safe)
        unchecked {
            cursor += hop;
            remaining -= hop;
        }
        // the pattern no longer fits behind the candidate
        if (patLen > remaining) return notFound;

        // full match at the candidate: report its offset from the slice start
        if (memeq(cursor, patPtr, patLen)) return cursor - selfPtr;

        // mismatch on the very last byte: nothing left to scan
        if (remaining == 1) return notFound;

        // step past the candidate, otherwise memchr would return it again (infinite loop)
        // remaining > 1 here, so the decrement cannot wrap
        unchecked {
            cursor++;
            remaining--;
        }
    }
    return notFound;
}

/**
 * @dev Byte index of the last occurrence of `pattern` inside `self`.
 * An empty `pattern` is reported at index 0.
 * Returns type(uint256).max when `pattern` does not occur.
 */
function rfind(Slice self, Slice pattern) pure returns (uint256) {
    uint256 notFound = type(uint256).max;

    // `searchLen` is the length of the leading part of `self` still to be scanned
    uint256 searchLen = self.len();
    uint256 patLen = pattern.len();
    if (patLen == 0) return 0;
    // patLen >= 1 from here on, so this also rejects an empty `self`
    if (patLen > searchLen) return notFound;

    uint256 selfPtr = self.ptr();
    uint256 patPtr = pattern.ptr();
    // working with indexes instead of lengths saves redundant increments/decrements
    uint256 patLastIndex;
    uint8 patLast;
    // patLen != 0, so patLen - 1 cannot underflow (ptr+len is implicitly safe)
    unchecked {
        patLastIndex = patLen - 1;
        patLast = mload8(patPtr + patLastIndex);
    }

    while (true) {
        // candidate: the right-most byte equal to the pattern's last byte
        uint256 endIndex = memrchr(selfPtr, searchLen, patLast);
        if (endIndex == notFound) return notFound;
        // the pattern would have to start before the beginning of `self`
        if (patLastIndex > endIndex) return notFound;

        // endIndex - patLastIndex is safe because of the check just above;
        // selfPtr + startIndex is safe because startIndex <= endIndex < searchLen <= length
        unchecked {
            // memrchr reports where a match would end; derive where it would start
            uint256 startIndex = endIndex - patLastIndex;
            if (memeq(selfPtr + startIndex, patPtr, patLen)) return startIndex;
        }

        // mismatch at the very first byte: nothing left to scan
        if (endIndex == 0) return notFound;

        // exclude the candidate from the next scan
        // (its index is exactly the length of the part in front of it)
        searchLen = endIndex;
    }
    return notFound;
}

/**
 * @dev True when `pattern` occurs somewhere inside `self`.
 */
function contains(Slice self, Slice pattern) pure returns (bool) {
    uint256 where = self.find(pattern);
    return where != type(uint256).max;
}

/**
 * @dev True when `self` begins with `pattern`.
 */
function startsWith(Slice self, Slice pattern) pure returns (bool) {
    uint256 selfLen = self.len();
    uint256 patLen = pattern.len();
    if (selfLen < patLen) return false;

    // trim `self` to the pattern's length unless it already has that length
    Slice head = selfLen > patLen ? self.getBefore(patLen) : self;
    return head.eq(pattern);
}

/**
 * @dev True when `self` ends with `pattern`.
 */
function endsWith(Slice self, Slice pattern) pure returns (bool) {
    uint256 selfLen = self.len();
    uint256 patLen = pattern.len();
    if (selfLen < patLen) return false;

    // trim `self` to the pattern's length unless it already has that length
    Slice tail = selfLen > patLen ? self.getAfter(selfLen - patLen) : self;
    return tail.eq(pattern);
}

/**
 * @dev Returns `self` without its leading `pattern`.
 * When `self` does not start with `pattern`, `self` is returned unmodified.
 */
function stripPrefix(Slice self, Slice pattern) pure returns (Slice) {
    uint256 patLen = pattern.len();
    if (patLen > self.len()) return self;

    (Slice head, Slice rest) = self.splitAt(patLen);
    return head.eq(pattern) ? rest : self;
}

/**
 * @dev Returns `self` without its trailing `pattern`.
 * When `self` does not end with `pattern`, `self` is returned unmodified.
 */
function stripSuffix(Slice self, Slice pattern) pure returns (Slice) {
    uint256 selfLen = self.len();
    uint256 patLen = pattern.len();
    if (patLen > selfLen) return self;

    uint256 cut;
    // cannot underflow: selfLen >= patLen
    unchecked {
        cut = selfLen - patLen;
    }
    (Slice rest, Slice tail) = self.splitAt(cut);
    return tail.eq(pattern) ? rest : self;
}

/**
 * @dev Creates an iterator over the slice.
 * The iterator yields items from either side.
 */
function iter(Slice self) pure returns (SliceIter memory) {
    return SliceIter__.from(self);
}
--------------------------------------------------------------------------------