├── .npmrc
├── .czrc
├── .commitlintrc
├── .gitignore
├── .husky
    └── commit-msg
├── foundry.toml
├── src
    ├── test
    │   ├── Assertions.sol
    │   ├── SliceAssertions.sol
    │   └── StrSliceAssertions.sol
    ├── utils
    │   ├── PackPtrLen.sol
    │   ├── memascii.sol
    │   ├── toString.sol
    │   ├── utf8.sol
    │   ├── unicode.sol
    │   ├── mem.sol
    │   └── memchr.sol
    ├── SliceIter.sol
    ├── StrChar.sol
    ├── StrCharsIter.sol
    ├── StrSlice.sol
    └── Slice.sol
├── .github
    └── workflows
    │   └── ci.yml
├── test
    ├── Utils.t.sol
    ├── Examples.t.sol
    ├── StrSliceAssertions.t.sol
    ├── SliceIter.t.sol
    ├── StrCharsIter.t.sol
    ├── SliceAssertions.t.sol
    ├── StrSlice.t.sol
    ├── StrChar.t.sol
    └── Slice.t.sol
├── package.json
├── LICENSE
├── CHANGELOG.md
└── README.md


/.npmrc:
--------------------------------------------------------------------------------
1 | message="chore(release): %s"


--------------------------------------------------------------------------------
/.czrc:
--------------------------------------------------------------------------------
1 | {
2 |   "path": "cz-conventional-changelog"
3 | }


--------------------------------------------------------------------------------
/.commitlintrc:
--------------------------------------------------------------------------------
1 | {
2 |   "extends": ["@commitlint/config-conventional"]
3 | }


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | node_modules
2 | cache
3 | abi
4 | out
5 | yarn-error.log
6 | .vscode


--------------------------------------------------------------------------------
/.husky/commit-msg:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env sh
2 | . "$(dirname -- "$0")/_/husky.sh"
3 | 
4 | npx --no -- commitlint --edit "${1}"
5 | 


--------------------------------------------------------------------------------
/foundry.toml:
--------------------------------------------------------------------------------
 1 | [profile.default]
 2 | src = "src"
 3 | out = "out"
 4 | libs = ["node_modules"]
 5 | include_paths = ["node_modules"]
 6 | optimizer = true
 7 | optimizer_runs = 1000000
 8 | verbosity = 2
 9 | 
10 | [fuzz]
11 | runs = 4096
12 | 
13 | [profile.ci]
14 | verbosity = 4


--------------------------------------------------------------------------------
/src/test/Assertions.sol:
--------------------------------------------------------------------------------
 1 | // SPDX-License-Identifier: MIT
 2 | 
 3 | pragma solidity ^0.8.17;
 4 | 
 5 | import { SliceAssertions } from "./SliceAssertions.sol";
 6 | import { StrSliceAssertions } from "./StrSliceAssertions.sol";
 7 | 
 8 | /// @title Extension to PRBTest with Slice and StrSlice assertions.
 9 | /// @dev Also provides lt,lte,gt,gte,contains for 2 native `bytes` and 2 native `string`.
10 | /// Declares no members of its own: it only combines `SliceAssertions` and
11 | /// `StrSliceAssertions`, so a test contract can inherit both through a single base.
12 | contract Assertions is SliceAssertions, StrSliceAssertions {
13 | }


--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
 1 | name: CI
 2 | 
 3 | on:
 4 |   push:
 5 |     branches:
 6 |       - "main"
 7 |   pull_request:
 8 |     branches:
 9 |       - "main"
10 | 
11 | jobs:
12 |   ci:
13 |     runs-on: ubuntu-latest
14 |     steps:
15 |       - uses: actions/checkout@v3
16 | 
17 |       - name: Install Node.js
18 |         uses: actions/setup-node@v3
19 |         with:
20 |           node-version: 18.12.1
21 |           cache: yarn
22 | 
23 |       - name: Install Foundry
24 |         uses: foundry-rs/foundry-toolchain@v1
25 |         with:
26 |           version: nightly
27 | 
28 |       - name: Install dependencies
29 |         run: yarn install --immutable
30 | 
31 |       - name: Run tests
32 |         run: yarn test
33 | 
34 |       - name: Build the contracts
35 |         run: forge build --sizes


--------------------------------------------------------------------------------
/test/Utils.t.sol:
--------------------------------------------------------------------------------
 1 | // SPDX-License-Identifier: MIT
 2 | 
 3 | pragma solidity ^0.8.17;
 4 | 
 5 | import { PRBTest } from "@prb/test/src/PRBTest.sol";
 6 | 
 7 | import { toString } from "../src/utils/toString.sol";
 8 | 
 9 | /// @dev Compares `toString` with the `vm.toString` cheatcode on the same inputs.
10 | contract UtilsTest is PRBTest {
11 |     /// @dev Covers 0..9999, 10**77 plus/minus 0..9999, and the two largest uint256 values.
12 |     function testUintToString() public {
13 |         uint256 pivot = 10**77;
14 |         uint256 top = type(uint256).max;
15 | 
16 |         for (uint256 small = 0; small < 10000; ++small) {
17 |             assertEq(toString(small), vm.toString(small));
18 |         }
19 |         for (uint256 delta = 0; delta < 10000; ++delta) {
20 |             uint256 below = pivot - delta;
21 |             uint256 above = pivot + delta;
22 |             assertEq(toString(below), vm.toString(below));
23 |             assertEq(toString(above), vm.toString(above));
24 |         }
25 |         assertEq(toString(top - 1), vm.toString(top - 1));
26 |         assertEq(toString(top), vm.toString(top));
27 |     }
28 | 
29 |     /// @dev Same comparison for an arbitrary fuzzed `value`.
30 |     function testUintToString__Fuzz(uint256 value) public {
31 |         string memory expected = vm.toString(value);
32 |         assertEq(toString(value), expected);
33 |     }
34 | }


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "@dk1a/solidity-stringutils",
 3 |   "version": "0.3.3",
 4 |   "author": "dk1a <dk1a@protonmail.com>",
 5 |   "license": "MIT",
 6 |   "description": "StrSlice & Slice library for Solidity",
 7 |   "keywords": [
 8 |     "solidity",
 9 |     "library",
10 |     "string",
11 |     "slice",
12 |     "ethereum",
13 |     "smart-contracts"
14 |   ],
15 |   "repository": {
16 |     "type": "git",
17 |     "url": "https://github.com/dk1a/solidity-stringutils.git"
18 |   },
19 |   "publishConfig": {
20 |     "access": "public"
21 |   },
22 |   "scripts": {
23 |     "prepare": "husky install",
24 |     "test": "forge test",
25 |     "build": "forge build",
26 |     "version": "conventional-changelog -p angular -i CHANGELOG.md -s && git add CHANGELOG.md"
27 |   },
28 |   "files": [
29 |     "/src"
30 |   ],
31 |   "devDependencies": {
32 |     "@commitlint/cli": "^17.4.2",
33 |     "@commitlint/config-conventional": "^17.4.2",
34 |     "@prb/test": "^0.2.1",
35 |     "conventional-changelog-cli": "^2.2.2",
36 |     "husky": "^8.0.3"
37 |   },
38 |   "dependencies": {}
39 | }
40 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2022 Kirill Dmitriev
 2 | 
 3 | Permission is hereby granted, free of charge, to any person obtaining a copy
 4 | of this software and associated documentation files (the "Software"), to deal
 5 | in the Software without restriction, including without limitation the rights
 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 7 | copies of the Software, and to permit persons to whom the Software is
 8 | furnished to do so, subject to the following conditions:
 9 | 
10 | The above copyright notice and this permission notice shall be included in all
11 | copies or substantial portions of the Software.
12 | 
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19 | SOFTWARE.


--------------------------------------------------------------------------------
/src/utils/PackPtrLen.sol:
--------------------------------------------------------------------------------
 1 | // SPDX-License-Identifier: MIT
 2 | 
 3 | pragma solidity ^0.8.17;
 4 | 
 5 | error PackedPtrLen__PtrOverflow();
 6 | error PackedPtrLen__LenOverflow();
 7 | 
 8 | /**
 9 |  * @title Pack ptr and len uint128 values into 1 uint256.
10 |  * @dev ptr is left/MSB. len is right/LSB.
11 |  * `pack`, `setPtr` and `setLen` revert when a value does not fit into 128 bits.
12 |  */
13 | library PackPtrLen {
14 |     /// @dev Largest value either half can hold.
15 |     uint256 constant MAX = type(uint128).max;
16 | 
17 |     /// @dev Selects the upper 128 bits (ptr).
18 |     uint256 constant MASK_PTR = uint256(type(uint128).max) << 128;
19 |     /// @dev Selects the lower 128 bits (len).
20 |     uint256 constant MASK_LEN = uint256(type(uint128).max);
21 | 
22 |     /**
23 |      * @dev Packs `ptr` into the upper and `len` into the lower 128 bits.
24 |      * Reverts with `PackedPtrLen__PtrOverflow` or `PackedPtrLen__LenOverflow`
25 |      * when the respective value exceeds `MAX`.
26 |      */
27 |     function pack(uint256 ptr, uint256 len) internal pure returns (uint256 packed) {
28 |         if (ptr > MAX) revert PackedPtrLen__PtrOverflow();
29 |         if (len > MAX) revert PackedPtrLen__LenOverflow();
30 |         return (ptr << 128) | (len & MASK_LEN);
31 |     }
32 | 
33 |     /// @dev Returns the ptr half (upper 128 bits) of `packed`.
34 |     function getPtr(uint256 packed) internal pure returns (uint256) {
35 |         return packed >> 128;
36 |     }
37 | 
38 |     /// @dev Returns the len half (lower 128 bits) of `packed`.
39 |     function getLen(uint256 packed) internal pure returns (uint256) {
40 |         return packed & MASK_LEN;
41 |     }
42 | 
43 |     /**
44 |      * @dev Returns `packed` with the ptr half replaced by `ptr`; the len half is kept.
45 |      * Reverts with `PackedPtrLen__PtrOverflow` when `ptr` exceeds `MAX`.
46 |      */
47 |     function setPtr(uint256 packed, uint256 ptr) internal pure returns (uint256) {
48 |         if (ptr > MAX) revert PackedPtrLen__PtrOverflow();
49 |         // keep only the len bits, then insert the new ptr above them
50 |         return (packed & MASK_LEN) | (ptr << 128);
51 |     }
52 | 
53 |     /**
54 |      * @dev Returns `packed` with the len half replaced by `len`; the ptr half is kept.
55 |      * Reverts with `PackedPtrLen__LenOverflow` when `len` exceeds `MAX`.
56 |      */
57 |     function setLen(uint256 packed, uint256 len) internal pure returns (uint256) {
58 |         if (len > MAX) revert PackedPtrLen__LenOverflow();
59 |         // keep only the ptr bits, then insert the new len below them
60 |         return (packed & MASK_PTR) | len;
61 |     }
62 | }


--------------------------------------------------------------------------------
/src/utils/memascii.sol:
--------------------------------------------------------------------------------
 1 | // SPDX-License-Identifier: MIT
 2 | 
 3 | pragma solidity ^0.8.17;
 4 | 
 5 | import { leftMask } from "./mem.sol";
 6 | 
 7 | /*
 8 |  * These functions are VERY DANGEROUS!
 9 |  * They operate directly on memory pointers, use with caution.
10 |  *
11 |  * Assembly here is marked as memory-safe for optimization.
12 |  * The caller MUST use pointers in a memory-safe way!
13 |  * https://docs.soliditylang.org/en/latest/assembly.html#memory-safety
14 |  */
15 | 
16 | /// @dev 32 0x80 bytes. 0x80 = 1000_0000
17 | uint256 constant ASCII_MASK = 0x80 * (type(uint256).max / type(uint8).max);
18 | 
19 | /**
20 |  * @dev Returns true when none of the `textLen` bytes starting at `textPtr` has its
21 |  * highest bit set, i.e. all of them are within the ASCII range.
22 |  * Whole 32-byte words are tested first, then the remaining `textLen % 32` bytes.
23 |  */
24 | function memIsAscii(uint256 textPtr, uint256 textLen) pure returns (bool) {
25 |     uint256 tailBytes;
26 |     uint256 wordsEnd;
27 |     // cannot wrap: tailBytes <= textLen (ptr+len is implicitly safe)
28 |     unchecked {
29 |         tailBytes = textLen % 32;
30 |         wordsEnd = textPtr + (textLen - tailBytes);
31 |     }
32 | 
33 |     uint256 word;
34 |     // whole words: one AND against the mask tests 32 bytes at once
35 |     for (uint256 cursor = textPtr; cursor < wordsEnd; ) {
36 |         /// @solidity memory-safe-assembly
37 |         assembly {
38 |             word := mload(cursor)
39 |         }
40 |         if ((word & ASCII_MASK) != 0) {
41 |             // at least one byte of this word is outside the ASCII range
42 |             return false;
43 |         }
44 |         // cannot wrap: cursor < wordsEnd, and wordsEnd = textPtr + n*32
45 |         unchecked {
46 |             cursor += 32;
47 |         }
48 |     }
49 | 
50 |     // the word at wordsEnd also holds bytes past the text; this mask removes them
51 |     uint256 tailMask = leftMask(tailBytes);
52 |     /// @solidity memory-safe-assembly
53 |     assembly {
54 |         word := and(mload(wordsEnd), tailMask)
55 |     }
56 |     return (word & ASCII_MASK) == 0;
57 | }


--------------------------------------------------------------------------------
/src/utils/toString.sol:
--------------------------------------------------------------------------------
 1 | // SPDX-License-Identifier: MIT
 2 | 
 3 | pragma solidity ^0.8.17;
 4 | 
 5 | /// @dev ASCII code of '0'; adding a digit 0-9 to it gives that digit's character.
 6 | uint256 constant ASCII_DIGIT_OFFSET = 0x30;
 7 | // 96 = 78 rounded up to a multiple of 32
 8 | // 78 = ceil(log10(2**256))
 9 | uint256 constant MAX_UINT256_STRING_LENGTH = 96;
10 | 
11 | /**
12 |  * @dev Converts `value` to its decimal string representation.
13 |  * WARNING: this is heavily gas-optimized and almost pure assembly.
14 |  * Prefer OpenZeppelin's toString when safety and readability matter more.
15 |  *
16 |  * (this is ~100 gas/digit, OZ is ~1000)
17 |  *
18 |  * Derived from https://github.com/moodlezoup/sol2string
19 |  */
20 | function toString(uint256 value) pure returns (string memory str) {
21 |     if (value < 10) {
22 |         // single digit: one length word plus one content word is all that is needed
23 |         /// @solidity memory-safe-assembly
24 |         assembly {
25 |             str := mload(0x40)
26 |             // move the free memory pointer past the string (0x20 length + 0x20 content)
27 |             mstore(0x40, add(str, 0x40))
28 |             // length = 1
29 |             mstore(str, 1)
30 |             // content = the digit's character
31 |             mstore8(add(str, 0x20), add(value, ASCII_DIGIT_OFFSET))
32 |         }
33 |         return str;
34 |     }
35 | 
36 |     uint256 basePtr;
37 |     uint256 cursor;
38 |     /// @solidity memory-safe-assembly
39 |     assembly {
40 |         // `cursor` is the address handed to mstore. mstore writes 32 bytes, so the digit
41 |         // (the lowest byte of the stored word) lands at cursor + 31.
42 |         // Starting at basePtr + MAX_UINT256_STRING_LENGTH puts the first digit into the
43 |         // very last byte of the allocation; every further digit goes one byte to the left.
44 |         basePtr := mload(0x40)
45 |         // deliberately excludes the 0x20 of the length word
46 |         cursor := add(basePtr, MAX_UINT256_STRING_LENGTH)
47 |         // overallocate: 0x20 for length + MAX_UINT256_STRING_LENGTH for content
48 |         mstore(0x40, add(0x20, cursor))
49 |     }
50 | 
51 |     // value >= 10 here, so there is always at least one digit to write;
52 |     // digits are produced from the least to the most significant one
53 |     do {
54 |         /// @solidity memory-safe-assembly
55 |         assembly {
56 |             mstore(cursor, add(mod(value, 10), ASCII_DIGIT_OFFSET))
57 |             value := div(value, 10)
58 |             cursor := sub(cursor, 1)
59 |         }
60 |     } while (value != 0);
61 | 
62 |     /// @solidity memory-safe-assembly
63 |     assembly {
64 |         // number of digits = how far cursor moved left from where it started
65 |         let digits := sub(add(basePtr, MAX_UINT256_STRING_LENGTH), cursor)
66 |         // the word at cursor becomes the length word; the digits start right after it
67 |         str := cursor
68 |         mstore(str, digits)
69 |     }
70 |     return str;
71 | }


--------------------------------------------------------------------------------
/test/Examples.t.sol:
--------------------------------------------------------------------------------
 1 | // SPDX-License-Identifier: MIT
 2 | 
 3 | pragma solidity ^0.8.17;
 4 | 
 5 | import { PRBTest } from "@prb/test/src/PRBTest.sol";
 6 | import { StrSliceAssertions } from "../src/test/StrSliceAssertions.sol";
 7 | 
 8 | import { StrSlice, toSlice, StrCharsIter } from "../src/StrSlice.sol";
 9 | import { StrChar__InvalidUTF8 } from "../src/StrChar.sol";
10 | 
11 | using { toSlice } for string;
12 | 
13 | /// @dev Returns what lies between the first "(" and the last ")" that follows it,
14 | /// or an empty slice if either bracket is missing
15 | function extractFromBrackets(string memory stuffInBrackets) pure returns (StrSlice extracted) {
16 |     // cut away everything up to and including the first "("
17 |     (bool opened, , StrSlice afterOpen) = stuffInBrackets.toSlice().splitOnce(toSlice("("));
18 |     if (!opened) {
19 |         return toSlice("");
20 |     }
21 | 
22 |     // keep only what precedes the last ")"
23 |     (bool closed, StrSlice inside, ) = afterOpen.rsplitOnce(toSlice(")"));
24 |     return closed ? inside : toSlice("");
25 | }
26 | 
27 | /// @dev Counts non-overlapping occurrences of `_pat` in `_haystack`, scanning from the start
28 | /// Assumes valid UTF-8
29 | function countOccurrences(string memory _haystack, string memory _pat) pure returns (uint256 counter) {
30 |     StrSlice rest = _haystack.toSlice();
31 |     StrSlice needle = _pat.toSlice();
32 |     // the index value this function treats as "no match"
33 |     uint256 notFound = type(uint256).max;
34 | 
35 |     for (uint256 at = rest.find(needle); at != notFound; at = rest.find(needle)) {
36 |         // resume the search right behind the match that was just found
37 |         rest = rest.getSubslice(at + needle.len(), rest.len());
38 |         counter++;
39 |     }
40 |     // NOTE(review): an empty `_pat` presumably never advances `rest` - confirm callers never pass one
41 | }
42 | 
43 | /// @dev Skips up to 2 leading UTF-8 characters of `str` and returns the rest as a StrSlice
44 | /// reverts on invalid UTF8
45 | function removeFirstTwoChars(string memory str) pure returns (StrSlice) {
46 |     StrCharsIter memory it = str.toSlice().chars();
47 |     // stops early when fewer than 2 characters are available
48 |     for (uint256 toSkip = 2; toSkip != 0 && !it.isEmpty(); toSkip--) {
49 |         it.next();
50 |     }
51 |     return it.asStr();
52 | }
53 | 
54 | /// @dev Exercises the example functions of this file with fixed inputs.
55 | contract ExamplesTest is PRBTest, StrSliceAssertions {
56 |     function testExtractFromBrackets() public {
57 |         // the outermost bracket pair is stripped, inner brackets stay
58 |         assertEq(extractFromBrackets("((1 + 2) + 3) + 4"), toSlice("(1 + 2) + 3"));
59 |         // a single ")" pairs with the first "("
60 |         assertEq(extractFromBrackets("((1 + 2) + 3"), toSlice("(1 + 2"));
61 |         // no ")" at all gives an empty slice
62 |         assertEq(extractFromBrackets("((1 + 2 + 3"), toSlice(""));
63 |     }
64 | 
65 |     function testCountOccurrences() public {
66 |         // the haystack is exactly the pattern
67 |         assertEq(countOccurrences(",", ","), 1);
68 |         assertEq(countOccurrences("1,2,3,456789,10", ","), 4);
69 |         // pattern absent
70 |         assertEq(countOccurrences("123", ","), 0);
71 |         // haystack is a lone 0xFF byte; the expected count is still 0
72 |         assertEq(countOccurrences(string(bytes(hex"FF")), "1"), 0);
73 |     }
74 | 
75 |     function testRemoveFirstTwoChars() public {
76 |         // fewer than 2 characters: everything is removed
77 |         assertEq(removeFirstTwoChars("1"), "");
78 |         assertEq(removeFirstTwoChars("12345"), "345");
79 |         // multi-byte characters count as 1 character each
80 |         assertEq(removeFirstTwoChars(unicode"こんにちは"), unicode"にちは");
81 |         assertEq(removeFirstTwoChars(unicode"📎!こんにちは"), unicode"こんにちは");
82 |     }
83 | 
84 |     function testRemoveFirstTwoChars__InvalidUTF8() public {
85 |         string memory invalid = string(bytes(hex"FF"));
86 |         vm.expectRevert(StrChar__InvalidUTF8.selector);
87 |         removeFirstTwoChars(invalid);
88 |     }
89 | }


--------------------------------------------------------------------------------
/src/SliceIter.sol:
--------------------------------------------------------------------------------
  1 | // SPDX-License-Identifier: MIT
  2 | 
  3 | pragma solidity ^0.8.17;
  4 | 
  5 | import { mload8 } from "./utils/mem.sol";
  6 | import { Slice, Slice__ } from "./Slice.sol";
  7 | 
  8 | /**
  9 |  * @title Slice iterator.
 10 |  * @dev This struct is created by the iter method on `Slice`.
 11 |  * Iterates only 1 byte (uint8) at a time.
 12 |  */
 13 | struct SliceIter {
 14 |     uint256 _ptr; // pointer to the byte that `next` returns; `next` moves it forward by 1
 15 |     uint256 _len; // bytes not yet consumed; `next` and `nextBack` each reduce it by 1
 16 | }
 17 | 
 18 | /*//////////////////////////////////////////////////////////////////////////
 19 |                                 CUSTOM ERRORS
 20 | //////////////////////////////////////////////////////////////////////////*/
 21 | 
 22 | error SliceIter__StopIteration();
 23 | 
 24 | /*//////////////////////////////////////////////////////////////////////////
 25 |                               STATIC FUNCTIONS
 26 | //////////////////////////////////////////////////////////////////////////*/
 27 | 
 28 | library SliceIter__ {
 29 |     /**
 30 |      * @dev Builds a `SliceIter` that covers all of `slice`.
 31 |      * The `Slice` is assumed to be memory-safe; nothing is checked here.
 32 |      */
 33 |     function from(Slice slice) internal pure returns (SliceIter memory) {
 34 |         return SliceIter({ _ptr: slice.ptr(), _len: slice.len() });
 35 |     }
 36 | }
 37 | 
 38 | /*//////////////////////////////////////////////////////////////////////////
 39 |                               GLOBAL FUNCTIONS
 40 | //////////////////////////////////////////////////////////////////////////*/
 41 | 
 42 | using { asSlice, ptr, len, isEmpty, next, nextBack } for SliceIter global;
 43 | 
 44 | /**
 45 |  * @dev Views the remaining (not yet consumed) range as a `Slice` of the original data.
 46 |  */
 47 | function asSlice(SliceIter memory self) pure returns (Slice slice) {
 48 |     slice = Slice__.fromUnchecked(self._ptr, self._len);
 49 | }
 50 | 
 51 | /**
 52 |  * @dev Returns the pointer to the first byte that `next` has not consumed yet.
 53 |  */
 54 | function ptr(SliceIter memory self) pure returns (uint256) {
 55 |     return self._ptr;
 56 | }
 57 | 
 58 | /**
 59 |  * @dev Returns the number of bytes left to iterate (1 less after each `next`/`nextBack`).
 60 |  */
 61 | function len(SliceIter memory self) pure returns (uint256) {
 62 |     return self._len;
 63 | }
 64 | 
 65 | /**
 66 |  * @dev Returns true if no bytes are left; `next` and `nextBack` revert in that state.
 67 |  */
 68 | function isEmpty(SliceIter memory self) pure returns (bool) {
 69 |     return self._len == 0;
 70 | }
 71 | 
 72 | /**
 73 |  * @dev Takes one byte off the front of the iterator and returns it.
 74 |  * Reverts with `SliceIter__StopIteration` if len == 0.
 75 |  */
 76 | function next(SliceIter memory self) pure returns (uint8 value) {
 77 |     uint256 remaining = self._len;
 78 |     if (remaining == 0) {
 79 |         revert SliceIter__StopIteration();
 80 |     }
 81 |     uint256 head = self._ptr;
 82 |     // cannot wrap: remaining >= 1 was just checked (ptr+len is implicitly safe)
 83 |     unchecked {
 84 |         // shrink the remaining range from the left
 85 |         self._len = remaining - 1;
 86 |         self._ptr = head + 1;
 87 |     }
 88 |     value = mload8(head);
 89 | }
 90 | 
 91 | /**
 92 |  * @dev Takes one byte off the back of the iterator and returns it.
 93 |  * Reverts with `SliceIter__StopIteration` if len == 0.
 94 |  */
 95 | function nextBack(SliceIter memory self) pure returns (uint8 value) {
 96 |     uint256 remaining = self._len;
 97 |     if (remaining == 0) {
 98 |         revert SliceIter__StopIteration();
 99 |     }
100 |     uint256 head = self._ptr;
101 |     // cannot wrap: remaining >= 1 was just checked (ptr+len is implicitly safe)
102 |     unchecked {
103 |         uint256 lastOffset = remaining - 1;
104 |         // shrink the remaining range from the right; the pointer stays where it is
105 |         self._len = lastOffset;
106 |         value = mload8(head + lastOffset);
107 |     }
108 | }


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
 1 | ## [0.3.3](https://github.com/dk1a/solidity-stringutils/compare/v0.3.1...v0.3.3) (2023-01-18)
 2 | 
 3 | 
 4 | ### Bug Fixes
 5 | 
 6 | * correct an annotation placement ([d73c0e6](https://github.com/dk1a/solidity-stringutils/commit/d73c0e62c51a3f538ba91170a42678094aea402c))
 7 | 
 8 | 
 9 | ### Features
10 | 
11 | * **Slice:** add copyFromValue ([120e375](https://github.com/dk1a/solidity-stringutils/commit/120e37525234a5d148f4fe6ec93cac3feb45982b))
12 | 
13 | 
14 | 
15 | ## [0.3.2](https://github.com/dk1a/solidity-stringutils/compare/v0.3.1...v0.3.2) (2022-12-21)
16 | 
17 | 
18 | ### Bug Fixes
19 | 
20 | * correct an annotation placement ([d73c0e6](https://github.com/dk1a/solidity-stringutils/commit/d73c0e62c51a3f538ba91170a42678094aea402c))
21 | 
22 | 
23 | 
24 | ## [0.3.1](https://github.com/dk1a/solidity-stringutils/compare/v0.3.0...v0.3.1) (2022-12-15)
25 | 
26 | 
27 | ### Features
28 | 
29 | * add fast uint to string conversion ([5a975fe](https://github.com/dk1a/solidity-stringutils/commit/5a975fe509ad6e5cac5f07590f28faf6b2ca65e5))
30 | * add isAscii ([dfb9916](https://github.com/dk1a/solidity-stringutils/commit/dfb9916b4477e34c382016f2b977f24389812685))
31 | 
32 | 
33 | 
34 | # [0.3.0](https://github.com/dk1a/solidity-stringutils/compare/v0.2.2...v0.3.0) (2022-12-12)
35 | 
36 | 
37 | ### Features
38 | 
39 | * add optimizations to StrCharsIter, StrChar ([06a0b55](https://github.com/dk1a/solidity-stringutils/commit/06a0b55171af1e0d31e86327f4be6dafe2a6e6fc))
40 | * add unicode code point support and tests for StrChar ([07f2047](https://github.com/dk1a/solidity-stringutils/commit/07f2047962992ef18712103d8ac08bd856213cb0))
41 | 
42 | 
43 | 
44 | ## [0.2.2](https://github.com/dk1a/solidity-stringutils/compare/v0.2.1...v0.2.2) (2022-12-11)
45 | 
46 | 
47 | ### Bug Fixes
48 | 
49 | * fix critical issues in SliceIter tests ([6887ae4](https://github.com/dk1a/solidity-stringutils/commit/6887ae48ceb59c789930f748d35432954f1453c0))
50 | * fix critical issues with nextBack in StrCharsIter and its tests; add optimizations ([6fcb355](https://github.com/dk1a/solidity-stringutils/commit/6fcb355baef25ac11a54097a20313bfe7fe96ce0))
51 | 
52 | 
53 | 
54 | ## [0.2.1](https://github.com/dk1a/solidity-stringutils/compare/v0.2.0...v0.2.1) (2022-12-09)
55 | 
56 | 
57 | 
58 | # [0.2.0](https://github.com/dk1a/solidity-stringutils/compare/v0.1.1...v0.2.0) (2022-12-09)
59 | 
60 | 
61 | ### Features
62 | 
63 | * add replacen ([3cc586b](https://github.com/dk1a/solidity-stringutils/commit/3cc586be116be77279f2004323380ea6742709fe))
64 | * update readme ([b269b98](https://github.com/dk1a/solidity-stringutils/commit/b269b98a34eea64e3173721fc6d42af2107b9367))
65 | * use memmove instead of memcpy ([8b6a6a5](https://github.com/dk1a/solidity-stringutils/commit/8b6a6a5dd009cf4e16ce8a42a4470678e4018454))
66 | 
67 | 
68 | 
69 | ## [0.1.1](https://github.com/dk1a/solidity-stringutils/compare/v0.1.0...v0.1.1) (2022-12-08)
70 | 
71 | 
72 | 
73 | # 0.1.0 (2022-12-07)
74 | 
75 | 
76 | ### Bug Fixes
77 | 
78 | * stripSuffix ([6fabda5](https://github.com/dk1a/solidity-stringutils/commit/6fabda5d7abe1617dc278304b831f1d173ae2218))
79 | 
80 | 
81 | ### Features
82 | 
83 | * add getAfterStrict, more tests ([2e5d62b](https://github.com/dk1a/solidity-stringutils/commit/2e5d62b87d3a889229b424f899256d827d268936))
84 | * add splitOnce ([501dc41](https://github.com/dk1a/solidity-stringutils/commit/501dc41807f33671ce87607b63d3ea66be560802))
85 | * add string slice, char, char iterator ([fe0a65e](https://github.com/dk1a/solidity-stringutils/commit/fe0a65e24bcbc87bf77c00ea8e1df3258d89d0b0))
86 | * add StrSlice assertions ([f069e7e](https://github.com/dk1a/solidity-stringutils/commit/f069e7e964596c9fc269bfee6dfe83104f3d01d1))
87 | * initial commit ([d4b2ed0](https://github.com/dk1a/solidity-stringutils/commit/d4b2ed0d63167bf98a4476b68f36fa00a0268b4f))
88 | 
89 | 
90 | 
91 | 


--------------------------------------------------------------------------------
/src/utils/utf8.sol:
--------------------------------------------------------------------------------
  1 | // SPDX-License-Identifier: MIT
  2 | 
  3 | pragma solidity ^0.8.17;
  4 | 
  5 | /**
  6 |  * @dev Maps the leading byte of a UTF-8 character to the character's length in bytes.
  7 |  * Returns 0 when `leadingByte` cannot start a character.
  8 |  */
  9 | function utf8CharWidth(uint256 leadingByte) pure returns (uint256) {
 10 |     // 0x00-0x7F
 11 |     if (leadingByte < 0x80) return 1;
 12 |     // 0x80-0xC1: not a leading byte
 13 |     if (leadingByte < 0xC2) return 0;
 14 |     // 0xC2-0xDF
 15 |     if (leadingByte < 0xE0) return 2;
 16 |     // 0xE0-0xEF
 17 |     if (leadingByte < 0xF0) return 3;
 18 |     // 0xF0-0xF4
 19 |     if (leadingByte < 0xF5) return 4;
 20 | 
 21 |     // 0xF5 and above: not a leading byte
 22 |     return 0;
 23 | }
 24 | 
 25 | /**
 26 |  * @dev Returns true if `b` can start a UTF-8 character, i.e. `utf8CharWidth(b)` is not 0.
 27 |  */
 28 | function isLeadingByte(uint256 b) pure returns (bool) {
 29 |     return utf8CharWidth(b) != 0;
 30 | }
 31 | 
 32 | /**
 33 |  * @dev Validates the single UTF-8 encoded character held in the 1-4 bytes at the MSB of `b`.
 34 |  * Returns the character's length in bytes, or 0 if the bytes are not a valid character.
 35 |  * (utf8CharWidth validates ONLY the leading byte, not the whole character)
 36 |  *
 37 |  * Note if MSB is 0x00, this will return 1, since 0x00 is valid UTF-8.
 38 |  * Works faster for smaller code points.
 39 |  *
 40 |  * https://www.rfc-editor.org/rfc/rfc3629#section-4
 41 |  * UTF8-char   = UTF8-1 / UTF8-2 / UTF8-3 / UTF8-4
 42 |  * UTF8-1      = %x00-7F
 43 |  * UTF8-2      = %xC2-DF UTF8-tail
 44 |  * UTF8-3      = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
 45 |  *               %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
 46 |  * UTF8-4      = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
 47 |  *               %xF4 %x80-8F 2( UTF8-tail )
 48 |  * UTF8-tail   = %x80-BF
 49 |  */
 50 | function isValidUtf8(bytes32 b) pure returns (uint256) {
 51 |     // TODO you can significantly optimize comparisons with bitmasks,
 52 |     // some stuff to look at:
 53 |     // https://github.com/zwegner/faster-utf8-validator/blob/master/z_validate.c
 54 |     // https://github.com/websockets/utf-8-validate/blob/master/src/validation.c
 55 |     // https://github.com/simdutf/simdutf/blob/master/src/scalar/utf8.h
 56 | 
 57 |     uint8 lead = uint8(b[0]);
 58 |     // UTF8-1 = %x00-7F: ascii needs no further checks
 59 |     if (lead < 0x80) return 1;
 60 | 
 61 |     // width is 2 for C2-DF, 3 for E0-EF, 4 for F0-F4, and 0 for everything else
 62 |     uint256 width = utf8CharWidth(lead);
 63 |     if (width == 0) return 0;
 64 | 
 65 |     // Allowed range of the second byte: the plain UTF8-tail range, narrowed for the
 66 |     // four leading bytes the grammar lists separately (E0, ED, F0, F4).
 67 |     uint8 lo = 0x80;
 68 |     uint8 hi = 0xBF;
 69 |     if (width == 3) {
 70 |         if (lead == 0xE0) lo = 0xA0;
 71 |         else if (lead == 0xED) hi = 0x9F;
 72 |     } else if (width == 4) {
 73 |         if (lead == 0xF0) lo = 0x90;
 74 |         else if (lead == 0xF4) hi = 0x8F;
 75 |     }
 76 |     uint8 second = uint8(b[1]);
 77 |     if (second < lo || second > hi) return 0;
 78 | 
 79 |     // every byte after the second one must be a plain UTF8-tail
 80 |     if (width >= 3 && !_utf8Tail(uint8(b[2]))) return 0;
 81 |     if (width == 4 && !_utf8Tail(uint8(b[3]))) return 0;
 82 | 
 83 |     return width;
 84 | }
 85 | 
 86 | /// @dev UTF8-tail = %x80-BF
 87 | function _utf8Tail(uint256 b) pure returns (bool) {
 88 |     // keep bits 7 and 6 (0xC0 = 0b1100_0000) and require them to be 10 (0x80 = 0b1000_0000)
 89 |     // and,cmp should be faster than cmp,cmp,and
 90 |     uint256 topTwoBits = b & 0xC0;
 91 |     return topTwoBits == 0x80;
 92 | }


--------------------------------------------------------------------------------
/src/utils/unicode.sol:
--------------------------------------------------------------------------------
  1 | // SPDX-License-Identifier: MIT
  2 | 
  3 | pragma solidity ^0.8.17;
  4 | 
  5 | import { utf8CharWidth } from "./utf8.sol";
  6 | 
  7 | /*
  8 |  * IMPORTANT: Here `uint256` represents 1 code point (aka unicode scalar values),
  9 |  * NOT a UTF-8 encoded character!
 10 |  * E.g. for '€' code point = 0x20AC; whereas UTF-8 encoding = 0xE282AC.
 11 |  *
 12 |  * Only conversion to/from UTF-8 is addressed here.
 13 |  * Note that UTF-16 surrogate halves are invalid code points even if UTF-16 was supported.
 14 |  */
 15 | 
 16 | error Unicode__InvalidCode(); // raised by encodeUtf8 for U+D800-U+DFFF and for codes above MAX
 17 | 
 18 | /// @dev The highest valid code point.
 19 | uint256 constant MAX = 0x10FFFF;
 20 | 
 21 | // UTF-8 ranges (exclusive upper bounds: encodeUtf8 compares with `code < MAX_..._B`)
 22 | uint256 constant MAX_ONE_B = 0x80; // codes below this are encoded as 1 byte
 23 | uint256 constant MAX_TWO_B = 0x800; // codes below this are encoded as at most 2 bytes
 24 | uint256 constant MAX_THREE_B = 0x10000; // codes below this are encoded as at most 3 bytes
 25 | // and tags for encoding characters
 26 | uint256 constant TAG_CONT = 0x80; // 0b1000_0000, OR-ed into every continuation byte
 27 | uint256 constant TAG_TWO_B = 0xC0; // 0b1100_0000, OR-ed into the leading byte of 2-byte forms
 28 | uint256 constant TAG_THREE_B = 0xE0; // 0b1110_0000, OR-ed into the leading byte of 3-byte forms
 29 | uint256 constant TAG_FOUR_B = 0xF0; // 0b1111_0000, OR-ed into the leading byte of 4-byte forms
 30 | // and continuation byte mask
 31 | uint256 constant MASK_CONT = 0x3F; // 0b0011_1111, the 6 payload bits of a continuation byte
 32 | 
 33 | /**
 34 |  * @dev Converts a unicode code point into its UTF-8 byte sequence.
 35 |  * The 1-4 encoded bytes are left-aligned (the first one is at MSB), the other bytes are zero.
 36 |  * Reverts with `Unicode__InvalidCode` for UTF-16 surrogate halves and for values above `MAX`.
 37 |  */
 38 | function encodeUtf8(uint256 code) pure returns (bytes32) {
 39 |     // the encoded bytes; bit offsets 248, 240, 232, 224 are the 1st-4th byte of the word
 40 |     uint256 utf8;
 41 |     if (code < MAX_ONE_B) {
 42 |         // 0xxx_xxxx
 43 |         utf8 = code << 248;
 44 |     } else if (code < MAX_TWO_B) {
 45 |         // 110x_xxxx 10xx_xxxx
 46 |         utf8 =
 47 |             (TAG_TWO_B | (code >> 6)) << 248 |
 48 |             (TAG_CONT | (code & MASK_CONT)) << 240;
 49 |     } else if (code < MAX_THREE_B) {
 50 |         // U+D800–U+DFFF (invalid UTF-16 surrogate halves): below 0x10000 their bits 11-15 are 0b11011
 51 |         if (code & 0xF800 == 0xD800) revert Unicode__InvalidCode();
 52 |         // 1110_xxxx 10xx_xxxx 10xx_xxxx
 53 |         utf8 =
 54 |             (TAG_THREE_B | (code >> 12)) << 248 |
 55 |             (TAG_CONT | ((code >> 6) & MASK_CONT)) << 240 |
 56 |             (TAG_CONT | (code & MASK_CONT)) << 232;
 57 |     } else if (code <= MAX) {
 58 |         // 1111_0xxx 10xx_xxxx 10xx_xxxx 10xx_xxxx
 59 |         utf8 =
 60 |             (TAG_FOUR_B | (code >> 18)) << 248 |
 61 |             (TAG_CONT | ((code >> 12) & MASK_CONT)) << 240 |
 62 |             (TAG_CONT | ((code >> 6) & MASK_CONT)) << 232 |
 63 |             (TAG_CONT | (code & MASK_CONT)) << 224;
 64 |     } else {
 65 |         // above the highest code point
 66 |         revert Unicode__InvalidCode();
 67 |     }
 68 |     return bytes32(utf8);
 69 | }
 70 | 
 71 | /**
 72 |  * @dev Decodes one UTF-8 encoded character into its code point.
 73 |  * E.g. UTF-8 encoding 0xE282AC ('€') gives code point 0x20AC.
 74 |  *
 75 |  * Validates ONLY the leading byte (through `utf8CharWidth`); continuation bytes are masked
 76 |  * with `MASK_CONT` without being checked. Use `isValidCodePoint` on the result
 77 |  * if UTF-8 wasn't validated.
 78 |  *
 79 |  * The input is 1-4 bytes starting at MSB, whatever follows the character is ignored.
 80 |  * Reverts with `Unicode__InvalidCode` if the width reported for the leading byte isn't 1-4.
 81 |  */
 82 | function decodeUtf8(bytes32 str) pure returns (uint256) {
 83 |     uint256 lead = uint8(str[0]);
 84 |     uint256 width = utf8CharWidth(lead);
 85 |     // data bits (low 6) of the bytes that may follow; the ones a width doesn't need stay unused
 86 |     uint256 tail1 = uint8(str[1]) & MASK_CONT;
 87 |     uint256 tail2 = uint8(str[2]) & MASK_CONT;
 88 |     uint256 tail3 = uint8(str[3]) & MASK_CONT;
 89 | 
 90 |     if (width == 1) {
 91 |         // single byte: the byte is the code point
 92 |         return lead;
 93 |     }
 94 | 
 95 |     if (width == 2) {
 96 |         // 0x1F = 0001_1111
 97 |         return (lead & 0x1F) << 6 | tail1;
 98 |     }
 99 | 
100 |     if (width == 3) {
101 |         // 0x0F = 0000_1111
102 |         return (lead & 0x0F) << 12 | tail1 << 6 | tail2;
103 |     }
104 | 
105 |     if (width == 4) {
106 |         // 0x07 = 0000_0111
107 |         return (lead & 0x07) << 18 | tail1 << 12 | tail2 << 6 | tail3;
108 |     }
109 | 
110 |     // the leading byte doesn't start a 1-4 byte character
111 |     revert Unicode__InvalidCode();
112 | }
113 | 
114 | /**
115 |  * @dev Returns how many bytes (1-4) a code point takes in UTF-8 encoding.
116 |  * Does NOT validate it: anything from `MAX_THREE_B` up (even above `MAX`) yields 4.
117 |  * WARNING: atm this function is neither used nor tested in this repo
118 |  */
119 | function lenUtf8(uint256 code) pure returns (uint256) {
120 |     uint256 width = 4;
121 |     if (code < MAX_ONE_B) {
122 |         width = 1;
123 |     } else if (code < MAX_TWO_B) {
124 |         width = 2;
125 |     } else if (code < MAX_THREE_B) {
126 |         width = 3;
127 |     }
128 |     return width;
129 | }
130 | 
131 | /**
132 |  * @dev Returns true if the code point is valid: it is at most `MAX`
133 |  * and it is not an invalid UTF-16 surrogate half (U+D800–U+DFFF).
134 |  * WARNING: atm this function is neither used nor tested in this repo
135 |  */
136 | function isValidCodePoint(uint256 code) pure returns (bool) {
137 |     bool isSurrogate = 0xD800 <= code && code <= 0xDFFF;
138 |     return !isSurrogate && code <= MAX;
139 | }


--------------------------------------------------------------------------------
/test/StrSliceAssertions.t.sol:
--------------------------------------------------------------------------------
  1 | // SPDX-License-Identifier: MIT
  2 | 
  3 | pragma solidity ^0.8.17;
  4 | 
  5 | import { PRBTest } from "@prb/test/src/PRBTest.sol";
  6 | import { StrSliceAssertions } from "../src/test/StrSliceAssertions.sol";
  7 | 
  8 | import { StrSlice, toSlice } from "../src/StrSlice.sol";
  9 | 
 10 | using { toSlice } for string;
 11 | 
 12 | // StrSlice just wraps Slice's comparators, so these tests don't fuzz
 13 | // TODO currently invalid UTF-8 compares like bytes, but should it revert?
 14 | contract StrSliceAssertionsTest is PRBTest, StrSliceAssertions {
 15 |     /*//////////////////////////////////////////////////////////////////////////
 16 |                                         EQUALITY
 17 |     //////////////////////////////////////////////////////////////////////////*/
 18 | 
 19 |     /// @dev Equal content satisfies Eq, Lte and Gte for every StrSlice/string argument mix.
 20 |     function testEq() public {
 21 |         string memory b = unicode"こんにちは";
 22 |         // compare new assertions
 23 |         assertEq(b.toSlice(), b.toSlice());
 24 |         assertEq(b.toSlice(), b);
 25 |         assertEq(b, b.toSlice());
 26 | 
 27 |         assertLte(b.toSlice(), b.toSlice());
 28 |         assertLte(b.toSlice(), b);
 29 |         assertLte(b, b.toSlice());
 30 | 
 31 |         assertGte(b.toSlice(), b.toSlice());
 32 |         assertGte(b.toSlice(), b);
 33 |         assertGte(b, b.toSlice());
 34 |         // to the existing ones
 35 |         assertEq(b.toSlice().toString(), b.toSlice().toString());
 36 |         assertEq(b.toSlice().toString(), b);
 37 |         assertEq(b, b.toSlice().toString());
 38 |     }
 39 | 
 40 |     /// @dev Slices are passed on purpose: with two plain strings the call would resolve to the
 41 |     /// string assertion (the "existing" one in testEq) instead of the StrSlice one under test.
 42 |     function testFailEq() public {
 43 |         assertEq(toSlice(unicode"こん"), toSlice(unicode"こ"));
 44 |     }
 45 | 
 46 |     /// @dev Different content satisfies NotEq for every StrSlice/string argument mix.
 47 |     function testNotEq() public {
 48 |         string memory b1 = unicode"こ";
 49 |         string memory b2 = unicode"ん";
 50 |         // compare new assertions
 51 |         assertNotEq(b1.toSlice(), b2.toSlice());
 52 |         assertNotEq(b1.toSlice(), b2);
 53 |         assertNotEq(b1, b2.toSlice());
 54 |         // to the existing ones
 55 |         assertNotEq(b1.toSlice().toString(), b2.toSlice().toString());
 56 |         assertNotEq(b1.toSlice().toString(), b2);
 57 |         assertNotEq(b1, b2.toSlice().toString());
 58 |     }
 59 | 
 60 |     /// @dev Slices are passed on purpose, for the same reason as in testFailEq.
 61 |     function testFailNotEq() public {
 62 |         assertNotEq(toSlice(unicode"こんにちは"), toSlice(unicode"こんにちは"));
 63 |     }
 64 | 
 65 |     /*//////////////////////////////////////////////////////////////////////////
 66 |                                     LESS-THAN
 67 |     //////////////////////////////////////////////////////////////////////////*/
 68 | 
 69 |     /// @dev A smaller left side satisfies both Lt and Lte for every argument mix.
 70 |     function testLt() public {
 71 |         string memory b1 = unicode"こ";
 72 |         string memory b2 = unicode"ん";
 73 | 
 74 |         assertLt(b1.toSlice(), b2.toSlice());
 75 |         assertLt(b1.toSlice(), b2);
 76 |         assertLt(b1, b2.toSlice());
 77 |         assertLt(b1, b2);
 78 | 
 79 |         assertLte(b1.toSlice(), b2.toSlice());
 80 |         assertLte(b1.toSlice(), b2);
 81 |         assertLte(b1, b2.toSlice());
 82 |         assertLte(b1, b2);
 83 |     }
 84 | 
 85 |     /// @dev Lt must fail when the left side is the greater one.
 86 |     function testFailLt() public {
 87 |         string memory b1 = unicode"こ";
 88 |         string memory b2 = unicode"ん";
 89 | 
 90 |         assertLt(b2, b1);
 91 |     }
 92 | 
 93 |     /// @dev Lt is strict: it must fail for equal operands.
 94 |     function testFailLt__ForEq() public {
 95 |         string memory b = unicode"こ";
 96 |         assertLt(b, b);
 97 |     }
 98 | 
 99 |     /// @dev Lte must fail when the left side is the greater one.
100 |     function testFailLte() public {
101 |         string memory b1 = unicode"こ";
102 |         string memory b2 = unicode"ん";
103 | 
104 |         assertLte(b2, b1);
105 |     }
106 | 
107 |     /*//////////////////////////////////////////////////////////////////////////
108 |                                     GREATER-THAN
109 |     //////////////////////////////////////////////////////////////////////////*/
110 | 
111 |     /// @dev A greater left side satisfies both Gt and Gte for every argument mix.
112 |     function testGt() public {
113 |         string memory b1 = unicode"ん";
114 |         string memory b2 = unicode"こ";
115 | 
116 |         assertGt(b1.toSlice(), b2.toSlice());
117 |         assertGt(b1.toSlice(), b2);
118 |         assertGt(b1, b2.toSlice());
119 |         assertGt(b1, b2);
120 | 
121 |         assertGte(b1.toSlice(), b2.toSlice());
122 |         assertGte(b1.toSlice(), b2);
123 |         assertGte(b1, b2.toSlice());
124 |         assertGte(b1, b2);
125 |     }
126 | 
127 |     /// @dev Gt must fail when the left side is the smaller one.
128 |     function testFailGt() public {
129 |         string memory b1 = unicode"ん";
130 |         string memory b2 = unicode"こ";
131 | 
132 |         assertGt(b2, b1);
133 |     }
134 | 
135 |     /// @dev Gt is strict: it must fail for equal operands.
136 |     function testFailGt__ForEq() public {
137 |         string memory b = unicode"こ";
138 |         assertGt(b, b);
139 |     }
140 | 
141 |     /// @dev Gte must fail when the left side is the smaller one.
142 |     function testFailGte() public {
143 |         string memory b1 = unicode"ん";
144 |         string memory b2 = unicode"こ";
145 | 
146 |         assertGte(b2, b1);
147 |     }
148 | 
149 |     /*//////////////////////////////////////////////////////////////////////////
150 |                                     CONTAINS
151 |     //////////////////////////////////////////////////////////////////////////*/
152 | 
153 |     /// @dev A substring is reported as contained for every argument mix.
154 |     function testContains() public {
155 |         string memory b1 = unicode"こんにちは";
156 |         string memory b2 = unicode"んにち";
157 | 
158 |         assertContains(b1.toSlice(), b2.toSlice());
159 |         assertContains(b1.toSlice(), b2);
160 |         assertContains(b1, b2.toSlice());
161 |         assertContains(b1, b2);
162 |     }
163 | 
164 |     /// @dev Contains must fail for a pattern that doesn't occur in the haystack.
165 |     function testFailContains() public {
166 |         string memory b1 = unicode"こんにちは";
167 |         string memory b2 = unicode"ここ";
168 | 
169 |         assertContains(b1, b2);
170 |     }
171 | }


--------------------------------------------------------------------------------
/src/utils/mem.sol:
--------------------------------------------------------------------------------
  1 | // SPDX-License-Identifier: MIT
  2 | 
  3 | pragma solidity ^0.8.17;
  4 | 
  5 | /*
  6 |  * These functions are VERY DANGEROUS!
  7 |  * They operate directly on memory pointers, use with caution.
  8 |  *
  9 |  * Assembly here is marked as memory-safe for optimization.
 10 |  * The caller MUST use pointers in a memory-safe way!
 11 |  * https://docs.soliditylang.org/en/latest/assembly.html#memory-safety
 12 |  */
 13 | 
 14 | /**
 15 |  * @dev Reads the single byte located at `ptr`.
 16 |  * It is returned as a number (right-aligned, in the least significant byte), hence uint8.
 17 |  */
 18 | function mload8(uint256 ptr) pure returns (uint8 item) {
 19 |     /// @solidity memory-safe-assembly
 20 |     assembly {
 21 |         // mload reads 32 bytes starting at ptr; move the first (most significant) one down to LSB
 22 |         item := shr(248, mload(ptr))
 23 |     }
 24 | }
 25 | 
 26 | /**
 27 |  * @dev Copies `length` bytes of memory from `ptrSrc` to `ptrDest`.
 28 |  * Bytes of the last destination word that lie after the copied range are preserved.
 29 |  * WARNING: Does not handle pointer overlap!
 30 |  */
 31 | function memcpy(uint256 ptrDest, uint256 ptrSrc, uint256 length) pure {
 32 |     // whole 32-byte words first
 33 |     for (; length >= 32; ) {
 34 |         /// @solidity memory-safe-assembly
 35 |         assembly {
 36 |             mstore(ptrDest, mload(ptrSrc))
 37 |         }
 38 |         // unchecked is fine: in total the pointers advance by <= length
 39 |         // (ptr+len is implicitly safe)
 40 |         unchecked {
 41 |             ptrSrc += 32;
 42 |             ptrDest += 32;
 43 |             length -= 32;
 44 |         }
 45 |     }
 46 | 
 47 |     // then the 0-31 byte tail (what follows is an inlined `mstoreN`):
 48 |     // `tailMask` has ones over the leftmost `length` bytes of a word
 49 |     uint256 tailMask = leftMask(length);
 50 |     /// @solidity memory-safe-assembly
 51 |     assembly {
 52 |         // bytes to write, taken from the source word
 53 |         let copied := and(mload(ptrSrc), tailMask)
 54 |         // bytes to leave alone, taken from the destination word
 55 |         let preserved := and(mload(ptrDest), not(tailMask))
 56 |         mstore(ptrDest, or(copied, preserved))
 57 |     }
 58 | }
 59 | 
 60 | /**
 61 |  * @dev mstore the first `n` bytes of `data` (counted from the left/MSB) at `ptrDest`.
 62 |  * The remaining bytes of the 32-byte word at `ptrDest` keep their current value.
 63 |  * For `n` >= 32 the mask covers the whole word, so all of `data` is stored.
 64 |  */
 65 | function mstoreN(uint256 ptrDest, bytes32 data, uint256 n) pure {
 66 |     // ones over the `n` leftmost bytes
 67 |     uint256 storeMask = leftMask(n);
 68 |     /// @solidity memory-safe-assembly
 69 |     assembly {
 70 |         // the left part comes from `data`
 71 |         let stored := and(data, storeMask)
 72 |         // the right part is what memory already holds
 73 |         let preserved := and(mload(ptrDest), not(storeMask))
 74 |         mstore(ptrDest, or(stored, preserved))
 75 |     }
 76 | }
 77 | 
 78 | /**
 79 |  * @dev Copy `n` memory bytes using identity precompile.
 80 |  * Reverts (with empty revert data) if the call to the precompile fails,
 81 |  * e.g. when it runs out of gas; otherwise the destination would silently keep stale bytes.
 82 |  */
 83 | function memmove(uint256 ptrDest, uint256 ptrSrc, uint256 n) view {
 84 |     /// @solidity memory-safe-assembly
 85 |     assembly {
 86 |         // staticcall(gas, address, argsOffset, argsSize, retOffset, retSize):
 87 |         // - gas(): forward the available gas (unused is returned)
 88 |         // - 0x04: identity precompile address
 89 |         // - the input is `n` bytes at ptrSrc, the output is `n` bytes written to ptrDest
 90 |         let success := staticcall(gas(), 0x04, ptrSrc, n, ptrDest, n)
 91 |         if iszero(success) {
 92 |             revert(0, 0)
 93 |         }
 94 |     }
 95 | }
 96 | 
 97 | /**
 98 |  * @dev Compare `n` memory bytes at `ptrSelf` and `ptrOther` lexicographically.
 99 |  * Returns 0 for equal, < 0 for less than and > 0 for greater than.
100 |  * Only the sign of a non-zero result is meaningful, its magnitude is not.
101 |  * https://doc.rust-lang.org/std/cmp/trait.Ord.html#lexicographical-comparison
102 |  */
103 | function memcmp(uint256 ptrSelf, uint256 ptrOther, uint256 n) pure returns (int256) {
104 |     // binary search for the first inequality: narrow the window until fewer than 32 bytes remain
105 |     while (n >= 32) {
106 |         // safe because total addition will be <= n (ptr+len is implicitly safe)
107 |         unchecked {
108 |             uint256 nHalf = n / 2;
109 |             if (memeq(ptrSelf, ptrOther, nHalf)) {
110 |                 ptrSelf += nHalf;
111 |                 ptrOther += nHalf;
112 |                 // (can't do n /= 2 instead of nHalf, some bytes would be skipped)
113 |                 n -= nHalf;
114 |                 // an explicit continue is better for optimization here
115 |                 continue;
116 |             } else {
117 |                 n -= nHalf; // the first difference is within the first nHalf bytes; n - nHalf >= nHalf still covers them
118 |             }
119 |         }
120 |     }
121 |     // n < 32 here: compare what is left as one masked word per side
122 |     uint256 mask = leftMask(n);
123 |     int256 diff;
124 |     /// @solidity memory-safe-assembly
125 |     assembly {
126 |         // for <32 bytes subtraction can be used for comparison,
127 |         // just need to shift away from MSB (both operands end up below 2^248, so the signed difference can't overflow)
128 |         diff := sub(
129 |             shr(8, and(mload(ptrSelf), mask)),
130 |             shr(8, and(mload(ptrOther), mask))
131 |         )
132 |     }
133 |     return diff;
134 | }
135 | 
136 | /**
137 |  * @dev True when the `n` bytes at `ptrSelf` and at `ptrOther` are equal (their keccak256 hashes are compared).
138 |  *
139 |  * Up to 4x faster than memcmp, mostly on medium lengths like 32-320; for 10000 bytes it's only 30% faster.
140 |  */
141 | function memeq(uint256 ptrSelf, uint256 ptrOther, uint256 n) pure returns (bool result) {
142 |     /// @solidity memory-safe-assembly
143 |     assembly {
144 |         let hashSelf := keccak256(ptrSelf, n)
145 |         result := eq(hashSelf, keccak256(ptrOther, n))
146 |     }
147 | }
148 | 
149 | /**
150 |  * @dev Left-aligned byte mask (e.g. for partial mload/mstore).
151 |  * For length >= 32 returns type(uint256).max
152 |  * (provided the unchecked `length * 8` doesn't wrap around, i.e. length < 2^253).
153 |  *
154 |  * length 0:   0x000000...000000
155 |  * length 1:   0xff0000...000000
156 |  * length 2:   0xffff00...000000
157 |  * ...
158 |  * length 30:  0xffffff...ff0000
159 |  * length 31:  0xffffff...ffff00
160 |  * length 32+: 0xffffff...ffffff
161 |  */
162 | function leftMask(uint256 length) pure returns (uint256) {
163 |     unchecked {
164 |         // ones over the bytes that are NOT selected;
165 |         // a shift by 256 bits or more leaves nothing
166 |         uint256 unselected = type(uint256).max >> (length * 8);
167 |         return ~unselected;
168 |     }
169 | }


--------------------------------------------------------------------------------
/src/StrChar.sol:
--------------------------------------------------------------------------------
  1 | // SPDX-License-Identifier: MIT
  2 | 
  3 | pragma solidity ^0.8.17;
  4 | 
  5 | import { isValidUtf8 as _isValidUtf8, utf8CharWidth } from "./utils/utf8.sol";
  6 | import { decodeUtf8, encodeUtf8 } from "./utils/unicode.sol";
  7 | import { leftMask } from "./utils/mem.sol";
  8 | 
  9 | /**
 10 |  * @title A single UTF-8 encoded character.
 11 |  * @dev Internally it is stored as UTF-8 encoded bytes starting from left/MSB.
 12 |  */
 13 | type StrChar is bytes32;
 14 | 
 15 | /*//////////////////////////////////////////////////////////////////////////
 16 |                                 CUSTOM ERRORS
 17 | //////////////////////////////////////////////////////////////////////////*/
 18 | 
 19 | error StrChar__InvalidUTF8();
 20 | 
 21 | /*//////////////////////////////////////////////////////////////////////////
 22 |                               STATIC FUNCTIONS
 23 | //////////////////////////////////////////////////////////////////////////*/
 24 | 
 25 | library StrChar__ {
 26 |     /**
 27 |      * @dev Builds a `StrChar` from the first 1-4 bytes of `bytes32`.
 28 |      * Reads from left/MSB, reverts with `StrChar__InvalidUTF8` if that's not valid UTF-8.
 29 |      * @param b UTF-8 encoded character in the most significant bytes.
 30 |      */
 31 |     function from(bytes32 b) internal pure returns (StrChar char) {
 32 |         uint256 charLen = _isValidUtf8(b);
 33 |         if (charLen != 0) {
 34 |             return fromUnchecked(b, charLen);
 35 |         }
 36 |         revert StrChar__InvalidUTF8();
 37 |     }
 38 | 
 39 |     /**
 40 |      * @dev Builds a `StrChar` from a unicode code point.
 41 |      * E.g. for '€' code point = 0x20AC; whereas UTF-8 = 0xE282AC.
 42 |      */
 43 |     function fromCodePoint(uint256 code) internal pure returns (StrChar char) {
 44 |         char = StrChar.wrap(encodeUtf8(code));
 45 |     }
 46 | 
 47 |     /**
 48 |      * @dev Like `from`, but does NO validity checks.
 49 |      * Uses provided `_len` instead of calculating it. This allows invalid/malformed characters.
 50 |      *
 51 |      * MSB of `bytes32` SHOULD be valid UTF-8; everything after the first `_len` bytes is zeroed.
 52 |      * Primarily for internal use.
 53 |      */
 54 |     function fromUnchecked(bytes32 b, uint256 _len) internal pure returns (StrChar char) {
 55 |         // zero-pad after the character
 56 |         bytes32 padded = b & bytes32(leftMask(_len));
 57 |         return StrChar.wrap(padded);
 58 |     }
 59 | }
 60 | 
 61 | /*//////////////////////////////////////////////////////////////////////////
 62 |                                 GLOBAL FUNCTIONS
 63 | //////////////////////////////////////////////////////////////////////////*/
 64 | 
 65 | using { 
 66 |     len,
 67 |     toBytes32, toString, toCodePoint,
 68 |     cmp, eq, ne, lt, lte, gt, gte,
 69 |     isValidUtf8,
 70 |     isAscii
 71 | } for StrChar global;
 72 | 
 73 | /**
 74 |  * @dev Returns the character's length in bytes (1-4), derived from its leading byte.
 75 |  * Returns 0 for some (not all!) invalid characters (e.g. due to unsafe use of fromUnchecked).
 76 |  */
 77 | function len(StrChar self) pure returns (uint256) {
 78 |     // the leading byte is the leftmost (most significant) one
 79 |     bytes1 leadingByte = StrChar.unwrap(self)[0];
 80 |     uint256 lead = uint8(leadingByte);
 81 |     return utf8CharWidth(lead);
 82 | }
 83 | 
 84 | /// @dev Unwraps a `StrChar` into the `bytes32` value that stores it
 85 | /// (UTF-8 encoded bytes starting from left/MSB).
 86 | function toBytes32(StrChar self) pure returns (bytes32) {
 87 |     bytes32 raw = StrChar.unwrap(self);
 88 |     return raw;
 89 | }
 90 | 
 91 | /**
 92 |  * @dev Converts a `StrChar` to a newly allocated `string` of `self.len()` bytes.
 93 |  */
 94 | function toString(StrChar self) pure returns (string memory str) {
 95 |     str = new string(self.len());
 96 |     /// @solidity memory-safe-assembly
 97 |     assembly {
 98 |         // string data begins after the 32-byte length slot; the full word is stored there
 99 |         let dataPtr := add(str, 0x20)
100 |         mstore(dataPtr, self)
101 |     }
102 | }
103 | 
104 | /// @dev Decodes the `StrChar` into its unicode code point (aka unicode scalar value)
105 | /// by running `decodeUtf8` on the underlying bytes.
106 | function toCodePoint(StrChar self) pure returns (uint256) {
107 |     bytes32 utf8 = StrChar.unwrap(self);
108 |     return decodeUtf8(utf8);
109 | }
110 | 
111 | /**
112 |  * @dev Compare characters lexicographically (the underlying bytes32 values are compared).
113 |  * @return result 0 for equal, < 0 for less than and > 0 for greater than.
114 |  */
115 | function cmp(StrChar self, StrChar other) pure returns (int256 result) {
116 |     bytes32 a = StrChar.unwrap(self);
117 |     bytes32 b = StrChar.unwrap(other);
118 |     if (a == b) {
119 |         result = 0;
120 |     } else if (a > b) {
121 |         result = 1;
122 |     } else {
123 |         result = -1;
124 |     }
125 | }
126 | 
127 | /// @dev `self` == `other` (the underlying bytes are identical)
128 | function eq(StrChar self, StrChar other) pure returns (bool) {
129 |     return StrChar.unwrap(self) == StrChar.unwrap(other);
130 | }
131 | 
132 | /// @dev `self` != `other` (the underlying bytes differ)
133 | function ne(StrChar self, StrChar other) pure returns (bool) {
134 |     return StrChar.unwrap(self) != StrChar.unwrap(other);
135 | }
136 | 
137 | /// @dev `self` < `other` (comparing the underlying bytes32 values)
138 | function lt(StrChar self, StrChar other) pure returns (bool) {
139 |     return StrChar.unwrap(self) < StrChar.unwrap(other);
140 | }
141 | 
142 | /// @dev `self` <= `other` (comparing the underlying bytes32 values)
143 | function lte(StrChar self, StrChar other) pure returns (bool) {
144 |     return StrChar.unwrap(self) <= StrChar.unwrap(other);
145 | }
146 | 
147 | /// @dev `self` > `other` (comparing the underlying bytes32 values)
148 | function gt(StrChar self, StrChar other) pure returns (bool) {
149 |     return StrChar.unwrap(self) > StrChar.unwrap(other);
150 | }
151 | 
152 | /// @dev `self` >= `other` (comparing the underlying bytes32 values)
153 | function gte(StrChar self, StrChar other) pure returns (bool) {
154 |     return StrChar.unwrap(self) >= StrChar.unwrap(other);
155 | }
156 | 
157 | /// @dev Returns true if `StrChar` is valid UTF-8, i.e. the validator reports a non-zero byte length.
158 | /// Can be false if it was formed with an unsafe method (fromUnchecked, wrap).
159 | function isValidUtf8(StrChar self) pure returns (bool) {
160 |     uint256 charLen = _isValidUtf8(StrChar.unwrap(self));
161 |     // 0 is what the validator answers for invalid input
162 |     return charLen > 0;
163 | }
164 | 
165 | /// @dev Returns true if `StrChar` is within the ASCII range,
166 | /// i.e. its leading byte is below 0x80.
167 | function isAscii(StrChar self) pure returns (bool) {
168 |     uint8 lead = uint8(StrChar.unwrap(self)[0]);
169 |     return lead < 0x80;
170 | }


--------------------------------------------------------------------------------
/test/SliceIter.t.sol:
--------------------------------------------------------------------------------
  1 | // SPDX-License-Identifier: MIT
  2 | 
  3 | pragma solidity ^0.8.17;
  4 | 
  5 | import { PRBTest } from "@prb/test/src/PRBTest.sol";
  6 | 
  7 | import { Slice, toSlice } from "../src/Slice.sol";
  8 | import { SliceIter } from "../src/SliceIter.sol";
  9 | import { SliceIter__StopIteration } from "../src/SliceIter.sol";
 10 | 
 11 | using { toSlice } for bytes;
 12 | 
 13 | contract SliceIterTest is PRBTest {
 14 |     /// @dev A fresh iterator reports the length of the bytes it was created from.
 15 |     function testLen(bytes calldata _b) public {
 16 |         SliceIter memory iter = _b.toSlice().iter();
 17 |         assertEq(iter.len(), _b.length);
 18 |     }
 19 | 
 20 |     /// @dev Empty for zero bytes, not empty for one byte.
 21 |     function testIsEmpty() public {
 22 |         assertTrue(bytes("").toSlice().iter().isEmpty());
 23 |         assertFalse(new bytes(1).toSlice().iter().isEmpty());
 24 |     }
 25 | 
 26 |     /*//////////////////////////////////////////////////////////////////////////
 27 |                                     NEXT
 28 |     //////////////////////////////////////////////////////////////////////////*/
 29 | 
 30 |     /// @dev `next` yields bytes front to back and shrinks the remaining slice from the left.
 31 |     function testNext() public {
 32 |         Slice s = bytes("123").toSlice();
 33 |         SliceIter memory iter = s.iter();
 34 | 
 35 |         assertEq(iter.next(), uint8(bytes1("1")));
 36 |         assertEq(iter.asSlice().toBytes(), bytes("23"));
 37 |         assertEq(iter.next(), uint8(bytes1("2")));
 38 |         assertEq(iter.asSlice().toBytes(), bytes("3"));
 39 |         assertEq(iter.next(), uint8(bytes1("3")));
 40 |         assertEq(iter.asSlice().toBytes(), bytes(""));
 41 | 
 42 |         vm.expectRevert(SliceIter__StopIteration.selector);
 43 |         iter.next();
 44 |     }
 45 | 
 46 |     /// @dev `next` on an exhausted iterator reverts with SliceIter__StopIteration.
 47 |     function testNext__StopIteration() public {
 48 |         Slice s = bytes("123").toSlice();
 49 |         SliceIter memory iter = s.iter();
 50 | 
 51 |         iter.next();
 52 |         iter.next();
 53 |         iter.next();
 54 | 
 55 |         vm.expectRevert(SliceIter__StopIteration.selector);
 56 |         iter.next();
 57 |     }
 58 | 
 59 |     /// @dev Fuzzed testNext: the i-th `next` yields `_b[i]` and leaves `_b[i + 1:]`.
 60 |     function testNext__Fuzz(bytes calldata _b) public {
 61 |         SliceIter memory iter = _b.toSlice().iter();
 62 | 
 63 |         uint256 i;
 64 |         while (!iter.isEmpty()) {
 65 |             assertEq(iter.next(), uint8(_b[i]));
 66 |             assertEq(iter.asSlice().toBytes(), _b[i + 1:]);
 67 |             i++;
 68 |         }
 69 | 
 70 |         vm.expectRevert(SliceIter__StopIteration.selector);
 71 |         iter.next();
 72 |     }
 73 | 
 74 |     /// @dev Fuzzed testNext__StopIteration.
 75 |     function testNext__StopIteration__Fuzz(bytes calldata _b) public {
 76 |         SliceIter memory iter = _b.toSlice().iter();
 77 | 
 78 |         // exhaust the iterator, the yielded bytes don't matter here
 79 |         while (!iter.isEmpty()) {
 80 |             iter.next();
 81 |         }
 82 | 
 83 |         vm.expectRevert(SliceIter__StopIteration.selector);
 84 |         iter.next();
 85 |     }
 86 | 
 87 |     /*//////////////////////////////////////////////////////////////////////////
 88 |                                     NEXT_BACK
 89 |     //////////////////////////////////////////////////////////////////////////*/
 90 | 
 91 |     /// @dev `nextBack` yields bytes back to front and shrinks the remaining slice from the right.
 92 |     function testNextBack() public {
 93 |         Slice s = bytes("123").toSlice();
 94 |         SliceIter memory iter = s.iter();
 95 | 
 96 |         assertEq(iter.nextBack(), uint8(bytes1("3")));
 97 |         assertEq(iter.asSlice().toBytes(), bytes("12"));
 98 |         assertEq(iter.nextBack(), uint8(bytes1("2")));
 99 |         assertEq(iter.asSlice().toBytes(), bytes("1"));
100 |         assertEq(iter.nextBack(), uint8(bytes1("1")));
101 |         assertEq(iter.asSlice().toBytes(), bytes(""));
102 | 
103 |         vm.expectRevert(SliceIter__StopIteration.selector);
104 |         iter.nextBack();
105 |     }
106 | 
107 |     /// @dev `nextBack` on an exhausted iterator reverts with SliceIter__StopIteration.
108 |     function testNextBack__StopIteration() public {
109 |         Slice s = bytes("123").toSlice();
110 |         SliceIter memory iter = s.iter();
111 | 
112 |         iter.nextBack();
113 |         iter.nextBack();
114 |         iter.nextBack();
115 | 
116 |         vm.expectRevert(SliceIter__StopIteration.selector);
117 |         iter.nextBack();
118 |     }
119 | 
120 |     /// @dev Fuzzed testNextBack: the i-th `nextBack` yields the i-th byte from the end.
121 |     function testNextBack__Fuzz(bytes calldata _b) public {
122 |         SliceIter memory iter = _b.toSlice().iter();
123 | 
124 |         uint256 i;
125 |         while (!iter.isEmpty()) {
126 |             assertEq(iter.nextBack(), uint8(_b[_b.length - i - 1]));
127 |             assertEq(iter.asSlice().toBytes(), _b[:_b.length - i - 1]);
128 |             i++;
129 |         }
130 |     }
131 | 
132 |     /// @dev Fuzzed testNextBack__StopIteration.
133 |     function testNextBack__StopIteration__Fuzz(bytes calldata _b) public {
134 |         SliceIter memory iter = _b.toSlice().iter();
135 | 
136 |         // exhaust the iterator, the yielded bytes don't matter here
137 |         while (!iter.isEmpty()) {
138 |             iter.nextBack();
139 |         }
140 | 
141 |         vm.expectRevert(SliceIter__StopIteration.selector);
142 |         iter.nextBack();
143 |     }
144 | 
145 |     /*//////////////////////////////////////////////////////////////////////////
146 |                                     NEXT MIXED
147 |     //////////////////////////////////////////////////////////////////////////*/
148 | 
149 |     /// @dev `next` and `nextBack` can be interleaved, both consume the same remaining slice.
150 |     function testNextMixed() public {
151 |         Slice s = bytes("12345").toSlice();
152 |         SliceIter memory iter = s.iter();
153 | 
154 |         assertEq(iter.next(), uint8(bytes1("1")));
155 |         assertEq(iter.asSlice().toBytes(), bytes("2345"));
156 |         assertEq(iter.nextBack(), uint8(bytes1("5")));
157 |         assertEq(iter.asSlice().toBytes(), bytes("234"));
158 |         assertEq(iter.next(), uint8(bytes1("2")));
159 |         assertEq(iter.asSlice().toBytes(), bytes("34"));
160 |         assertEq(iter.next(), uint8(bytes1("3")));
161 |         assertEq(iter.asSlice().toBytes(), bytes("4"));
162 |         assertEq(iter.nextBack(), uint8(bytes1("4")));
163 |         assertEq(iter.asSlice().toBytes(), bytes(""));
164 |     }
165 | 
166 |     /// @dev After interleaved calls have consumed every byte, `next` reverts with SliceIter__StopIteration.
167 |     function testNextMixed__StopIteration() public {
168 |         Slice s = bytes("12345").toSlice();
169 |         SliceIter memory iter = s.iter();
170 | 
171 |         iter.next();
172 |         iter.nextBack();
173 |         iter.next();
174 |         iter.next();
175 |         iter.nextBack();
176 | 
177 |         vm.expectRevert(SliceIter__StopIteration.selector);
178 |         iter.next();
179 |     }
180 | }


--------------------------------------------------------------------------------
/test/StrCharsIter.t.sol:
--------------------------------------------------------------------------------
  1 | // SPDX-License-Identifier: MIT
  2 | 
  3 | pragma solidity ^0.8.17;
  4 | 
  5 | import { PRBTest } from "@prb/test/src/PRBTest.sol";
  6 | 
  7 | import { StrSlice, toSlice, StrCharsIter } from "../src/StrSlice.sol";
  8 | import { SliceIter__StopIteration } from "../src/SliceIter.sol";
  9 | import { StrChar__InvalidUTF8 } from "../src/StrChar.sol";
 10 | 
 11 | using { toSlice } for string;
 12 | 
 13 | contract StrCharsIterTest is PRBTest {
 14 |     function testCount() public {
 15 |         assertEq(toSlice("").chars().count(), 0);
 16 |         assertEq(toSlice("Hello, world!").chars().count(), 13);
 17 |         assertEq(toSlice(unicode"naïve").chars().count(), 5);
 18 |         assertEq(toSlice(unicode"こんにちは").chars().count(), 5);
 19 |         assertEq(toSlice(unicode"Z̤͔ͧ̑̓ä͖̭̈̇lͮ̒ͫǧ̗͚̚o̙̔ͮ̇͐̇Z̤͔ͧ̑̓ä͖̭̈̇lͮ̒ͫǧ̗͚̚o̙̔ͮ̇͐̇").chars().count(), 56);
 20 |         assertEq(toSlice(unicode"🗮🐵🌝👤👿🗉💀🉄🍨🉔🈥🔥🏅🔪🉣📷🉳🍠🈃🉌🖷👍🌐💎🋀🌙💼💮🗹🗘💬🖜🐥🖸🈰🍦💈📆🋬🏇🖒🐜👮🊊🗒🈆🗻🏁🈰🎎🊶🉠🍖🉪🌖📎🌄💵🕷🔧🍸🋗🍁🋸")
 21 |             .chars().count(), 64);
 22 |     }
 23 | 
 24 |     function testUnsafeCount() public {
 25 |         assertEq(toSlice("").chars().unsafeCount(), 0);
 26 |         assertEq(toSlice("Hello, world!").chars().unsafeCount(), 13);
 27 |         assertEq(toSlice(unicode"naïve").chars().unsafeCount(), 5);
 28 |         assertEq(toSlice(unicode"こんにちは").chars().unsafeCount(), 5);
 29 |         assertEq(toSlice(unicode"Z̤͔ͧ̑̓ä͖̭̈̇lͮ̒ͫǧ̗͚̚o̙̔ͮ̇͐̇Z̤͔ͧ̑̓ä͖̭̈̇lͮ̒ͫǧ̗͚̚o̙̔ͮ̇͐̇").chars().unsafeCount(), 56);
 30 |         assertEq(toSlice(unicode"🗮🐵🌝👤👿🗉💀🉄🍨🉔🈥🔥🏅🔪🉣📷🉳🍠🈃🉌🖷👍🌐💎🋀🌙💼💮🗹🗘💬🖜🐥🖸🈰🍦💈📆🋬🏇🖒🐜👮🊊🗒🈆🗻🏁🈰🎎🊶🉠🍖🉪🌖📎🌄💵🕷🔧🍸🋗🍁🋸")
 31 |             .chars().unsafeCount(), 64);
 32 |     }
 33 | 
 34 |     function testValidateUtf8() public {
 35 |         assertTrue(toSlice("").chars().validateUtf8());
 36 |         assertTrue(toSlice("Hello, world!").chars().validateUtf8());
 37 |         assertTrue(toSlice(unicode"naïve").chars().validateUtf8());
 38 |         assertTrue(toSlice(unicode"こんにちは").chars().validateUtf8());
 39 |         assertTrue(toSlice(unicode"Z̤͔ͧ̑̓ä͖̭̈̇lͮ̒ͫǧ̗͚̚o̙̔ͮ̇͐̇Z̤͔ͧ̑̓ä͖̭̈̇lͮ̒ͫǧ̗͚̚o̙̔ͮ̇͐̇").chars().validateUtf8());
 40 |         assertTrue(toSlice(unicode"🗮🐵🌝👤👿🗉💀🉄🍨🉔🈥🔥🏅🔪🉣📷🉳🍠🈃🉌🖷👍🌐💎🋀🌙💼💮🗹🗘💬🖜🐥🖸🈰🍦💈📆🋬🏇🖒🐜👮🊊🗒🈆🗻🏁🈰🎎🊶🉠🍖🉪🌖📎🌄💵🕷🔧🍸🋗🍁🋸")
 41 |             .chars().validateUtf8());
 42 |     }
 43 | 
 44 |     function testValidateUtf8__False() public { // validateUtf8() must reject every malformed input below
 45 |         assertFalse(string(bytes(hex"80")).toSlice().chars().validateUtf8()); // lone continuation byte
 46 |         assertFalse(string(bytes(hex"E0")).toSlice().chars().validateUtf8()); // 3-byte lead with nothing after it
 47 |         assertFalse(string(bytes(hex"C000")).toSlice().chars().validateUtf8()); // 0xC0 followed by a non-continuation byte
 48 |         assertFalse(string(bytes(hex"F880808080")).toSlice().chars().validateUtf8()); // 5-byte form, not allowed by RFC 3629
 49 |         assertFalse(string(bytes(hex"E08080")).toSlice().chars().validateUtf8()); // overlong 3-byte encoding
 50 |         assertFalse(string(bytes(hex"F0808080")).toSlice().chars().validateUtf8()); // overlong 4-byte encoding
 51 |         assertFalse(string(abi.encodePacked(unicode"こんにちは", hex"80")).toSlice().chars().validateUtf8()); // valid text, then a stray byte
 52 |         assertFalse(string(abi.encodePacked(unicode"Z̤͔ͧ̑̓ä͖̭̈̇lͮ̒ͫǧ̗͚̚o̙̔ͮ̇͐̇Z̤͔ͧ̑̓ä͖̭̈̇lͮ̒ͫǧ̗͚̚o̙̔ͮ̇͐̇", hex"F0808080")).toSlice().chars().validateUtf8());
 53 |     }
 54 | 
 55 |     function testCount__InvalidUTF8() public { // count() is expected to revert on bytes that are not UTF-8
 56 |         vm.expectRevert(StrChar__InvalidUTF8.selector);
 57 |         string(bytes(hex"FFFF")).toSlice().chars().count();
 58 |     }
 59 | 
 60 |     function testNext() public {
 61 |         // The input mixes an ASCII char with multi-byte chars.
 62 |         StrCharsIter memory iter = toSlice(unicode"a¡ࠀ𐀡").chars();
 63 |         // Each next() is checked to yield the front char, and asStr() the unconsumed tail.
 64 |         assertEq(iter.next().toString(), unicode"a");
 65 |         assertEq(iter.asStr().toString(), unicode"¡ࠀ𐀡");
 66 |         assertEq(iter.next().toString(), unicode"¡");
 67 |         assertEq(iter.asStr().toString(), unicode"ࠀ𐀡");
 68 |         assertEq(iter.next().toString(), unicode"ࠀ");
 69 |         assertEq(iter.asStr().toString(), unicode"𐀡");
 70 |         assertEq(iter.next().toString(), unicode"𐀡");
 71 |         assertEq(iter.asStr().toString(), unicode"");
 72 |     }
 73 | 
 74 |     function testNext__StopIteration() public {
 75 |         // Two chars only, so a third next() has nothing left to yield.
 76 |         StrCharsIter memory iter = toSlice(unicode"💀!").chars();
 77 |         // Consume both chars, then expect the revert on the extra call.
 78 |         iter.next();
 79 |         iter.next();
 80 |         vm.expectRevert(SliceIter__StopIteration.selector);
 81 |         iter.next();
 82 |     }
 83 | 
 84 |     function testNextBack() public {
 85 |         // The input mixes an ASCII char with multi-byte chars.
 86 |         StrCharsIter memory iter = toSlice(unicode"a¡ࠀ𐀡").chars();
 87 |         // Each nextBack() is checked to yield the last char, and asStr() the remaining head.
 88 |         assertEq(iter.nextBack().toString(), unicode"𐀡");
 89 |         assertEq(iter.asStr().toString(), unicode"a¡ࠀ");
 90 |         assertEq(iter.nextBack().toString(), unicode"ࠀ");
 91 |         assertEq(iter.asStr().toString(), unicode"a¡");
 92 |         assertEq(iter.nextBack().toString(), unicode"¡");
 93 |         assertEq(iter.asStr().toString(), unicode"a");
 94 |         assertEq(iter.nextBack().toString(), unicode"a");
 95 |         assertEq(iter.asStr().toString(), unicode"");
 96 |     }
 97 | 
 98 |     function testNextBack__StopIteration() public {
 99 |         // Two chars only, so a third nextBack() has nothing left to yield.
100 |         StrCharsIter memory iter = toSlice(unicode"💀!").chars();
101 |         // Consume both chars from the back, then expect the revert on the extra call.
102 |         iter.nextBack();
103 |         iter.nextBack();
104 |         vm.expectRevert(SliceIter__StopIteration.selector);
105 |         iter.nextBack();
106 |     }
107 | 
108 |     function testUnsafeNext() public {
109 |         // The input mixes an ASCII char with multi-byte chars.
110 |         StrCharsIter memory iter = toSlice(unicode"a¡ࠀ𐀡").chars();
111 |         // On valid input unsafeNext() is checked to yield the same chars and tails as next().
112 |         assertEq(iter.unsafeNext().toString(), unicode"a");
113 |         assertEq(iter.asStr().toString(), unicode"¡ࠀ𐀡");
114 |         assertEq(iter.unsafeNext().toString(), unicode"¡");
115 |         assertEq(iter.asStr().toString(), unicode"ࠀ𐀡");
116 |         assertEq(iter.unsafeNext().toString(), unicode"ࠀ");
117 |         assertEq(iter.asStr().toString(), unicode"𐀡");
118 |         assertEq(iter.unsafeNext().toString(), unicode"𐀡");
119 |         assertEq(iter.asStr().toString(), unicode"");
120 |     }
121 | 
122 |     function testUnsafeNext__InvalidUtf8() public {
123 |         // 0x00 is a valid 1-byte char; 0xFF and 0x80 are not valid chars on their own.
124 |         StrCharsIter memory iter = toSlice(string(bytes(hex"00FF80"))).chars();
125 |         // Expected below: each invalid byte is consumed one at a time and stringifies to "".
126 |         // this works kinda weirdly for invalid chars
127 |         // TODO test toBytes32 too (it will be non-empty here)
128 |         assertEq(iter.unsafeNext().toString(), string(bytes(hex"00")));
129 |         assertEq(iter.asStr().toString(), string(bytes(hex"FF80")));
130 |         assertEq(iter.unsafeNext().toString(), "");
131 |         assertEq(iter.asStr().toString(), string(bytes(hex"80")));
132 |         assertEq(iter.unsafeNext().toString(), "");
133 |         assertEq(iter.asStr().toString(), "");
134 |     }
135 | }


--------------------------------------------------------------------------------
/test/SliceAssertions.t.sol:
--------------------------------------------------------------------------------
  1 | // SPDX-License-Identifier: MIT
  2 | 
  3 | pragma solidity ^0.8.17;
  4 | 
  5 | import { PRBTest } from "@prb/test/src/PRBTest.sol";
  6 | import { SliceAssertions } from "../src/test/SliceAssertions.sol";
  7 | 
  8 | import { Slice, toSlice } from "../src/Slice.sol";
  9 | 
 10 | using { toSlice } for bytes;
 11 | 
 12 | contract SliceAssertionsTest is PRBTest, SliceAssertions {
 13 |     // 100 bytes
 14 |     bytes constant LOREM_IPSUM = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore.";
 15 | 
 16 |     /// @dev Reference lexicographic comparison; kept deliberately simple so that it can
 17 |     /// serve as an oracle for the more complicated comparisons under test.
 18 |     /// Returns -1 when `b1` sorts before `b2`, 1 when it sorts after, 0 when they are equal.
 19 |     function naiveCmp(bytes memory b1, bytes memory b2) internal pure returns (int256) {
 20 |         uint256 len1 = b1.length;
 21 |         uint256 len2 = b2.length;
 22 |         uint256 common = len1 < len2 ? len1 : len2;
 23 |         // the first differing byte inside the common prefix decides the order
 24 |         for (uint256 idx = 0; idx < common; ++idx) {
 25 |             bytes1 x = b1[idx];
 26 |             bytes1 y = b2[idx];
 27 |             if (x == y) continue;
 28 |             return x < y ? int256(-1) : int256(1);
 29 |         }
 30 |         // identical common prefix: equal lengths mean equal inputs, otherwise the shorter sorts first
 31 |         if (len1 == len2) return 0;
 32 |         return len1 < len2 ? int256(-1) : int256(1);
 33 |     }
 34 | 
 35 |     /// @dev Splits calldata bytes in half and, depending on a hash of the input, wraps both
 36 |     /// halves in a shared LOREM_IPSUM prefix and/or suffix.
 37 |     function b1b2(bytes calldata b) internal pure returns (bytes memory b1, bytes memory b2) {
 38 |         uint256 mid = b.length / 2;
 39 |         b1 = b[:mid];
 40 |         // b2 can be 1 byte longer sometimes
 41 |         b2 = b[mid:];
 42 | 
 43 |         // this is useful to test a special case of initially similar sequences
 44 |         // TODO fix self-referential pseudorandomness
 45 |         uint256 mode = uint256(keccak256(abi.encode(b, "randomlyAddPrefix"))) % 4;
 46 |         // mode 0 leaves the halves untouched
 47 |         if (mode == 1 || mode == 3) {
 48 |             // prefix (modes 1 and 3)
 49 |             b1 = abi.encodePacked(LOREM_IPSUM, b1);
 50 |             b2 = abi.encodePacked(LOREM_IPSUM, b2);
 51 |         }
 52 |         if (mode >= 2) {
 53 |             // suffix (modes 2 and 3)
 54 |             b1 = abi.encodePacked(b1, LOREM_IPSUM);
 55 |             b2 = abi.encodePacked(b2, LOREM_IPSUM);
 56 |         }
 57 |     }
 58 | 
 59 |     function testNaiveCmp() public { // sanity-checks the reference comparator, grouped by expected result
 60 |         assertEq(naiveCmp("", ""),     0);
 61 |         assertEq(naiveCmp("1", "1"),   0);
 62 |         assertEq(naiveCmp("1", ""),    1);
 63 |         assertEq(naiveCmp("1", "0"),   1);
 64 |         assertEq(naiveCmp("12", "1"),  1);
 65 |         assertEq(naiveCmp("", "1"),   -1);
 66 |         assertEq(naiveCmp("0", "1"),  -1);
 67 |         assertEq(naiveCmp("1", "12"), -1);
 68 |     }
 69 | 
 70 |     /*//////////////////////////////////////////////////////////////////////////
 71 |                                         EQUALITY
 72 |     //////////////////////////////////////////////////////////////////////////*/
 73 | 
 74 |     function testEq(bytes memory b) public {
 75 |         Slice s = b.toSlice();
 76 |         // compare new assertions
 77 |         assertEq(s, s);
 78 |         assertEq(s, b);
 79 |         assertEq(b, s);
 80 |         // equal values must also satisfy both non-strict orderings
 81 |         assertLte(s, s);
 82 |         assertLte(s, b);
 83 |         assertLte(b, s);
 84 |         assertGte(s, s);
 85 |         assertGte(s, b);
 86 |         assertGte(b, s);
 87 |         // to the existing ones
 88 |         assertEq(s.toBytes(), s.toBytes());
 89 |         assertEq(s.toBytes(), b);
 90 |         assertEq(b, s.toBytes());
 91 |     }
 92 | 
 93 |     function testFailEq(bytes calldata _b) public { // expected to fail: the two halves differ
 94 |         (bytes memory left, bytes memory right) = b1b2(_b);
 95 |         vm.assume(keccak256(left) != keccak256(right));
 96 |         assertEq(toSlice(left), toSlice(right));
 97 |     }
 98 | 
 99 |     function testNotEq(bytes calldata _b) public {
100 |         (bytes memory left, bytes memory right) = b1b2(_b);
101 |         vm.assume(keccak256(left) != keccak256(right));
102 |         // compare new assertions
103 |         assertNotEq(toSlice(left), toSlice(right));
104 |         assertNotEq(toSlice(left), right);
105 |         assertNotEq(left, toSlice(right));
106 |         // to the existing ones
107 |         assertNotEq(toSlice(left).toBytes(), toSlice(right).toBytes());
108 |         assertNotEq(toSlice(left).toBytes(), right);
109 |         assertNotEq(left, toSlice(right).toBytes());
110 |     }
111 | 
112 |     function testFailNotEq(bytes memory b) public { // expected to fail: a slice always equals itself
113 |         assertNotEq(toSlice(b), toSlice(b));
114 |     }
115 | 
116 |     /*//////////////////////////////////////////////////////////////////////////
117 |                                     LESS-THAN
118 |     //////////////////////////////////////////////////////////////////////////*/
119 | 
120 |     function testLt(bytes calldata _b) public {
121 |         (bytes memory lo, bytes memory hi) = b1b2(_b);
122 |         vm.assume(naiveCmp(lo, hi) < 0);
123 |         Slice sLo = toSlice(lo);
124 |         Slice sHi = toSlice(hi);
125 |         assertLt(sLo, sHi);
126 |         assertLt(sLo, hi);
127 |         assertLt(lo, sHi);
128 |         assertLt(lo, hi);
129 |         assertLte(sLo, sHi); // strictly-less inputs must pass the non-strict assertions too
130 |         assertLte(sLo, hi);
131 |         assertLte(lo, sHi);
132 |         assertLte(lo, hi);
133 |     }
134 | 
135 |     function testFailLt(bytes calldata _b) public {
136 |         (bytes memory left, bytes memory right) = b1b2(_b);
137 |         // only keep inputs where left > right, so the strict less-than assertion has to fail
138 |         vm.assume(naiveCmp(left, right) > 0);
139 |         assertLt(toSlice(left), toSlice(right));
140 |     }
141 | 
142 |     function testFailLt__ForEq(bytes memory b) public { // expected to fail: a slice is never less than itself
143 |         assertLt(toSlice(b), toSlice(b));
144 |     }
145 | 
146 |     function testFailLte(bytes calldata _b) public {
147 |         (bytes memory left, bytes memory right) = b1b2(_b);
148 |         // only keep inputs where left > right, so the less-or-equal assertion has to fail
149 |         vm.assume(naiveCmp(left, right) > 0);
150 |         assertLte(toSlice(left), toSlice(right));
151 |     }
152 | 
153 |     /*//////////////////////////////////////////////////////////////////////////
154 |                                     GREATER-THAN
155 |     //////////////////////////////////////////////////////////////////////////*/
156 | 
157 |     function testGt(bytes calldata _b) public {
158 |         (bytes memory big, bytes memory small) = b1b2(_b);
159 |         vm.assume(naiveCmp(big, small) > 0);
160 |         Slice sBig = toSlice(big);
161 |         Slice sSmall = toSlice(small);
162 |         assertGt(sBig, sSmall);
163 |         assertGt(sBig, small);
164 |         assertGt(big, sSmall);
165 |         assertGt(big, small);
166 |         assertGte(sBig, sSmall); // strictly-greater inputs must pass the non-strict assertions too
167 |         assertGte(sBig, small);
168 |         assertGte(big, sSmall);
169 |         assertGte(big, small);
170 |     }
171 | 
172 |     function testFailGt(bytes calldata _b) public {
173 |         (bytes memory left, bytes memory right) = b1b2(_b);
174 |         // only keep inputs where left < right, so the strict greater-than assertion has to fail
175 |         vm.assume(naiveCmp(left, right) < 0);
176 |         assertGt(toSlice(left), toSlice(right));
177 |     }
178 | 
179 |     function testFailGt__ForEq(bytes memory b) public { // expected to fail: a slice is never greater than itself
180 |         assertGt(toSlice(b), toSlice(b));
181 |     }
182 | 
183 |     function testFailGte(bytes calldata _b) public {
184 |         (bytes memory left, bytes memory right) = b1b2(_b);
185 |         // only keep inputs where left < right, so the greater-or-equal assertion has to fail
186 |         vm.assume(naiveCmp(left, right) < 0);
187 |         assertGte(toSlice(left), toSlice(right));
188 |     }
189 | 
190 |     /*//////////////////////////////////////////////////////////////////////////
191 |                                     CONTAINS
192 |     //////////////////////////////////////////////////////////////////////////*/
193 | 
194 |     function testContains(bytes calldata _b) public {
195 |         // the needle is cut out of the middle of the input, so the whole input must contain it
196 |         bytes memory haystack = _b;
197 |         bytes memory needle = _b[_b.length / 3 : _b.length * 2 / 3];
198 |         assertContains(toSlice(haystack), toSlice(needle));
199 |         assertContains(toSlice(haystack), needle);
200 |         assertContains(haystack, toSlice(needle));
201 |         assertContains(haystack, needle);
202 |     }
203 | 
204 |     function testFailContains(bytes calldata _b) public {
205 |         // Discard empty input: b2[0] would panic there, so the test would "fail" without ever reaching assertContains.
206 |         vm.assume(_b.length > 0);
207 |         bytes memory b1 = _b;
208 |         bytes memory b2 = _b;
209 |         b2[0] = bytes1(uint8(b2[0]) ^ uint8(0x01)); // change 1 byte: same length, different content
210 |         assertContains(b1.toSlice(), b2.toSlice()); // expected to fail, b1 cannot contain b2
211 |     }
212 | 
213 |     function testFailContains__1Byte(bytes calldata _b) public {
214 |         // single-byte pattern derived from a hash of the input
215 |         bytes1 pat = bytes1(keccak256(abi.encode(_b, "1Byte")));
216 |         bytes memory needle = abi.encodePacked(pat);
217 |         bytes memory haystack = _b;
218 |         // replace all pat; ~pat flips every bit, so it can never equal pat
219 |         uint256 len = haystack.length;
220 |         for (uint256 idx = 0; idx < len; ++idx) {
221 |             if (haystack[idx] != pat) continue;
222 |             haystack[idx] = ~pat;
223 |         }
224 |         // expected to fail: no occurrence of pat is left in the haystack
225 |         assertContains(toSlice(haystack), toSlice(needle));
226 |     }
227 |     }
228 | }


--------------------------------------------------------------------------------
/src/test/SliceAssertions.sol:
--------------------------------------------------------------------------------
  1 | // SPDX-License-Identifier: MIT
  2 | 
  3 | pragma solidity ^0.8.17;
  4 | 
  5 | import { PRBTest } from "@prb/test/src/PRBTest.sol";
  6 | 
  7 | import { Slice, toSlice } from "../Slice.sol";
  8 | 
  9 | using { toSlice } for bytes;
 10 | 
 11 | /// @title Extension to PRBTest with Slice assertions.
 12 | /// @dev Also provides lt,lte,gt,gte,contains for 2 native `bytes`.
 13 | contract SliceAssertions is PRBTest {
 14 |     // Eq: every overload converts its Slice argument(s) with toBytes() and forwards to an assertEq overload on `bytes`
 15 |     /// @dev Asserts that slices `a` and `b` convert to equal bytes.
 16 |     function assertEq(Slice a, Slice b) internal {
 17 |         assertEq(a.toBytes(), b.toBytes());
 18 |     }
 19 |     /// @dev Slice/Slice comparison; `err` is passed through to the forwarded assertion.
 20 |     function assertEq(Slice a, Slice b, string memory err) internal {
 21 |         assertEq(a.toBytes(), b.toBytes(), err);
 22 |     }
 23 |     /// @dev Slice on the left, native bytes on the right.
 24 |     function assertEq(Slice a, bytes memory b) internal {
 25 |         assertEq(a.toBytes(), b);
 26 |     }
 27 |     /// @dev Slice on the left, native bytes on the right; `err` is passed through.
 28 |     function assertEq(Slice a, bytes memory b, string memory err) internal {
 29 |         assertEq(a.toBytes(), b, err);
 30 |     }
 31 |     /// @dev Native bytes on the left, Slice on the right.
 32 |     function assertEq(bytes memory a, Slice b) internal {
 33 |         assertEq(a, b.toBytes());
 34 |     }
 35 |     /// @dev Native bytes on the left, Slice on the right; `err` is passed through.
 36 |     function assertEq(bytes memory a, Slice b, string memory err) internal {
 37 |         assertEq(a, b.toBytes(), err);
 38 |     }
 39 | 
 40 |     // NotEq: every overload converts its Slice argument(s) with toBytes() and forwards to an assertNotEq overload on `bytes`
 41 |     /// @dev Asserts that slices `a` and `b` convert to different bytes.
 42 |     function assertNotEq(Slice a, Slice b) internal {
 43 |         assertNotEq(a.toBytes(), b.toBytes());
 44 |     }
 45 |     /// @dev Slice/Slice comparison; `err` is passed through to the forwarded assertion.
 46 |     function assertNotEq(Slice a, Slice b, string memory err) internal {
 47 |         assertNotEq(a.toBytes(), b.toBytes(), err);
 48 |     }
 49 |     /// @dev Slice on the left, native bytes on the right.
 50 |     function assertNotEq(Slice a, bytes memory b) internal {
 51 |         assertNotEq(a.toBytes(), b);
 52 |     }
 53 |     /// @dev Slice on the left, native bytes on the right; `err` is passed through.
 54 |     function assertNotEq(Slice a, bytes memory b, string memory err) internal {
 55 |         assertNotEq(a.toBytes(), b, err);
 56 |     }
 57 |     /// @dev Native bytes on the left, Slice on the right.
 58 |     function assertNotEq(bytes memory a, Slice b) internal {
 59 |         assertNotEq(a, b.toBytes());
 60 |     }
 61 |     /// @dev Native bytes on the left, Slice on the right; `err` is passed through.
 62 |     function assertNotEq(bytes memory a, Slice b, string memory err) internal {
 63 |         assertNotEq(a, b.toBytes(), err);
 64 |     }
 65 | 
 66 |     // Lt
 67 |     /// @dev Fails the test unless `a.lt(b)` holds; on failure both operands are logged as bytes.
 68 |     function assertLt(Slice a, Slice b) internal virtual {
 69 |         if (!a.lt(b)) {
 70 |             emit Log("Error: a < b not satisfied [bytes]");
 71 |             emit LogNamedBytes("  Value a", a.toBytes());
 72 |             emit LogNamedBytes("  Value b", b.toBytes()); // must be the right-hand operand, not `a` again
 73 |             fail();
 74 |         }
 75 |     }
 76 |     /// @dev Same check with a custom message: `err` is logged first, then the plain overload reports the operands.
 77 |     function assertLt(Slice a, Slice b, string memory err) internal virtual {
 78 |         if (!a.lt(b)) {
 79 |             emit LogNamedString("Error", err);
 80 |             assertLt(a, b);
 81 |         }
 82 |     }
 83 |     /// @dev Slice vs. native bytes; `b` is wrapped with toSlice().
 84 |     function assertLt(Slice a, bytes memory b) internal virtual {
 85 |         assertLt(a, b.toSlice());
 86 |     }
 87 |     /// @dev Slice vs. native bytes, with a custom message.
 88 |     function assertLt(Slice a, bytes memory b, string memory err) internal virtual {
 89 |         assertLt(a, b.toSlice(), err);
 90 |     }
 91 |     /// @dev Native bytes vs. Slice; `a` is wrapped with toSlice().
 92 |     function assertLt(bytes memory a, Slice b) internal virtual {
 93 |         assertLt(a.toSlice(), b);
 94 |     }
 95 |     /// @dev Native bytes vs. Slice, with a custom message.
 96 |     function assertLt(bytes memory a, Slice b, string memory err) internal virtual {
 97 |         assertLt(a.toSlice(), b, err);
 98 |     }
 99 |     /// @dev Two native bytes values; both are wrapped with toSlice().
100 |     function assertLt(bytes memory a, bytes memory b) internal virtual {
101 |         assertLt(a.toSlice(), b.toSlice());
102 |     }
103 |     /// @dev Two native bytes values, with a custom message.
104 |     function assertLt(bytes memory a, bytes memory b, string memory err) internal virtual {
105 |         assertLt(a.toSlice(), b.toSlice(), err);
106 |     }
107 | 
108 |     // Lte
109 |     /// @dev Fails the test unless `a.lte(b)` holds; on failure both operands are logged as bytes.
110 |     function assertLte(Slice a, Slice b) internal virtual {
111 |         if (!a.lte(b)) {
112 |             emit Log("Error: a <= b not satisfied [bytes]");
113 |             emit LogNamedBytes("  Value a", a.toBytes());
114 |             emit LogNamedBytes("  Value b", b.toBytes()); // must be the right-hand operand, not `a` again
115 |             fail();
116 |         }
117 |     }
118 |     /// @dev Same check with a custom message: `err` is logged first, then the plain overload reports the operands.
119 |     function assertLte(Slice a, Slice b, string memory err) internal virtual {
120 |         if (!a.lte(b)) {
121 |             emit LogNamedString("Error", err);
122 |             assertLte(a, b);
123 |         }
124 |     }
125 |     /// @dev Slice vs. native bytes; `b` is wrapped with toSlice().
126 |     function assertLte(Slice a, bytes memory b) internal virtual {
127 |         assertLte(a, b.toSlice());
128 |     }
129 |     /// @dev Slice vs. native bytes, with a custom message.
130 |     function assertLte(Slice a, bytes memory b, string memory err) internal virtual {
131 |         assertLte(a, b.toSlice(), err);
132 |     }
133 |     /// @dev Native bytes vs. Slice; `a` is wrapped with toSlice().
134 |     function assertLte(bytes memory a, Slice b) internal virtual {
135 |         assertLte(a.toSlice(), b);
136 |     }
137 |     /// @dev Native bytes vs. Slice, with a custom message.
138 |     function assertLte(bytes memory a, Slice b, string memory err) internal virtual {
139 |         assertLte(a.toSlice(), b, err);
140 |     }
141 |     /// @dev Two native bytes values; both are wrapped with toSlice().
142 |     function assertLte(bytes memory a, bytes memory b) internal virtual {
143 |         assertLte(a.toSlice(), b.toSlice());
144 |     }
145 |     /// @dev Two native bytes values, with a custom message.
146 |     function assertLte(bytes memory a, bytes memory b, string memory err) internal virtual {
147 |         assertLte(a.toSlice(), b.toSlice(), err);
148 |     }
149 | 
150 |     // Gt
151 |     /// @dev Fails the test unless `a.gt(b)` holds; on failure both operands are logged as bytes.
152 |     function assertGt(Slice a, Slice b) internal virtual {
153 |         if (!a.gt(b)) {
154 |             emit Log("Error: a > b not satisfied [bytes]");
155 |             emit LogNamedBytes("  Value a", a.toBytes());
156 |             emit LogNamedBytes("  Value b", b.toBytes()); // must be the right-hand operand, not `a` again
157 |             fail();
158 |         }
159 |     }
160 |     /// @dev Same check with a custom message: `err` is logged first, then the plain overload reports the operands.
161 |     function assertGt(Slice a, Slice b, string memory err) internal virtual {
162 |         if (!a.gt(b)) {
163 |             emit LogNamedString("Error", err);
164 |             assertGt(a, b);
165 |         }
166 |     }
167 |     /// @dev Slice vs. native bytes; `b` is wrapped with toSlice().
168 |     function assertGt(Slice a, bytes memory b) internal virtual {
169 |         assertGt(a, b.toSlice());
170 |     }
171 |     /// @dev Slice vs. native bytes, with a custom message.
172 |     function assertGt(Slice a, bytes memory b, string memory err) internal virtual {
173 |         assertGt(a, b.toSlice(), err);
174 |     }
175 |     /// @dev Native bytes vs. Slice; `a` is wrapped with toSlice().
176 |     function assertGt(bytes memory a, Slice b) internal virtual {
177 |         assertGt(a.toSlice(), b);
178 |     }
179 |     /// @dev Native bytes vs. Slice, with a custom message.
180 |     function assertGt(bytes memory a, Slice b, string memory err) internal virtual {
181 |         assertGt(a.toSlice(), b, err);
182 |     }
183 |     /// @dev Two native bytes values; both are wrapped with toSlice().
184 |     function assertGt(bytes memory a, bytes memory b) internal virtual {
185 |         assertGt(a.toSlice(), b.toSlice());
186 |     }
187 |     /// @dev Two native bytes values, with a custom message.
188 |     function assertGt(bytes memory a, bytes memory b, string memory err) internal virtual {
189 |         assertGt(a.toSlice(), b.toSlice(), err);
190 |     }
191 | 
192 |     // Gte
193 |     /// @dev Fails the test unless `a.gte(b)` holds; on failure both operands are logged as bytes.
194 |     function assertGte(Slice a, Slice b) internal virtual {
195 |         if (!a.gte(b)) {
196 |             emit Log("Error: a >= b not satisfied [bytes]");
197 |             emit LogNamedBytes("  Value a", a.toBytes());
198 |             emit LogNamedBytes("  Value b", b.toBytes()); // must be the right-hand operand, not `a` again
199 |             fail();
200 |         }
201 |     }
202 |     /// @dev Same check with a custom message: `err` is logged first, then the plain overload reports the operands.
203 |     function assertGte(Slice a, Slice b, string memory err) internal virtual {
204 |         if (!a.gte(b)) {
205 |             emit LogNamedString("Error", err);
206 |             assertGte(a, b);
207 |         }
208 |     }
209 |     /// @dev Slice vs. native bytes; `b` is wrapped with toSlice().
210 |     function assertGte(Slice a, bytes memory b) internal virtual {
211 |         assertGte(a, b.toSlice());
212 |     }
213 |     /// @dev Slice vs. native bytes, with a custom message.
214 |     function assertGte(Slice a, bytes memory b, string memory err) internal virtual {
215 |         assertGte(a, b.toSlice(), err);
216 |     }
217 |     /// @dev Native bytes vs. Slice; `a` is wrapped with toSlice().
218 |     function assertGte(bytes memory a, Slice b) internal virtual {
219 |         assertGte(a.toSlice(), b);
220 |     }
221 |     /// @dev Native bytes vs. Slice, with a custom message.
222 |     function assertGte(bytes memory a, Slice b, string memory err) internal virtual {
223 |         assertGte(a.toSlice(), b, err);
224 |     }
225 |     /// @dev Two native bytes values; both are wrapped with toSlice().
226 |     function assertGte(bytes memory a, bytes memory b) internal virtual {
227 |         assertGte(a.toSlice(), b.toSlice());
228 |     }
229 |     /// @dev Two native bytes values, with a custom message.
230 |     function assertGte(bytes memory a, bytes memory b, string memory err) internal virtual {
231 |         assertGte(a.toSlice(), b.toSlice(), err);
232 |     }
233 | 
234 |     // Contains
235 |     /// @dev Fails the test unless `a.contains(b)` holds; on failure both operands are logged as bytes.
236 |     function assertContains(Slice a, Slice b) internal virtual {
237 |         // nothing to report when b is found in a
238 |         if (a.contains(b)) return;
239 |         emit Log("Error: a does not contain b [bytes]");
240 |         emit LogNamedBytes("  Bytes a", a.toBytes());
241 |         emit LogNamedBytes("  Bytes b", b.toBytes());
242 |         fail();
243 |     }
244 |     /// @dev Same check with a custom message `err`.
245 |     function assertContains(Slice a, Slice b, string memory err) internal virtual {
246 |         // on failure log the caller's message first, then let the plain overload report the operands
247 |         if (a.contains(b)) return;
248 |         emit LogNamedString("Error", err);
249 |         assertContains(a, b);
250 |     }
251 |     /// @dev Slice haystack, native bytes needle.
252 |     function assertContains(Slice a, bytes memory b) internal virtual {
253 |         assertContains(a, toSlice(b));
254 |     }
255 |     /// @dev Slice haystack, native bytes needle, custom message.
256 |     function assertContains(Slice a, bytes memory b, string memory err) internal virtual {
257 |         assertContains(a, toSlice(b), err);
258 |     }
259 |     /// @dev Native bytes haystack, Slice needle.
260 |     function assertContains(bytes memory a, Slice b) internal virtual {
261 |         assertContains(toSlice(a), b);
262 |     }
263 |     /// @dev Native bytes haystack, Slice needle, custom message.
264 |     function assertContains(bytes memory a, Slice b, string memory err) internal virtual {
265 |         assertContains(toSlice(a), b, err);
266 |     }
267 |     /// @dev Native bytes haystack and needle.
268 |     function assertContains(bytes memory a, bytes memory b) internal virtual {
269 |         assertContains(toSlice(a), toSlice(b));
270 |     }
271 |     /// @dev Native bytes haystack and needle, custom message.
272 |     function assertContains(bytes memory a, bytes memory b, string memory err) internal virtual {
273 |         assertContains(toSlice(a), toSlice(b), err);
274 |     }
275 | }


--------------------------------------------------------------------------------
/src/test/StrSliceAssertions.sol:
--------------------------------------------------------------------------------
  1 | // SPDX-License-Identifier: MIT
  2 | 
  3 | pragma solidity ^0.8.17;
  4 | 
  5 | import { PRBTest } from "@prb/test/src/PRBTest.sol";
  6 | 
  7 | import { StrSlice, toSlice } from "../StrSlice.sol";
  8 | 
  9 | using { toSlice } for string;
 10 | 
 11 | /// @title Extension to PRBTest with StrSlice assertions.
 12 | /// @dev Also provides lt,lte,gt,gte,contains for 2 native `string`.
 13 | contract StrSliceAssertions is PRBTest {
 14 |     // Eq: every overload converts its StrSlice argument(s) with toString() and forwards to an assertEq overload on `string`
 15 |     /// @dev Asserts that string slices `a` and `b` convert to equal strings.
 16 |     function assertEq(StrSlice a, StrSlice b) internal {
 17 |         assertEq(a.toString(), b.toString());
 18 |     }
 19 |     /// @dev StrSlice/StrSlice comparison; `err` is passed through to the forwarded assertion.
 20 |     function assertEq(StrSlice a, StrSlice b, string memory err) internal {
 21 |         assertEq(a.toString(), b.toString(), err);
 22 |     }
 23 |     /// @dev StrSlice on the left, native string on the right.
 24 |     function assertEq(StrSlice a, string memory b) internal {
 25 |         assertEq(a.toString(), b);
 26 |     }
 27 |     /// @dev StrSlice on the left, native string on the right; `err` is passed through.
 28 |     function assertEq(StrSlice a, string memory b, string memory err) internal {
 29 |         assertEq(a.toString(), b, err);
 30 |     }
 31 |     /// @dev Native string on the left, StrSlice on the right.
 32 |     function assertEq(string memory a, StrSlice b) internal {
 33 |         assertEq(a, b.toString());
 34 |     }
 35 |     /// @dev Native string on the left, StrSlice on the right; `err` is passed through.
 36 |     function assertEq(string memory a, StrSlice b, string memory err) internal {
 37 |         assertEq(a, b.toString(), err);
 38 |     }
 39 | 
 40 |     // NotEq: every overload converts its StrSlice argument(s) with toString() and forwards to an assertNotEq overload on `string`
 41 |     /// @dev Asserts that string slices `a` and `b` convert to different strings.
 42 |     function assertNotEq(StrSlice a, StrSlice b) internal {
 43 |         assertNotEq(a.toString(), b.toString());
 44 |     }
 45 |     /// @dev StrSlice/StrSlice comparison; `err` is passed through to the forwarded assertion.
 46 |     function assertNotEq(StrSlice a, StrSlice b, string memory err) internal {
 47 |         assertNotEq(a.toString(), b.toString(), err);
 48 |     }
 49 |     /// @dev StrSlice on the left, native string on the right.
 50 |     function assertNotEq(StrSlice a, string memory b) internal {
 51 |         assertNotEq(a.toString(), b);
 52 |     }
 53 |     /// @dev StrSlice on the left, native string on the right; `err` is passed through.
 54 |     function assertNotEq(StrSlice a, string memory b, string memory err) internal {
 55 |         assertNotEq(a.toString(), b, err);
 56 |     }
 57 |     /// @dev Native string on the left, StrSlice on the right.
 58 |     function assertNotEq(string memory a, StrSlice b) internal {
 59 |         assertNotEq(a, b.toString());
 60 |     }
 61 |     /// @dev Native string on the left, StrSlice on the right; `err` is passed through.
 62 |     function assertNotEq(string memory a, StrSlice b, string memory err) internal {
 63 |         assertNotEq(a, b.toString(), err);
 64 |     }
 65 | 
 66 |     // Lt
 67 |     /// @dev Fails the test unless `a.lt(b)` holds; on failure both operands are logged as strings.
 68 |     function assertLt(StrSlice a, StrSlice b) internal virtual {
 69 |         if (!a.lt(b)) {
 70 |             emit Log("Error: a < b not satisfied [string]");
 71 |             emit LogNamedString("  Value a", a.toString());
 72 |             emit LogNamedString("  Value b", b.toString()); // must be the right-hand operand, not `a` again
 73 |             fail();
 74 |         }
 75 |     }
 76 |     /// @dev Same check with a custom message: `err` is logged first, then the plain overload reports the operands.
 77 |     function assertLt(StrSlice a, StrSlice b, string memory err) internal virtual {
 78 |         if (!a.lt(b)) {
 79 |             emit LogNamedString("Error", err);
 80 |             assertLt(a, b);
 81 |         }
 82 |     }
 83 |     /// @dev StrSlice vs. native string; `b` is wrapped with toSlice().
 84 |     function assertLt(StrSlice a, string memory b) internal virtual {
 85 |         assertLt(a, b.toSlice());
 86 |     }
 87 |     /// @dev StrSlice vs. native string, with a custom message.
 88 |     function assertLt(StrSlice a, string memory b, string memory err) internal virtual {
 89 |         assertLt(a, b.toSlice(), err);
 90 |     }
 91 |     /// @dev Native string vs. StrSlice; `a` is wrapped with toSlice().
 92 |     function assertLt(string memory a, StrSlice b) internal virtual {
 93 |         assertLt(a.toSlice(), b);
 94 |     }
 95 |     /// @dev Native string vs. StrSlice, with a custom message.
 96 |     function assertLt(string memory a, StrSlice b, string memory err) internal virtual {
 97 |         assertLt(a.toSlice(), b, err);
 98 |     }
 99 |     /// @dev Two native strings; both are wrapped with toSlice().
100 |     function assertLt(string memory a, string memory b) internal virtual {
101 |         assertLt(a.toSlice(), b.toSlice());
102 |     }
103 |     /// @dev Two native strings, with a custom message.
104 |     function assertLt(string memory a, string memory b, string memory err) internal virtual {
105 |         assertLt(a.toSlice(), b.toSlice(), err);
106 |     }
107 | 
108 |     // Lte
109 |     /// @dev Fails the test unless `a.lte(b)` holds; on failure both operands are logged as strings.
110 |     function assertLte(StrSlice a, StrSlice b) internal virtual {
111 |         if (!a.lte(b)) {
112 |             emit Log("Error: a <= b not satisfied [string]");
113 |             emit LogNamedString("  Value a", a.toString());
114 |             emit LogNamedString("  Value b", b.toString()); // must be the right-hand operand, not `a` again
115 |             fail();
116 |         }
117 |     }
118 |     /// @dev Same check with a custom message: `err` is logged first, then the plain overload reports the operands.
119 |     function assertLte(StrSlice a, StrSlice b, string memory err) internal virtual {
120 |         if (!a.lte(b)) {
121 |             emit LogNamedString("Error", err);
122 |             assertLte(a, b);
123 |         }
124 |     }
125 |     /// @dev StrSlice vs. native string; `b` is wrapped with toSlice().
126 |     function assertLte(StrSlice a, string memory b) internal virtual {
127 |         assertLte(a, b.toSlice());
128 |     }
129 |     /// @dev StrSlice vs. native string, with a custom message.
130 |     function assertLte(StrSlice a, string memory b, string memory err) internal virtual {
131 |         assertLte(a, b.toSlice(), err);
132 |     }
133 |     /// @dev Native string vs. StrSlice; `a` is wrapped with toSlice().
134 |     function assertLte(string memory a, StrSlice b) internal virtual {
135 |         assertLte(a.toSlice(), b);
136 |     }
137 |     /// @dev Native string vs. StrSlice, with a custom message.
138 |     function assertLte(string memory a, StrSlice b, string memory err) internal virtual {
139 |         assertLte(a.toSlice(), b, err);
140 |     }
141 |     /// @dev Two native strings; both are wrapped with toSlice().
142 |     function assertLte(string memory a, string memory b) internal virtual {
143 |         assertLte(a.toSlice(), b.toSlice());
144 |     }
145 |     /// @dev Two native strings, with a custom message.
146 |     function assertLte(string memory a, string memory b, string memory err) internal virtual {
147 |         assertLte(a.toSlice(), b.toSlice(), err);
148 |     }
149 | 
150 |     // Gt
151 |     /// @dev Fails the test unless `a.gt(b)` holds; on failure both operands are logged as strings.
152 |     function assertGt(StrSlice a, StrSlice b) internal virtual {
153 |         if (!a.gt(b)) {
154 |             emit Log("Error: a > b not satisfied [string]");
155 |             emit LogNamedString("  Value a", a.toString());
156 |             emit LogNamedString("  Value b", b.toString()); // must be the right-hand operand, not `a` again
157 |             fail();
158 |         }
159 |     }
160 |     /// @dev Same check with a custom message: `err` is logged first, then the plain overload reports the operands.
161 |     function assertGt(StrSlice a, StrSlice b, string memory err) internal virtual {
162 |         if (!a.gt(b)) {
163 |             emit LogNamedString("Error", err);
164 |             assertGt(a, b);
165 |         }
166 |     }
167 |     /// @dev StrSlice vs. native string; `b` is wrapped with toSlice().
168 |     function assertGt(StrSlice a, string memory b) internal virtual {
169 |         assertGt(a, b.toSlice());
170 |     }
171 |     /// @dev StrSlice vs. native string, with a custom message.
172 |     function assertGt(StrSlice a, string memory b, string memory err) internal virtual {
173 |         assertGt(a, b.toSlice(), err);
174 |     }
175 |     /// @dev Native string vs. StrSlice; `a` is wrapped with toSlice().
176 |     function assertGt(string memory a, StrSlice b) internal virtual {
177 |         assertGt(a.toSlice(), b);
178 |     }
179 |     /// @dev Native string vs. StrSlice, with a custom message.
180 |     function assertGt(string memory a, StrSlice b, string memory err) internal virtual {
181 |         assertGt(a.toSlice(), b, err);
182 |     }
183 |     /// @dev Two native strings; both are wrapped with toSlice().
184 |     function assertGt(string memory a, string memory b) internal virtual {
185 |         assertGt(a.toSlice(), b.toSlice());
186 |     }
187 |     /// @dev Two native strings, with a custom message.
188 |     function assertGt(string memory a, string memory b, string memory err) internal virtual {
189 |         assertGt(a.toSlice(), b.toSlice(), err);
190 |     }
191 | 
192 |     // Gte
193 | 
194 |     function assertGte(StrSlice a, StrSlice b) internal virtual {
195 |         if (!a.gte(b)) {
196 |             emit Log("Error: a >= b not satisfied [string]");
197 |             emit LogNamedString("  Value a", a.toString());
198 |             emit LogNamedString("  Value b", a.toString());
199 |             fail();
200 |         }
201 |     }
202 | 
203 |     function assertGte(StrSlice a, StrSlice b, string memory err) internal virtual {
204 |         if (!a.gte(b)) {
205 |             emit LogNamedString("Error", err);
206 |             assertGte(a, b);
207 |         }
208 |     }
209 | 
210 |     function assertGte(StrSlice a, string memory b) internal virtual {
211 |         assertGte(a, b.toSlice());
212 |     }
213 | 
214 |     function assertGte(StrSlice a, string memory b, string memory err) internal virtual {
215 |         assertGte(a, b.toSlice(), err);
216 |     }
217 | 
218 |     function assertGte(string memory a, StrSlice b) internal virtual {
219 |         assertGte(a.toSlice(), b);
220 |     }
221 | 
222 |     function assertGte(string memory a, StrSlice b, string memory err) internal virtual {
223 |         assertGte(a.toSlice(), b, err);
224 |     }
225 | 
226 |     function assertGte(string memory a, string memory b) internal virtual {
227 |         assertGte(a.toSlice(), b.toSlice());
228 |     }
229 | 
230 |     function assertGte(string memory a, string memory b, string memory err) internal virtual {
231 |         assertGte(a.toSlice(), b.toSlice(), err); // convert both native strings via toSlice(); `err` is passed through unchanged
232 |     }
233 | 
234 |     // Contains
235 | 
236 |     function assertContains(StrSlice a, StrSlice b) internal virtual {
237 |         // Does nothing when a.contains(b) holds; otherwise logs both strings and calls fail().
238 |         if (a.contains(b)) return;
239 |         emit Log("Error: a does not contain b [string]");
240 |         emit LogNamedString("  String a", a.toString());
241 |         emit LogNamedString("  String b", b.toString());
242 |         fail();
243 |     }
244 | 
245 |     function assertContains(StrSlice a, StrSlice b, string memory err) internal virtual {
246 |         // On failure, log the caller's message first, then let the overload without `err` report.
247 |         if (a.contains(b)) return;
248 |         emit LogNamedString("Error", err);
249 |         assertContains(a, b);
250 |     }
251 | 
252 |     function assertContains(StrSlice a, string memory b) internal virtual {
253 |         assertContains(a, b.toSlice()); // convert the native string `b` via toSlice(), then forward to the assertContains overload matching the converted arguments
254 |     }
255 | 
256 |     function assertContains(StrSlice a, string memory b, string memory err) internal virtual {
257 |         assertContains(a, b.toSlice(), err); // same conversion of `b`; `err` is passed through unchanged
258 |     }
259 | 
260 |     function assertContains(string memory a, StrSlice b) internal virtual {
261 |         assertContains(a.toSlice(), b); // convert the native string `a` via toSlice() and forward
262 |     }
263 | 
264 |     function assertContains(string memory a, StrSlice b, string memory err) internal virtual {
265 |         assertContains(a.toSlice(), b, err); // same conversion of `a`; `err` is passed through unchanged
266 |     }
267 | 
268 |     function assertContains(string memory a, string memory b) internal virtual {
269 |         assertContains(a.toSlice(), b.toSlice()); // convert both native strings via toSlice() and forward
270 |     }
271 | 
272 |     function assertContains(string memory a, string memory b, string memory err) internal virtual {
273 |         assertContains(a.toSlice(), b.toSlice(), err); // convert both native strings via toSlice(); `err` is passed through unchanged
274 |     }
275 | }


--------------------------------------------------------------------------------
/src/utils/memchr.sol:
--------------------------------------------------------------------------------
  1 | // SPDX-License-Identifier: MIT
  2 | 
  3 | pragma solidity ^0.8.17;
  4 | 
  5 | /*
  6 |  * These functions are VERY DANGEROUS!
  7 |  * They operate directly on memory pointers, use with caution.
  8 |  *
  9 |  * Assembly here is marked as memory-safe for optimization.
 10 |  * The caller MUST use pointers in a memory-safe way!
 11 |  * https://docs.soliditylang.org/en/latest/assembly.html#memory-safety
 12 |  *
 13 |  * Loosely based on https://doc.rust-lang.org/1.65.0/core/slice/memchr/
 14 |  */
 15 | 
 16 | /**
 17 |  * @dev Returns the index (byte offset from `ptrText`) of the first byte equal to `x`
 18 |  * within the `lenText` bytes starting at `ptrText`; or type(uint256).max if not found.
 19 |  */
 20 | function memchr(uint256 ptrText, uint256 lenText, uint8 x) pure returns (uint256 index) {
 21 |     if (lenText <= 32) {
 22 |         // Fast path for small slices.
 23 |         return memchrWord(ptrText, lenText, x);
 24 |     }
 25 | 
 26 |     uint256 ptrStart = ptrText; // kept for offset computation; ptrText itself is advanced below
 27 |     uint256 lenTail; // bytes left over after the last whole 32-byte word
 28 |     uint256 ptrEnd; // end of the whole-word region (where the tail starts)
 29 |     // safe because lenTail <= lenText (ptr+len is implicitly safe)
 30 |     unchecked {
 31 |         // (unchecked % saves a little gas)
 32 |         lenTail = lenText % 32;
 33 |         ptrEnd = ptrText + (lenText - lenTail);
 34 |     }
 35 |     uint256 repeatedX = repeatByte(x); // `x` replicated into every byte of a word
 36 |     while (ptrText < ptrEnd) {
 37 |         // any bytes equal to `x` become zeros
 38 |         // (this helps find `x` faster, values of non-zero bytes don't matter)
 39 |         uint256 chunkXZero;
 40 |         /// @solidity memory-safe-assembly
 41 |         assembly {
 42 |             chunkXZero := xor(mload(ptrText), repeatedX)
 43 |         }
 44 |         // break if there is a matching byte
 45 |         if (nonZeroIfXcontainsZeroByte(chunkXZero) != 0) {
 46 |             // - is safe because ptrText >= ptrStart (ptrText = ptrStart + 32*n)
 47 |             // + is safe because index + offsetLen < lenText
 48 |             // (ptr+len is implicitly safe)
 49 |             unchecked {
 50 |                 return
 51 |                     // index (position of the match inside this 32-byte word)
 52 |                     memchrWord(ptrText, 32, x)
 53 |                     // + offsetLen (bytes already scanned before this word)
 54 |                     + (ptrText - ptrStart);
 55 |             }
 56 |         }
 57 | 
 58 |         // safe because ptrText < ptrEnd, and ptrEnd = ptrText + n*32 (see lenTail)
 59 |         unchecked {
 60 |             ptrText += 32;
 61 |         }
 62 |     }
 63 | 
 64 |     if (lenTail == 0) return type(uint256).max; // no partial word left, so `x` was not found
 65 | 
 66 |     index = memchrWord(ptrEnd, lenTail, x); // search the trailing partial word
 67 |     if (index == type(uint256).max) {
 68 |         return type(uint256).max;
 69 |     } else {
 70 |         // - is safe because ptrEnd >= ptrStart (ptrEnd = ptrStart + lenText - lenTail)
 71 |         // + is safe because index + offsetLen < lenText
 72 |         // (ptr+len is implicitly safe)
 73 |         unchecked {
 74 |             return index
 75 |                 // + offsetLen (length of the whole-word region)
 76 |                 + (ptrEnd - ptrStart);
 77 |         }
 78 |     }
 79 | }
 80 | 
 81 | /**
 82 |  * @dev Returns the index (byte offset from `ptrText`) of the last byte equal to `x`
 83 |  * within the `lenText` bytes starting at `ptrText`; or type(uint256).max if not found.
 84 |  */
 85 | function memrchr(uint256 ptrText, uint256 lenText, uint8 x) pure returns (uint256) {
 86 |     if (lenText <= 32) {
 87 |         // Fast path for small slices.
 88 |         return memrchrWord(ptrText, lenText, x);
 89 |     }
 90 | 
 91 |     uint256 lenTail; // bytes left over after the last whole 32-byte word
 92 |     uint256 offsetPtr; // scan cursor; starts at the end of the text and moves backwards
 93 |     // safe because pointers are guaranteed to be valid by the caller
 94 |     unchecked {
 95 |         // (unchecked % saves a little gas)
 96 |         lenTail = lenText % 32;
 97 |         offsetPtr = ptrText + lenText;
 98 |     }
 99 | 
100 |     if (lenTail != 0) {
101 |         // remove tail length
102 |         // - is safe because lenTail <= lenText <= offsetPtr
103 |         unchecked {
104 |             offsetPtr -= lenTail;
105 |         }
106 |         // return if there is a matching byte
107 |         uint256 index = memrchrWord(offsetPtr, lenTail, x);
108 |         if (index != type(uint256).max) {
109 |             // - is safe because offsetPtr > ptrText (offsetPtr = ptrText + lenText - lenTail)
110 |             // + is safe because index + offsetLen < lenText
111 |             unchecked {
112 |                 return index
113 |                     // + offsetLen (bytes that precede the tail)
114 |                     + (offsetPtr - ptrText);
115 |             }
116 |         }
117 |     }
118 | 
119 |     uint256 repeatedX = repeatByte(x); // `x` replicated into every byte of a word
120 |     while (offsetPtr > ptrText) {
121 |         // - is safe because offsetPtr - ptrText is a non-zero multiple of 32 here
122 |         unchecked {
123 |             offsetPtr -= 32;
124 |         }
125 | 
126 |         // any bytes equal to `x` become zeros
127 |         // (this helps find `x` faster, values of non-zero bytes don't matter)
128 |         uint256 chunkXZero;
129 |         /// @solidity memory-safe-assembly
130 |         assembly {
131 |             chunkXZero := xor(mload(offsetPtr), repeatedX)
132 |         }
133 |         // break if there is a matching byte
134 |         if (nonZeroIfXcontainsZeroByte(chunkXZero) != 0) {
135 |             // - is safe because offsetPtr >= ptrText (it was > ptrText before the decrement above)
136 |             // + is safe because index + offsetLen < lenText
137 |             unchecked {
138 |                 return
139 |                     // index (position of the match inside this 32-byte word)
140 |                     memrchrWord(offsetPtr, 32, x)
141 |                     // + offsetLen (bytes that precede this word)
142 |                     + (offsetPtr - ptrText);
143 |             }
144 |         }
145 |     }
146 |     // not found
147 |     return type(uint256).max;
148 | }
149 | 
150 | /**
151 |  * @dev Returns the first index matching the byte `x` in text;
152 |  * or type(uint256).max if not found.
153 |  *
154 |  * WARNING: it works ONLY for length 32 or less (a longer `lenText` is clamped to 32).
155 |  * This is for use by memchr after its chunk search.
156 |  */
157 | function memchrWord(uint256 ptrText, uint256 lenText, uint8 x) pure returns (uint256) {
158 |     uint256 chunk;
159 |     // One full 32-byte word is loaded; bytes at index >= lenText are never compared below.
160 |     assembly ("memory-safe") {
161 |         chunk := mload(ptrText)
162 |     }
163 | 
164 |     uint256 i;
165 |     if (lenText > 32) {
166 |         lenText = 32;
167 |     }
168 | 
169 |     ////////binary search start
170 |     // Some manual binary searches, cost ~50gas, could save up to ~1500
171 |     // (comment them out and the function will work fine)
172 |     if (lenText >= 16 + 2) {
173 |         uint256 chunkXZero = chunk ^ repeatByte(x);
174 |         // bytes equal to `x` are now zero; OR-ing with type(uintN).max hides the low N bits from the test
175 |         if (nonZeroIfXcontainsZeroByte(chunkXZero | type(uint128).max) == 0) {
176 |             i = 16;
177 |             // no match in bytes 0..15; try to skip bytes 16..23 as well
178 |             if (lenText >= 24 + 2) {
179 |                 if (nonZeroIfXcontainsZeroByte(chunkXZero | type(uint64).max) == 0) {
180 |                     i = 24;
181 |                 }
182 |             }
183 |         } else if (nonZeroIfXcontainsZeroByte(chunkXZero | type(uint192).max) == 0) {
184 |             i = 8;
185 |         }
186 |     } else if (lenText >= 8 + 2) {
187 |         uint256 chunkXZero = chunk ^ repeatByte(x);
188 |         // no match in bytes 0..7 means the linear scan can start at 8
189 |         if (nonZeroIfXcontainsZeroByte(chunkXZero | type(uint192).max) == 0) {
190 |             i = 8;
191 |         }
192 |     }
193 |     ////////binary search end
194 | 
195 |     // ++ is safe because lenText <= 32
196 |     unchecked {
197 |         for (; i < lenText; i++) {
198 |             uint8 b;
199 |             assembly ("memory-safe") {
200 |                 b := byte(i, chunk)
201 |             }
202 |             if (b == x) return i;
203 |         }
204 |     }
205 |     // not found
206 |     return type(uint256).max;
207 | }
208 | 
209 | /**
210 |  * @dev Returns the last index matching the byte `x` in text;
211 |  * or type(uint256).max if not found.
212 |  *
213 |  * WARNING: it works ONLY for length 32 or less (a longer `lenText` is clamped to 32).
214 |  * This is for use by memrchr after its chunk search.
215 |  */
216 | function memrchrWord(uint256 ptrText, uint256 lenText, uint8 x) pure returns (uint256) {
217 |     if (lenText > 32) {
218 |         lenText = 32;
219 |     }
220 |     uint256 chunk;
221 |     // One full 32-byte word is loaded; only bytes at index < lenText are compared below.
222 |     assembly ("memory-safe") {
223 |         chunk := mload(ptrText)
224 |     }
225 | 
226 |     while (lenText > 0) {
227 |         // -- is safe because lenText > 0
228 |         unchecked {
229 |             lenText--;
230 |         }
231 |         uint8 b;
232 |         assembly ("memory-safe") {
233 |             b := byte(lenText, chunk)
234 |         }
235 |         if (b == x) return lenText;
236 |     }
237 |     // not found
238 |     return type(uint256).max;
239 | }
240 | 
241 | /// @dev repeating low bit (0x01 in each of the 32 bytes) for nonZeroIfXcontainsZeroByte
242 | uint256 constant LO_U256 = 0x0101010101010101010101010101010101010101010101010101010101010101;
243 | /// @dev repeating high bit (0x80 in each of the 32 bytes) for nonZeroIfXcontainsZeroByte
244 | uint256 constant HI_U256 = 0x8080808080808080808080808080808080808080808080808080808080808080;
245 | 
246 | /**
247 |  * @dev Returns a non-zero value if `x` contains any zero byte, and 0 otherwise.
248 |  * (returning a bool would be less efficient)
249 |  *
250 |  * From *Matters Computational*, J. Arndt:
251 |  *
252 |  * "The idea is to subtract one from each of the bytes and then look for
253 |  * bytes where the borrow propagated all the way to the most significant bit."
254 |  */
255 | function nonZeroIfXcontainsZeroByte(uint256 x) pure returns (uint256) {
256 |     unchecked {
257 |         return (x - LO_U256) & (~x) & HI_U256;
258 |     }
259 |     /*
260 |      * An example of how it works (&1 and &2 are the two successive ANDs; bytes above the 00 get flagged too, so only compare the result with 0):
261 |      *                                              here is 00
262 |      * x    0x0101010101010101010101010101010101010101010101000101010101010101
263 |      * x-LO 0xffffffffffffffffffffffffffffffffffffffffffffffff0000000000000000
264 |      * ~x   0xfefefefefefefefefefefefefefefefefefefefefefefefffefefefefefefefe
265 |      * &1   0xfefefefefefefefefefefefefefefefefefefefefefefeff0000000000000000
266 |      * &2   0x8080808080808080808080808080808080808080808080800000000000000000
267 |      */
268 | }
269 | 
270 | /// @dev Fills all 32 bytes of a word with `b`
271 | function repeatByte(uint8 b) pure returns (uint256) {
272 |     // Multiplying by 0x0101..01 copies `b` into every byte of the word.
273 |     // The largest product, 0xFF * 0x0101..01 = 0xFFFF..FF, still fits in uint256,
274 |     // so the multiplication cannot overflow and unchecked is fine.
275 |     unchecked {
276 |         uint256 onePerByte = type(uint256).max / type(uint8).max; return onePerByte * b;
277 |     }
278 | }


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # StrSlice & Slice library for Solidity
  2 | 
  3 | - Types: [StrSlice](src/StrSlice.sol) for strings, [Slice](src/Slice.sol) for bytes, [StrChar](src/StrChar.sol) for characters
  4 | - [Gas efficient](https://github.com/dk1a/solidity-stringutils-gas)
  5 | - Versioned releases, available for both foundry and hardhat
  6 | - Simple imports, you only need e.g. `StrSlice` and `toSlice`
  7 | - `StrSlice` enforces UTF-8 character boundaries; `StrChar` validates character encoding
  8 | - Clean, well-documented and thoroughly-tested source code
  9 | - Optional [PRBTest](https://github.com/paulrberg/prb-test) extension with assertions like `assertContains` and `assertLt` for both slices and native `bytes`, `string`
 10 | - `Slice` and `StrSlice` are value types, not structs
 11 | - Low-level functions like [memchr](src/utils/memchr.sol), [memcmp, memmove etc](src/utils/mem.sol)
 12 | 
 13 | ## Install
 14 | 
 15 | ### Node
 16 | ```sh
 17 | yarn add @dk1a/solidity-stringutils
 18 | ```
 19 | 
 20 | ### Forge
 21 | ```sh
 22 | forge install --no-commit dk1a/solidity-stringutils
 23 | ```
 24 | 
 25 | ## StrSlice
 26 | 
 27 | ```solidity
 28 | import { StrSlice, toSlice } from "@dk1a/solidity-stringutils/src/StrSlice.sol";
 29 | 
 30 | using { toSlice } for string;
 31 | 
 32 | /// @dev Returns the content of brackets, or empty string if not found
 33 | function extractFromBrackets(string memory stuffInBrackets) pure returns (StrSlice extracted) {
 34 |     StrSlice s = stuffInBrackets.toSlice();
 35 |     bool found;
 36 | 
 37 |     (found, , s) = s.splitOnce(toSlice("("));
 38 |     if (!found) return toSlice("");
 39 | 
 40 |     (found, s, ) = s.rsplitOnce(toSlice(")"));
 41 |     if (!found) return toSlice("");
 42 | 
 43 |     return s;
 44 | }
 45 | /*
 46 | assertEq(
 47 |     extractFromBrackets("((1 + 2) + 3) + 4"),
 48 |     toSlice("(1 + 2) + 3")
 49 | );
 50 | */
 51 | ```
 52 | 
 53 | See [ExamplesTest](test/Examples.t.sol).
 54 | 
 55 | Internally `StrSlice` uses `Slice` and extends it with logic for multibyte UTF-8 where necessary.
 56 | 
 57 | | Method           | Description                                      |
 58 | | ---------------- | ------------------------------------------------ |
 59 | | `len`            | length in **bytes**                              |
 60 | | `isEmpty`        | true if len == 0                                 |
 61 | | `toString`       | copy slice contents to a **new** string          |
 62 | | `keccak`         | equal to `keccak256(s.toString())`, but cheaper  |
 63 | **concatenate**
 64 | | `add`            | Concatenate 2 slices into a **new** string       |
 65 | | `join`           | Join slice array on `self` as separator          |
 66 | **compare**
 67 | | `cmp`            | 0 for eq, < 0 for lt, > 0 for gt                 |
 68 | | `eq`,`ne`        | ==, !=  (more efficient than cmp)                |
 69 | | `lt`,`lte`       | <, <=                                            |
 70 | | `gt`,`gte`       | >, >=                                            |
 71 | **index**
 72 | | `isCharBoundary` | true if given index is an allowed boundary       |
 73 | | `get`            | get 1 UTF-8 character at given index             |
 74 | | `splitAt`        | (slice[:index], slice[index:])                   |
 75 | | `getSubslice`    | slice[start:end]                                 |
 76 | **search**
 77 | | `find`           | index of the start of the **first** match        |
 78 | | `rfind`          | index of the start of the **last** match         |
 79 | |                  | *return `type(uint256).max` for no matches*      |
 80 | | `contains`       | true if a match is found                         |
 81 | | `startsWith`     | true if starts with pattern                      |
 82 | | `endsWith`       | true if ends with pattern                        |
 83 | **modify**
 84 | | `stripPrefix`    | returns subslice without the prefix              |
 85 | | `stripSuffix`    | returns subslice without the suffix              |
 86 | | `splitOnce`      | split into 2 subslices on the **first** match    |
 87 | | `rsplitOnce`     | split into 2 subslices on the **last** match     |
 88 | | `replacen`       | *experimental* replace `n` matches               |
 89 | |                  | *replacen requires 0 < pattern.len() <= to.len()*|
 90 | **iterate**
 91 | | `chars`          | character iterator over the slice                |
 92 | **ascii**
 93 | | `isAscii`        | true if all chars are ASCII                      |
 94 | **dangerous**
 95 | | `asSlice`        | get underlying Slice                             |
 96 | | `ptr`            | get memory pointer                               |
 97 | 
 98 | Indexes are in **bytes**, not characters. Indexing methods revert if `isCharBoundary` is false.
 99 | 
100 | ## StrCharsIter
101 | 
102 | *Returned by `chars` method of `StrSlice`*
103 | 
104 | ```solidity
105 | import { StrSlice, toSlice, StrCharsIter } from "@dk1a/solidity-stringutils/src/StrSlice.sol";
106 | 
107 | using { toSlice } for string;
108 | 
109 | /// @dev Returns a StrSlice of `str` with the first 2 UTF-8 characters removed;
110 | /// reverts on invalid UTF-8
111 | function removeFirstTwoChars(string memory str) pure returns (StrSlice) {
112 |     StrCharsIter memory chars = str.toSlice().chars();
113 |     for (uint256 i; i < 2; i++) {
114 |         if (chars.isEmpty()) break;
115 |         chars.next();
116 |     }
117 |     return chars.asStr();
118 | }
119 | /*
120 | assertEq(removeFirstTwoChars(unicode"📎!こんにちは"), unicode"こんにちは");
121 | */
122 | ```
123 | 
124 | | Method           | Description                                      |
125 | | ---------------- | ------------------------------------------------ |
126 | | `asStr`          | get underlying StrSlice of the remainder         |
127 | | `len`            | remainder length in **bytes**                    |
128 | | `isEmpty`        | true if len == 0                                 |
129 | | `next`           | advance the iterator, return the next StrChar    |
130 | | `nextBack`       | advance from the back, return the next StrChar   |
131 | | `count`          | returns the number of UTF-8 characters           |
132 | | `validateUtf8`   | returns true if the sequence is valid UTF-8      |
133 | **dangerous**
134 | | `unsafeNext`     | advance unsafely, return the next StrChar        |
135 | | `unsafeCount`    | unsafely count chars, read the source for caveats|
136 | | `ptr`            | get memory pointer                               |
137 | 
138 | `count`, `validateUtf8`, `unsafeCount` consume the iterator in O(n).
139 | 
140 | Safe methods revert on an invalid UTF-8 byte sequence.
141 | 
142 | `unsafeNext` does NOT check whether the iterator is empty, so it may underflow! It does not revert on invalid UTF-8: if the returned `StrChar` is invalid, it has length 0; otherwise its length is 1-4.
143 | 
144 | Internally `next`, `unsafeNext`, `count` all use `_nextRaw`. It's very efficient, but very unsafe and complicated. Read the source and import it separately if you need it.
145 | 
146 | ## StrChar
147 | 
148 | Represents a single UTF-8 encoded character.
149 | Internally it's bytes32 with leading byte at MSB.
150 | 
151 | It's returned by some methods of `StrSlice` and `StrCharsIter`.
152 | 
153 | | Method           | Description                                      |
154 | | ---------------- | ------------------------------------------------ |
155 | | `len`            | character length in bytes                        |
156 | | `toBytes32`      | returns the underlying `bytes32` value           |
157 | | `toString`       | copy the character to a new string               |
158 | | `toCodePoint`    | returns the unicode code point (`ord` in python) |
159 | | `cmp`            | 0 for eq, < 0 for lt, > 0 for gt                 |
160 | | `eq`,`ne`        | ==, !=                                           |
161 | | `lt`,`lte`       | <, <=                                            |
162 | | `gt`,`gte`       | >, >=                                            |
163 | | `isValidUtf8`    | usually true                                     |
164 | | `isAscii`        | true if the char is ASCII                        |
165 | 
166 | Import `StrChar__` (static function lib) to use `StrChar__.fromCodePoint` for code point to `StrChar` conversion.
167 | 
168 | `len` can return `0` *only* for invalid UTF-8 characters. But some invalid chars *may* have non-zero len! (use `isValidUtf8` to check validity). Note that `0x00` is a valid 1-byte UTF-8 character, its len is 1.
169 | 
170 | `isValidUtf8` can be false if the character was formed with an unsafe method (fromUnchecked, wrap).
171 | 
172 | ## Slice
173 | 
174 | ```solidity
175 | import { Slice, toSlice } from "@dk1a/solidity-stringutils/src/Slice.sol";
176 | 
177 | using { toSlice } for bytes;
178 | 
179 | function findZeroByte(bytes memory b) pure returns (uint256 index) {
180 |     return b.toSlice().find(
181 |         bytes(hex"00").toSlice()
182 |     );
183 | }
184 | ```
185 | 
186 | See `using {...} for Slice global` in the source for a function summary. Many are shared between `Slice` and `StrSlice`, but there are differences.
187 | 
188 | Internally Slice has very minimal assembly, instead using `memcpy`, `memchr`, `memcmp` and others; if you need the low-level functions, see `src/utils/`.
189 | 
190 | ## Assertions (PRBTest extension)
191 | 
192 | ```solidity
193 | import { PRBTest } from "@prb/test/src/PRBTest.sol";
194 | import { Assertions } from "@dk1a/solidity-stringutils/src/test/Assertions.sol";
195 | 
196 | contract StrSliceTest is PRBTest, Assertions {
197 |     function testContains() public {
198 |         bytes memory b1 = "12345";
199 |         bytes memory b2 = "3";
200 |         assertContains(b1, b2);
201 |     }
202 | 
203 |     function testLt() public {
204 |         string memory s1 = "123";
205 |         string memory s2 = "124";
206 |         assertLt(s1, s2);
207 |     }
208 | }
209 | ```
210 | 
211 | You can completely ignore slices if all you want is e.g. `assertContains` for native `bytes`/`string`.
212 | 
213 | ## Acknowledgements
214 | - [Arachnid/solidity-stringutils](https://github.com/Arachnid/solidity-stringutils) - I basically wanted to make an updated version of solidity-stringutils
215 | - [rust](https://doc.rust-lang.org/core/index.html) - most similarities are in names and general structure; the implementation can't really be similar (solidity doesn't even have generics)
216 | - [paulrberg/prb-math](https://github.com/paulrberg/prb-math) - good template for solidity data structure libraries with `using {...} for ... global`
217 | - [brockelmore/memmove](https://github.com/brockelmore/memmove) - good assembly memory management examples


--------------------------------------------------------------------------------
/test/StrSlice.t.sol:
--------------------------------------------------------------------------------
  1 | // SPDX-License-Identifier: MIT
  2 | 
  3 | pragma solidity ^0.8.17;
  4 | 
  5 | import { PRBTest } from "@prb/test/src/PRBTest.sol";
  6 | import { StrSliceAssertions } from "../src/test/StrSliceAssertions.sol";
  7 | 
  8 | import { StrSlice, toSlice, StrSlice__InvalidCharBoundary } from "../src/StrSlice.sol";
  9 | 
 10 | using { toSlice } for string;
 11 | 
 12 | contract StrSliceTest is PRBTest, StrSliceAssertions {
 13 |     function testToString() public {
 14 |         string memory original = unicode"Hello, world!";
 15 |         assertEq(original, toSlice(original).toString()); // round trip: string -> slice -> string
 16 |     }
 17 | 
 18 |     function testLen() public {
 19 |         string memory greeting = unicode"こんにちは";
 20 |         assertEq(bytes(greeting).length, toSlice(greeting).len()); // len() must equal the byte length
 21 |     }
 22 | 
 23 |     function testIsEmpty() public {
 24 |         assertTrue(toSlice("").isEmpty()); // zero-length string
 25 |         assertFalse(toSlice(new string(1)).isEmpty()); // string of length 1
 26 |     }
 27 | 
 28 |     /*//////////////////////////////////////////////////////////////////////////
 29 |                                 CONCATENATION
 30 |     //////////////////////////////////////////////////////////////////////////*/
 31 | 
 32 |     function testAdd() public {
 33 |         assertEq(unicode"こんにちは", string(unicode"こん").toSlice().add(string(unicode"にちは").toSlice())); // the two halves concatenate to the whole
 34 |     }
 35 | 
 36 |     function testJoin() public {
 37 |         StrSlice[] memory parts = new StrSlice[](3);
 38 |         parts[0] = toSlice("Hello");
 39 |         parts[1] = toSlice(unicode"こんにちは");
 40 |         parts[2] = toSlice(""); // an empty last part still gets a separator in front of it
 41 |         StrSlice separator = toSlice(unicode"📎!");
 42 |         assertEq(
 43 |             separator.join(parts), unicode"Hello📎!こんにちは📎!"
 44 |         );
 45 |     }
 46 | 
 47 |     /*//////////////////////////////////////////////////////////////////////////
 48 |                                     INDEX
 49 |     //////////////////////////////////////////////////////////////////////////*/
 50 | 
 51 |     function testIsCharBoundary() public {
 52 |         StrSlice ko = toSlice(unicode"こ"); // one character occupying 3 bytes
 53 |         // first byte of the character
 54 |         assertTrue(ko.isCharBoundary(0));
 55 |         // inside the character
 56 |         assertFalse(ko.isCharBoundary(1));
 57 |         assertFalse(ko.isCharBoundary(2));
 58 |         // one past the last byte: not a valid index, but still a valid boundary
 59 |         assertTrue(ko.isCharBoundary(3));
 60 |         // beyond the end of the slice
 61 |         assertFalse(ko.isCharBoundary(4));
 62 |     }
 63 | 
 64 |     function testGet() public {
 65 |         string memory greeting = unicode"こんにちは";
 66 |         assertEq(toSlice(greeting).get(3).toString(), unicode"ん"); // byte index 3 starts the second character
 67 |     }
 68 | 
 69 |     function testGet__InvalidCharBoundary() public {
 70 |         string memory _s = unicode"こんにちは";
 71 |         vm.expectRevert(StrSlice__InvalidCharBoundary.selector); // the statement below is expected to revert with this error
 72 |         _s.toSlice().get(1); // byte index 1 lies inside the first character
 73 |     }
 74 | 
 75 |     function testSplitAt() public {
 76 |         string memory greeting = unicode"こんにちは";
 77 |         (StrSlice head, StrSlice tail) = toSlice(greeting).splitAt(3); // split right after the first character
 78 |         assertEq(head.toString(), unicode"こ");
 79 |         assertEq(tail.toString(), unicode"んにちは");
 80 |     }
 81 | 
 82 |     function testSplitAt__InvalidCharBoundary() public {
 83 |         string memory _s = unicode"こんにちは";
 84 |         vm.expectRevert(StrSlice__InvalidCharBoundary.selector); // the statement below is expected to revert with this error
 85 |         _s.toSlice().splitAt(1); // byte index 1 lies inside the first character
 86 |     }
 87 | 
 88 |     function testGetSubslice() public {
 89 |         string memory greeting = unicode"こんにちは";
 90 |         assertEq(toSlice(greeting).getSubslice(3, 9).toString(), unicode"んに"); // bytes 3..8 hold the 2nd and 3rd characters
 91 |     }
 92 | 
 93 |     function testGetSubslice__InvalidCharBoundary() public {
 94 |         string memory _s = unicode"こんにちは";
 95 |         vm.expectRevert(StrSlice__InvalidCharBoundary.selector); // the statement below is expected to revert with this error
 96 |         _s.toSlice().getSubslice(3, 8); // start 3 is a boundary, but end 8 lies inside the third character
 97 |     }
 98 | 
 99 |     /*//////////////////////////////////////////////////////////////////////////
100 |                                     SEARCH
101 |     //////////////////////////////////////////////////////////////////////////*/
102 | 
103 |     function testFind() public {
104 |         string memory haystack = unicode"012こんにちはこんにちは34";
105 |         string memory needle = unicode"んに";
106 |         uint256 pos = toSlice(haystack).find(toSlice(needle));
107 |         assertEq(pos, 6); // byte index where the first match starts
108 |         (, StrSlice rest) = toSlice(haystack).splitAt(pos);
109 |         assertEq(rest, unicode"んにちはこんにちは34");
110 |     }
111 | 
112 |     function testRfind() public {
113 |         string memory haystack = unicode"012こんにちはこんにちは34";
114 |         string memory needle = unicode"んに";
115 |         uint256 pos = toSlice(haystack).rfind(toSlice(needle));
116 |         assertEq(pos, 21); // byte index where the last match starts
117 |         (, StrSlice rest) = toSlice(haystack).splitAt(pos);
118 |         assertEq(rest, unicode"んにちは34");
119 |     }
120 | 
121 |     function testContains() public {
122 |         string memory haystack = unicode"「lorem ipsum」の典型的なテキストのほかにも、原典からの距離の様々なバリエーションが存在する。他のバージョンでは、ラテン語にはあまり登場しないか存在しない";
123 |         string memory needle = unicode"登場";
124 |         assertTrue(toSlice(haystack).contains(toSlice(needle))); // the needle occurs in the long text
125 |     }
126 | 
127 |     function testNotContains() public {
128 |         string memory haystack = unicode"「lorem ipsum」の典型的なテキストのほかにも、原典からの距離の様々なバリエーションが存在する。他のバージョンでは、ラテン語にはあまり登場しないか存在しない";
129 |         string memory needle = unicode"0";
130 |         assertFalse(toSlice(haystack).contains(toSlice(needle))); // "0" never occurs in the text
131 |     }
132 | 
133 |     /*//////////////////////////////////////////////////////////////////////////
134 |                                     MODIFY
135 |     //////////////////////////////////////////////////////////////////////////*/
136 | 
137 |     function testStripPrefix() public {
138 |         StrSlice whole = toSlice(unicode"こんにちは");
139 |         assertEq(whole.stripPrefix(toSlice(unicode"こん")), string(unicode"にちは")); // a real prefix is removed
140 |         assertEq(whole.stripPrefix(whole), ""); // the slice is a prefix of itself
141 |         assertEq(whole.stripPrefix(toSlice("")), whole); // empty pattern changes nothing
142 |         assertEq(whole.stripPrefix(toSlice(unicode"は")), whole); // a suffix is not a prefix
143 |         assertEq(whole.stripPrefix(toSlice(unicode"こんにちはは")), whole); // pattern longer than the slice
144 |     }
145 | 
146 |     function testStripPrefix__FromEmpty() public {
147 |         StrSlice empty = toSlice("");
148 |         assertEq(empty.stripPrefix(toSlice(unicode"こ")), empty); // non-empty pattern, empty subject
149 |         assertEq(empty.stripPrefix(toSlice("")), empty); // empty pattern, empty subject
150 |     }
151 | 
152 |     function testStripSuffix() public {
153 |         StrSlice whole = toSlice(unicode"こんにちは");
154 |         assertEq(whole.stripSuffix(toSlice(unicode"ちは")), string(unicode"こんに")); // a real suffix is removed
155 |         assertEq(whole.stripSuffix(whole), ""); // the slice is a suffix of itself
156 |         assertEq(whole.stripSuffix(toSlice("")), whole); // empty pattern changes nothing
157 |         assertEq(whole.stripSuffix(toSlice(unicode"こ")), whole); // a prefix is not a suffix
158 |         assertEq(whole.stripSuffix(toSlice(unicode"ここんにちは")), whole); // pattern longer than the slice
159 |     }
160 | 
161 |     function testStripSuffix__FromEmpty() public {
162 |         StrSlice empty = toSlice("");
163 |         assertEq(empty.stripSuffix(toSlice(unicode"こ")), empty); // non-empty pattern, empty subject
164 |         assertEq(empty.stripSuffix(toSlice("")), empty); // empty pattern, empty subject
165 |     }
166 | 
167 |     function testSplitOnce() public {
168 |         StrSlice haystack = toSlice(unicode"こんにちはこんにちは");
169 |         StrSlice separator = toSlice(unicode"に");
170 |         (bool matched, StrSlice lhs, StrSlice rhs) = haystack.splitOnce(separator);
171 |         assertTrue(matched);
172 |         assertEq(lhs, unicode"こん"); // everything before the first match
173 |         assertEq(rhs, unicode"ちはこんにちは"); // everything after it
174 |     }
175 | 
176 |     function testSplitOnce__NotFound() public {
177 |         StrSlice haystack = toSlice(unicode"こんにちはこんにちは");
178 |         StrSlice separator = toSlice(unicode"こに");
179 |         (bool matched, StrSlice lhs, StrSlice rhs) = haystack.splitOnce(separator);
180 |         assertFalse(matched);
181 |         assertEq(lhs, unicode"こんにちはこんにちは"); // without a match the first part is the whole slice
182 |         assertEq(rhs, unicode""); // and the second part is empty
183 |     }
184 | 
185 |     function testRsplitOnce() public {
186 |         StrSlice haystack = toSlice(unicode"こんにちはこんにちは");
187 |         StrSlice separator = toSlice(unicode"に");
188 |         (bool matched, StrSlice lhs, StrSlice rhs) = haystack.rsplitOnce(separator);
189 |         assertTrue(matched);
190 |         assertEq(lhs, unicode"こんにちはこん"); // everything before the last match
191 |         assertEq(rhs, unicode"ちは"); // everything after it
192 |     }
193 | 
194 |     function testRsplitOnce__NotFound() public {
195 |         StrSlice haystack = toSlice(unicode"こんにちはこんにちは");
196 |         // a missing pattern is expected to leave an empty prefix and everything in the suffix
197 |         (bool ok, StrSlice head, StrSlice tail) = haystack.rsplitOnce(toSlice(unicode"こに"));
198 |         assertFalse(ok);
199 |         assertEq(head, unicode"");
200 |         assertEq(tail, unicode"こんにちはこんにちは");
201 |     }
202 | 
203 |     // TODO both replacen and its tests are rather unfinished
204 |     function testReplacen() public {
205 |         StrSlice input = toSlice(unicode"0110110110110");
206 |         StrSlice needle = toSlice(unicode"11");
207 |         StrSlice replacement = toSlice(unicode"__");
208 |         // the input holds exactly 4 occurrences of the needle, and the limit is 4
209 |         assertEq(input.replacen(needle, replacement, 4), string(unicode"0__0__0__0__0"));
210 |     }
211 | 
212 |     function testReplacen__Unicode() public {
213 |         StrSlice input = toSlice(unicode"012こんにちはこんにちはこんにちは34");
214 |         StrSlice needle = toSlice(unicode"んに");
215 |         StrSlice replacement = toSlice(unicode"📎");
216 |         // the limit of 2 is expected to leave the third occurrence untouched
217 |         assertEq(input.replacen(needle, replacement, 2), string(unicode"012こ📎ちはこ📎ちはこんにちは34"));
218 |     }
219 | 
220 |     // TODO more tests
221 | 
222 |     /*//////////////////////////////////////////////////////////////////////////
223 |                                     ASCII
224 |     //////////////////////////////////////////////////////////////////////////*/
225 | 
226 |     function testIsAscii() public {
227 |         string memory allAscii = hex"000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f";
228 |         assertTrue(toSlice("").isAscii()); // the empty string counts as ASCII
229 |         assertTrue(toSlice("a").isAscii());
230 |         assertTrue(toSlice(allAscii).isAscii()); // every byte from 0x00 to 0x7f
231 |         assertTrue(toSlice(string(abi.encodePacked(allAscii, allAscii, allAscii, allAscii))).isAscii());
232 |         assertFalse(toSlice(unicode"📎").isAscii());
233 |         assertFalse(toSlice(unicode"012こ").isAscii());
234 |         assertFalse(toSlice(string(bytes(hex"FF"))).isAscii());
235 |         assertFalse(toSlice(string(abi.encodePacked(hex"80", allAscii))).isAscii()); // offending byte comes first
236 |         assertFalse(toSlice(string(abi.encodePacked(allAscii, hex"80"))).isAscii()); // offending byte comes last
237 |         assertFalse(toSlice(string(abi.encodePacked(allAscii, unicode"📎"))).isAscii());
238 |     }
239 | }


--------------------------------------------------------------------------------
/src/StrCharsIter.sol:
--------------------------------------------------------------------------------
  1 | // SPDX-License-Identifier: MIT
  2 | 
  3 | pragma solidity ^0.8.17;
  4 | 
  5 | import { Slice, Slice__ } from "./Slice.sol";
  6 | import { StrSlice } from "./StrSlice.sol";
  7 | import { SliceIter, SliceIter__, SliceIter__StopIteration } from "./SliceIter.sol";
  8 | import { StrChar, StrChar__, StrChar__InvalidUTF8 } from "./StrChar.sol";
  9 | import { isValidUtf8, utf8CharWidth } from "./utils/utf8.sol";
 10 | import { leftMask } from "./utils/mem.sol";
 11 | 
 12 | /**
 13 |  * @title String chars iterator.
 14 |  * @dev This struct is created by the iter method on `StrSlice` (see also `StrCharsIter__.from`).
 15 |  * Iterates 1 UTF-8 encoded character at a time (which may have 1-4 bytes).
 16 |  *
 17 |  * Note StrCharsIter iterates over UTF-8 encoded codepoints, not unicode scalar values.
 18 |  * This is mostly done for simplicity, since solidity doesn't care about unicode anyways.
 19 |  * Field order matters: assembly in this file reads `_ptr` as the first word of the struct.
 20 |  * TODO think about actually adding char and unicode awareness?
 21 |  * https://github.com/devstein/unicode-eth attempts something like that
 22 |  */
 23 | struct StrCharsIter {
 24 |     uint256 _ptr; // memory pointer to the first byte that was not consumed yet
 25 |     uint256 _len; // number of bytes (not characters) left to iterate over
 26 | }
 27 | 
 28 | /*//////////////////////////////////////////////////////////////////////////
 29 |                                 STATIC FUNCTIONS
 30 | //////////////////////////////////////////////////////////////////////////*/
 31 | 
 32 | library StrCharsIter__ {
 33 |     /**
 34 |      * @dev Builds a `StrCharsIter` that spans the whole of `slice`.
 35 |      * Note the `StrSlice` is assumed to be memory-safe.
 36 |      */
 37 |     function from(StrSlice slice) internal pure returns (StrCharsIter memory iter) {
 38 |         iter._ptr = slice.ptr();
 39 |         iter._len = slice.len();
 40 |         // TODO I'm curious about gas differences
 41 |         // return StrCharsIter(SliceIter__.from(str.asSlice()));
 42 |     }
 43 | }
 44 | 
 45 | /*//////////////////////////////////////////////////////////////////////////
 46 |                                 GLOBAL FUNCTIONS
 47 | //////////////////////////////////////////////////////////////////////////*/
 48 | 
 49 | using {
 50 |     asStr, // view the remaining data as a `StrSlice`
 51 |     ptr, len, isEmpty, // accessors
 52 |     next, nextBack, unsafeNext, // single-character stepping
 53 |     count, validateUtf8, unsafeCount // loops over all the remaining characters
 54 | } for StrCharsIter global;
 55 | 
 56 | /**
 57 |  * @dev Reinterprets the not-yet-consumed data as a `StrSlice` of the original data.
 58 |  */
 59 | function asStr(StrCharsIter memory self) pure returns (StrSlice slice) {
 60 |     // take the `Slice` view first, then rewrap its raw value as a `StrSlice`
 61 |     Slice raw = self.asSlice();
 62 |     slice = StrSlice.wrap(Slice.unwrap(raw));
 63 | }
 64 | 
 65 | /**
 66 |  * @dev Memory pointer to where the not-yet-consumed part of the string slice begins.
 67 |  * This method is primarily for internal use.
 68 |  */
 69 | function ptr(StrCharsIter memory self) pure returns (uint256 result) {
 70 |     result = self._ptr;
 71 | }
 72 | 
 73 | /**
 74 |  * @dev Remaining length, measured in bytes rather than codepoints.
 75 |  */
 76 | function len(StrCharsIter memory self) pure returns (uint256 result) {
 77 |     result = self._len;
 78 | }
 79 | 
 80 | /**
 81 |  * @dev Returns true once there are no bytes left to iterate over.
 82 |  */
 83 | function isEmpty(StrCharsIter memory self) pure returns (bool result) {
 84 |     result = (self._len == 0);
 85 | }
 86 | 
 87 | /**
 88 |  * @dev Pops one character off the front of the iterator and returns it.
 89 |  * Reverts with `SliceIter__StopIteration` if len == 0.
 90 |  * Reverts with `StrChar__InvalidUTF8` on invalid UTF-8.
 91 |  */
 92 | function next(StrCharsIter memory self) pure returns (StrChar) {
 93 |     uint256 remaining = self._len;
 94 |     if (remaining == 0) revert SliceIter__StopIteration();
 95 |     // `_nextRaw(true)` either reverts or yields 0 < width <= remaining (given remaining != 0)
 96 |     (bytes32 raw, uint256 width) = self._nextRaw(true);
 97 |     unchecked {
 98 |         self._len = remaining - width;
 99 |     }
100 |     // no re-validation needed: `_nextRaw` already reverted on invalid UTF-8
101 |     return StrChar__.fromUnchecked(raw, width);
102 | }
103 | 
104 | /**
105 |  * @dev Advances the iterator from the back and returns the next character.
106 |  * Reverts if len == 0.
107 |  * Reverts on invalid UTF-8 (including stray continuation bytes after a valid character).
108 |  */
109 | function nextBack(StrCharsIter memory self) pure returns (StrChar char) {
110 |     if (self._len == 0) revert SliceIter__StopIteration();
111 | 
112 |     // _self shares memory with self!
113 |     SliceIter memory _self = self._sliceIter();
114 | 
115 |     bool isValid;
116 |     uint256 b;
117 |     for (uint256 i; i < 4; i++) {
118 |         // an example of what's going on in the loop:
119 |         // b = 0x0000000000..00
120 |         // nextBack = 0x80
121 |         // b = 0x8000000000..00 (not valid UTF-8)
122 |         // nextBack = 0x92
123 |         // b = 0x9280000000..00 (not valid UTF-8)
124 |         // nextBack = 0x9F
125 |         // b = 0x9F92800000..00 (not valid UTF-8)
126 |         // nextBack = 0xF0
127 |         // b = 0xF09F928000..00 (valid UTF-8, break)
128 | 
129 |         // safe because i < 4
130 |         unchecked {
131 |             // free the space in MSB
132 |             b = (b >> 8) | (
133 |                 // get 1 byte in LSB
134 |                 uint256(_self.nextBack())
135 |                 // flip it to MSB
136 |                 << (31 * 8)
137 |             );
138 |         }
139 |         // `isValidUtf8` validates only the leading character of `b` and returns its width,
140 |         // so the width must equal the i + 1 bytes read; otherwise e.g. "a\x80" would pass as "a"
141 |         if (isValidUtf8(bytes32(b)) == i + 1) {
142 |             isValid = true;
143 |             break;
144 |         }
145 |     }
146 |     if (!isValid) revert StrChar__InvalidUTF8();
147 | 
148 |     // wrap is safe, because UTF-8 was validated and spans every byte that was read into `b`,
149 |     // so the trailing bytes are 0 (since the loop went byte-by-byte)
150 |     char = StrChar.wrap(bytes32(b));
151 |     // the iterator was already advanced by `_self.nextBack()`
152 |     return char;
153 | }
154 | 
155 | /**
156 |  * @dev Advances the iterator and returns the next character, skipping the usual safety checks.
157 |  * Does NOT validate iterator length. It could underflow!
158 |  * Does NOT revert on invalid UTF-8.
159 |  * WARNING: for invalid UTF-8 bytes, advances by 1 and returns an invalid `StrChar` with len 0!
160 |  */
161 | function unsafeNext(StrCharsIter memory self) pure returns (StrChar char) {
162 |     // `_nextRaw` keeps width <= self._len, but only IF self._len != 0 on entry
163 |     (bytes32 raw, uint256 width) = self._nextRaw(false);
164 |     if (width == 0) {
165 |         // invalid UTF-8: `_nextRaw` skipped 1 byte (this underflows IF the caller let self._len be 0)
166 |         unchecked {
167 |             self._len -= 1;
168 |         }
169 |         // NEVER produces a valid character (this is always a single 0x80-0xFF byte)
170 |         return StrChar__.fromUnchecked(raw, 1);
171 |     }
172 |     // valid UTF-8: shrink the length by the width of the character
173 |     // (safe IF the caller ensures that self._len != 0)
174 |     unchecked {
175 |         self._len -= width;
176 |     }
177 |     // ALWAYS produces a valid character
178 |     return StrChar__.fromUnchecked(raw, width);
179 | }
180 | 
181 | /**
182 |  * @dev Consumes the iterator, counting the number of UTF-8 characters.
183 |  * Note O(n) time!
184 |  * Reverts on invalid UTF-8 (a last character cut off by the end of the iterator is invalid too).
185 |  */
186 | function count(StrCharsIter memory self) pure returns (uint256 result) {
187 |     uint256 endPtr;
188 |     // (ptr+len is implicitly safe)
189 |     unchecked {
190 |         endPtr = self._ptr + self._len;
191 |     }
192 |     while (self._ptr < endPtr) {
193 |         self._nextRaw(true);
194 |         // +1 is safe because 2**256 cycles are impossible
195 |         unchecked { result += 1; }
196 |     }
197 |     // `_len` is not updated inside the loop, so `_nextRaw` compares against a stale length and
198 |     // cannot see a last character running past `endPtr`; an overshoot means it was cut off
199 |     if (self._ptr > endPtr) revert StrChar__InvalidUTF8();
200 |     // _nextRaw does NOT modify len to allow optimizations like setting it once at the end
201 |     self._len = 0;
202 | }
203 | 
204 | /**
205 |  * @dev Consumes the iterator, validating UTF-8 characters.
206 |  * Note O(n) time! Note `_len` is NOT updated, so do not reuse the iterator afterwards.
207 |  * Returns true if all are valid; otherwise false on the first invalid UTF-8 character
208 |  * (a last character that is cut off by the end of the iterator counts as invalid).
209 |  */
210 | function validateUtf8(StrCharsIter memory self) pure returns (bool) {
211 |     uint256 endPtr;
212 |     // (ptr+len is implicitly safe)
213 |     unchecked { endPtr = self._ptr + self._len; }
214 |     while (self._ptr < endPtr) {
215 |         (, uint256 charLen) = self._nextRaw(false);
216 |         if (charLen == 0) return false;
217 |     }
218 |     // `_nextRaw` checks bounds against the stale `_len`; overshooting `endPtr` means the last char was cut off
219 |     return self._ptr == endPtr;
220 | }
221 | 
222 | /**
223 |  * @dev VERY UNSAFE - a single invalid UTF-8 character can severely alter the result!
224 |  * Consumes the iterator, counting the number of UTF-8 characters.
225 |  * Significantly faster than safe `count`, especially for long multibyte strings.
226 |  *
227 |  * Note `count` is actually a bit more efficient than `validateUtf8`.
228 |  * `count` is much more efficient than calling `validateUtf8` and `unsafeCount` together.
229 |  * Use `unsafeCount` only when you are already certain that UTF-8 is valid.
230 |  * If you want speed and no validation, just use byte length, it's faster and more predictably wrong.
231 |  *
232 |  * Some gas usage metrics:
233 |  * 1 ascii char:
234 |  *   count:       571 gas
235 |  *   unsafeCount: 423 gas
236 |  * 100 ascii chars:
237 |  *   count:       27406 gas
238 |  *   unsafeCount: 12900 gas
239 |  * 1000 chinese chars (3000 bytes):
240 |  *   count:       799305 gas
241 |  *   unsafeCount: 178301 gas
242 |  */
243 | function unsafeCount(StrCharsIter memory self) pure returns (uint256 result) {
244 |     uint256 endPtr;
245 |     // (ptr+len is implicitly safe)
246 |     unchecked {
247 |         endPtr = self._ptr + self._len;
248 |     }
249 |     while (self._ptr < endPtr) {
250 |         uint256 lead;
251 |         // unchecked mload
252 |         // (unsafe, the last character could move the pointer past the boundary, but only once)
253 |         /// @solidity memory-safe-assembly
254 |         assembly {
255 |             // mload(self) yields self._ptr (this is an optimization trick, since it's 1st in the struct)
256 |             lead := byte(0, mload(mload(self)))
257 |         }
258 |         // this is a very unsafe version of `utf8CharWidth`: the width is guessed from
259 |         // the leading byte alone, so 1 invalid UTF-8 character can severely change the count result
260 |         // (no real infinite loop risks, only one potential corrupt memory read)
261 |         uint256 width;
262 |         if (lead < 0x80) {
263 |             width = 1;
264 |         } else if (lead < 0xE0) {
265 |             width = 2;
266 |         } else if (lead < 0xF0) {
267 |             width = 3;
268 |         } else {
269 |             width = 4;
270 |         }
271 |         unchecked {
272 |             self._ptr += width;
273 |             // +1 is safe because 2**256 cycles are impossible
274 |             result += 1;
275 |         }
276 |     }
277 |     // the loop walked over every remaining byte
278 |     self._len = 0;
279 |     return result;
280 | }
281 | 
282 | /*//////////////////////////////////////////////////////////////////////////
283 |                             FILE-LEVEL FUNCTIONS
284 | //////////////////////////////////////////////////////////////////////////*/
285 | 
286 | using { asSlice, _nextRaw, _sliceIter } for StrCharsIter; // no `global`: attached within this file only
287 | 
288 | /**
289 |  * @dev Exposes the not-yet-consumed data as a `bytes` subslice of the original data.
290 |  */
291 | function asSlice(StrCharsIter memory self) pure returns (Slice slice) {
292 |     slice = Slice__.fromUnchecked(self._ptr, self._len);
293 | }
294 | 
295 | /**
296 |  * @dev Shared stepping primitive behind `next`, `unsafeNext`, `count` and `validateUtf8`.
297 |  * Used internally to efficiently reuse iteration logic. Has a lot of caveats:
298 |  * - NEITHER checks NOR modifies iterator length.
299 |  *   (Caller MUST guarantee that len != 0. Caller MUST modify len correctly themselves.)
300 |  * - Does NOT form the character properly, and returns raw unmasked bytes and length.
301 |  * - Does advance the iterator pointer.
302 |  * - Reads a full 32-byte word at the pointer without any bounds check; the out-of-bounds
303 |  *   test afterwards compares against `self._len` as it is on entry, so it is only as
304 |  *   accurate as the length the caller maintains.
305 |  *
306 |  * Validates UTF-8.
307 |  * For valid chars advances the pointer by charLen.
308 |  * For invalid chars behaviour depends on `revertOnInvalid`:
309 |  * revertOnInvalid == true: revert with `StrChar__InvalidUTF8`.
310 |  * revertOnInvalid == false: advance the pointer by 1, but return charLen 0.
311 |  *
312 |  * @param self iterator to step; only its `_ptr` field is written.
313 |  * @param revertOnInvalid true to revert on an invalid char, false to report it as charLen 0.
314 |  * @return b raw unmasked bytes; if not discarded, then charLen SHOULD be used to mask it.
315 |  * @return charLen length of a valid UTF-8 char; 0 for invalid chars.
316 |  * Guarantees that charLen <= self._len (as long as self._len != 0, which is the caller's guarantee)
317 |  */
318 | function _nextRaw(StrCharsIter memory self, bool revertOnInvalid)
319 |     pure
320 |     returns (bytes32 b, uint256 charLen)
321 | {
322 |     // unchecked mload
323 |     // (isValidUtf8 only checks the 1st character, which exists since caller guarantees len != 0)
324 |     /// @solidity memory-safe-assembly
325 |     assembly {
326 |         // mload(self) yields self._ptr (this is an optimization trick, since it's 1st in the struct)
327 |         b := mload(mload(self))
328 |     }
329 |     // validate character (0 => invalid; 1-4 => valid)
330 |     charLen = isValidUtf8(b);
331 | 
332 |     // Two ways to be invalid:
333 |     // - malformed bytes (charLen == 0);
334 |     // - a well-formed char that does not fit into self._len: mload didn't check bounds,
335 |     //   so a character that goes out of bounds could've been seen as valid.
336 |     bool isInvalid = charLen == 0 || charLen > self._len;
337 |     if (isInvalid) {
338 |         if (revertOnInvalid) revert StrChar__InvalidUTF8();
339 |         // skip a single byte; safe because caller guarantees _len != 0
340 |         unchecked {
341 |             self._ptr += 1;
342 |         }
343 |         // invalid
344 |         return (b, 0);
345 |     }
346 | 
347 |     // step over the whole character
348 |     // (safe because charLen <= self._len was established above)
349 |     unchecked {
350 |         self._ptr += charLen;
351 |     }
352 |     // valid
353 |     return (b, charLen);
354 | }
355 | 
356 | /**
357 |  * @dev Returns the underlying `SliceIter` as an alias of `self` (same memory, not a copy).
358 |  * AVOID USING THIS EXTERNALLY!
359 |  * Advancing the underlying slice could lead to invalid UTF-8 for StrCharsIter.
360 |  */
361 | function _sliceIter(StrCharsIter memory self) pure returns (SliceIter memory result) {
362 |     assembly {
363 |         result := self // repoint `result` at `self`; presumably relies on `SliceIter` having the same (ptr, len) layout
364 |     }
365 | }


--------------------------------------------------------------------------------
/test/StrChar.t.sol:
--------------------------------------------------------------------------------
  1 | // SPDX-License-Identifier: MIT
  2 | 
  3 | pragma solidity ^0.8.17;
  4 | 
  5 | import { PRBTest } from "@prb/test/src/PRBTest.sol";
  6 | 
  7 | import { StrChar, StrChar__, StrChar__InvalidUTF8 } from "../src/StrChar.sol";
  8 | import { Unicode__InvalidCode } from "../src/utils/unicode.sol";
  9 | 
 10 | contract StrCharTest is PRBTest {
 11 |     StrCharRevertHelper revertHelper;
 12 | 
 13 |     function setUp() public {
 14 |         revertHelper = new StrCharRevertHelper();
 15 |     }
 16 | 
 17 |     function testCmp(uint32 _a, uint32 _b) public {
 18 |         vm.assume(
 19 |             !(0xD800 <= _a && _a <= 0xDFFF) && _a <= 0x10FFFF
 20 |             && !(0xD800 <= _b && _b <= 0xDFFF) && _b <= 0x10FFFF
 21 |         );
 22 |         StrChar a = StrChar__.fromCodePoint(_a);
 23 |         StrChar b = StrChar__.fromCodePoint(_b);
 24 | 
 25 |         if (_a < _b) {
 26 |             assertTrue(a.cmp(b) < 0);
 27 |             assertFalse(a.eq(b));
 28 |             assertTrue(a.ne(b));
 29 |             assertTrue(a.lt(b));
 30 |             assertTrue(a.lte(b));
 31 |             assertFalse(a.gt(b));
 32 |             assertFalse(a.gte(b));
 33 |         } else if (_a > _b) {
 34 |             assertTrue(a.cmp(b) > 0);
 35 |             assertFalse(a.eq(b));
 36 |             assertTrue(a.ne(b));
 37 |             assertFalse(a.lt(b));
 38 |             assertFalse(a.lte(b));
 39 |             assertTrue(a.gt(b));
 40 |             assertTrue(a.gte(b));
 41 |         } else if (_a == _b) {
 42 |             assertTrue(a.cmp(b) == 0);
 43 |             assertTrue(a.eq(b));
 44 |             assertFalse(a.ne(b));
 45 |             assertFalse(a.lt(b));
 46 |             assertTrue(a.lte(b));
 47 |             assertFalse(a.gt(b));
 48 |             assertTrue(a.gte(b));
 49 |         }
 50 |     }
 51 | 
 52 |     function testCmp__Manual() public {
 53 |         StrChar a = StrChar__.fromCodePoint(0x00);
 54 |         StrChar b = StrChar__.fromCodePoint(0x01);
 55 |         assertTrue(a.cmp(b) < 0);
 56 |         assertFalse(a.eq(b));
 57 |         assertTrue(a.ne(b));
 58 |         assertTrue(a.lt(b));
 59 |         assertTrue(a.lte(b));
 60 |         assertFalse(a.gt(b));
 61 |         assertFalse(a.gte(b));
 62 | 
 63 |         a = StrChar__.fromCodePoint(0x757);
 64 |         b = StrChar__.fromCodePoint(0x7);
 65 |         assertTrue(a.cmp(b) > 0);
 66 |         assertFalse(a.eq(b));
 67 |         assertTrue(a.ne(b));
 68 |         assertFalse(a.lt(b));
 69 |         assertFalse(a.lte(b));
 70 |         assertTrue(a.gt(b));
 71 |         assertTrue(a.gte(b));
 72 | 
 73 |         a = StrChar__.fromCodePoint(0x10FFFF);
 74 |         b = StrChar__.fromCodePoint(0x10FFFF);
 75 |         assertTrue(a.cmp(b) == 0);
 76 |         assertTrue(a.eq(b));
 77 |         assertFalse(a.ne(b));
 78 |         assertFalse(a.lt(b));
 79 |         assertTrue(a.lte(b));
 80 |         assertFalse(a.gt(b));
 81 |         assertTrue(a.gte(b));
 82 |     }
 83 | 
 84 |     /*//////////////////////////////////////////////////////////////////////////
 85 |                                         1 BYTE
 86 |     //////////////////////////////////////////////////////////////////////////*/
 87 | 
 88 |     function testOneByte() public {
 89 |         for (uint256 i; i < 0x80; i++) {
 90 |             StrChar char = StrChar__.fromCodePoint(i);
 91 |             assertTrue(char.isValidUtf8());
 92 |             assertEq(char.len(), 1);
 93 |             assertEq(char.toCodePoint(), i);
 94 |             assertEq(uint256(uint8(char.toBytes32()[0])), i);
 95 |             assertEq(uint256(uint8(bytes(char.toString())[0])), i);
 96 |         }
 97 |     }
 98 | 
 99 |     function testOneByte__Invalid() public {
100 |         for (uint256 i = 0x80; i < 0x100; i++) {
101 |             vm.expectRevert(StrChar__InvalidUTF8.selector);
102 |             revertHelper.from(bytes32(i << 248));
103 |         }
104 |     }
105 | 
106 |     // anything after a valid UTF-8 character is ignored
107 |     function testOneByte__Trailing() public {
108 |         assertEq(StrChar__.from(bytes32(hex"0080")).toCodePoint(), 0);
109 |         assertEq(StrChar__.from(bytes32(hex"0011111111")).toCodePoint(), 0);
110 |     }
111 | 
112 |     /*//////////////////////////////////////////////////////////////////////////
113 |                                         2 BYTES
114 |     //////////////////////////////////////////////////////////////////////////*/
115 | 
116 |     function testTwoByte() public {
117 |         for (uint256 i = 0x80; i < 0x800; i++) {
118 |             StrChar char = StrChar__.fromCodePoint(i);
119 |             assertTrue(char.isValidUtf8());
120 |             assertEq(char.len(), 2);
121 |             assertEq(char.toCodePoint(), i);
122 |         }
123 |     }
124 | 
125 |     // testing against solidity's own encoder
126 |     function testTwoByte__Manual() public {
127 |         assertEq(StrChar__.fromCodePoint(0x80).toBytes32(),  bytes32("\u0080"));
128 |         assertEq(StrChar__.fromCodePoint(0x80).toString(),    string("\u0080"));
129 |         assertEq(StrChar__.fromCodePoint(0x81).toBytes32(),  bytes32("\u0081"));
130 |         assertEq(StrChar__.fromCodePoint(0x81).toString(),    string("\u0081"));
131 |         assertEq(StrChar__.fromCodePoint(0x100).toBytes32(), bytes32("\u0100"));
132 |         assertEq(StrChar__.fromCodePoint(0x100).toString(),   string("\u0100"));
133 |         assertEq(StrChar__.fromCodePoint(0x101).toBytes32(), bytes32("\u0101"));
134 |         assertEq(StrChar__.fromCodePoint(0x101).toString(),   string("\u0101"));
135 |         assertEq(StrChar__.fromCodePoint(0x256).toBytes32(), bytes32("\u0256"));
136 |         assertEq(StrChar__.fromCodePoint(0x256).toString(),   string("\u0256"));
137 |         assertEq(StrChar__.fromCodePoint(0x600).toBytes32(), bytes32("\u0600"));
138 |         assertEq(StrChar__.fromCodePoint(0x600).toString(),   string("\u0600"));
139 |         assertEq(StrChar__.fromCodePoint(0x799).toBytes32(), bytes32("\u0799"));
140 |         assertEq(StrChar__.fromCodePoint(0x799).toString(),   string("\u0799"));
141 |     }
142 | 
143 |     function testTwoByte__Invalid() public {
144 |         vm.expectRevert(StrChar__InvalidUTF8.selector);
145 |         revertHelper.from(bytes32(hex"E000"));
146 |         vm.expectRevert(StrChar__InvalidUTF8.selector);
147 |         revertHelper.from(bytes32(hex"E555"));
148 |         vm.expectRevert(StrChar__InvalidUTF8.selector);
149 |         revertHelper.from(bytes32(hex"FFFF"));
150 |         vm.expectRevert(StrChar__InvalidUTF8.selector);
151 |         revertHelper.from(bytes32(hex"C000"));
152 |         vm.expectRevert(StrChar__InvalidUTF8.selector);
153 |         revertHelper.from(bytes32(hex"C080"));
154 |         vm.expectRevert(StrChar__InvalidUTF8.selector);
155 |         revertHelper.from(bytes32(hex"C0C0"));
156 |         vm.expectRevert(StrChar__InvalidUTF8.selector);
157 |         revertHelper.from(bytes32(hex"C190"));
158 |     }
159 | 
160 |     function testTwoByte__Trailing() public {
161 |         assertEq(StrChar__.from(bytes32(hex"C280111111")).toCodePoint(), 0x80);
162 |         assertEq(StrChar__.from(bytes32(hex"C28000FFFF")).toCodePoint(), 0x80);
163 |     }
164 | 
165 |     /*//////////////////////////////////////////////////////////////////////////
166 |                                         3 BYTES
167 |     //////////////////////////////////////////////////////////////////////////*/
168 | 
169 |     function testThreeByte() public {
170 |         for (uint256 i = 0x800; i < 0x10000; i++) {
171 |             if (0xD800 <= i && i <= 0xDFFF) {
172 |                 // skip surrogate halves
173 |                 continue;
174 |             }
175 |             StrChar char = StrChar__.fromCodePoint(i);
176 |             assertTrue(char.isValidUtf8());
177 |             assertEq(char.len(), 3);
178 |             assertEq(char.toCodePoint(), i);
179 |         }
180 |     }
181 | 
182 |     function testThreeByte__InvalidSurrogateHalf() public {
183 |         for (uint256 i = 0xD800; i <= 0xDFFF; i++) {
184 |             vm.expectRevert(Unicode__InvalidCode.selector);
185 |             revertHelper.fromCodePoint(i);
186 |         }
187 |     }
188 | 
189 |     function testThreeByte__Manual() public {
190 |         assertEq(StrChar__.fromCodePoint(0x800).toBytes32(),  bytes32("\u0800"));
191 |         assertEq(StrChar__.fromCodePoint(0x800).toString(),    string("\u0800"));
192 |         assertEq(StrChar__.fromCodePoint(0x801).toBytes32(),  bytes32("\u0801"));
193 |         assertEq(StrChar__.fromCodePoint(0x801).toString(),    string("\u0801"));
194 |         assertEq(StrChar__.fromCodePoint(0x999).toBytes32(),  bytes32("\u0999"));
195 |         assertEq(StrChar__.fromCodePoint(0x999).toString(),    string("\u0999"));
196 |         assertEq(StrChar__.fromCodePoint(0xFFF).toBytes32(),  bytes32("\u0FFF"));
197 |         assertEq(StrChar__.fromCodePoint(0xFFF).toString(),    string("\u0FFF"));
198 |         assertEq(StrChar__.fromCodePoint(0x1000).toBytes32(), bytes32("\u1000"));
199 |         assertEq(StrChar__.fromCodePoint(0x1000).toString(),   string("\u1000"));
200 |         assertEq(StrChar__.fromCodePoint(0x1001).toBytes32(), bytes32("\u1001"));
201 |         assertEq(StrChar__.fromCodePoint(0x1001).toString(),   string("\u1001"));
202 |         assertEq(StrChar__.fromCodePoint(0x2500).toBytes32(), bytes32("\u2500"));
203 |         assertEq(StrChar__.fromCodePoint(0x2500).toString(),   string("\u2500"));
204 |         assertEq(StrChar__.fromCodePoint(0xD799).toBytes32(), bytes32("\uD799"));
205 |         assertEq(StrChar__.fromCodePoint(0xD799).toString(),   string("\uD799"));
206 |         assertEq(StrChar__.fromCodePoint(0xE000).toBytes32(), bytes32("\uE000"));
207 |         assertEq(StrChar__.fromCodePoint(0xE000).toString(),   string("\uE000"));
208 |         assertEq(StrChar__.fromCodePoint(0xF0FF).toBytes32(), bytes32("\uF0FF"));
209 |         assertEq(StrChar__.fromCodePoint(0xF0FF).toString(),   string("\uF0FF"));
210 |         assertEq(StrChar__.fromCodePoint(0xFFFF).toBytes32(), bytes32("\uFFFF"));
211 |         assertEq(StrChar__.fromCodePoint(0xFFFF).toString(),   string("\uFFFF"));
212 |     }
213 | 
214 |     function testThreeByte__Invalid() public {
215 |         vm.expectRevert(StrChar__InvalidUTF8.selector);
216 |         revertHelper.from(bytes32(hex"F00000"));
217 |         vm.expectRevert(StrChar__InvalidUTF8.selector);
218 |         revertHelper.from(bytes32(hex"F08080"));
219 |         vm.expectRevert(StrChar__InvalidUTF8.selector);
220 |         revertHelper.from(bytes32(hex"FFFFFF"));
221 |         vm.expectRevert(StrChar__InvalidUTF8.selector);
222 |         revertHelper.from(bytes32(hex"E08080"));
223 |         vm.expectRevert(StrChar__InvalidUTF8.selector);
224 |         revertHelper.from(bytes32(hex"E09F80"));
225 |         vm.expectRevert(StrChar__InvalidUTF8.selector);
226 |         revertHelper.from(bytes32(hex"E0C080"));
227 |         vm.expectRevert(StrChar__InvalidUTF8.selector);
228 |         revertHelper.from(bytes32(hex"E0A07F"));
229 |         vm.expectRevert(StrChar__InvalidUTF8.selector);
230 |         revertHelper.from(bytes32(hex"EDA080"));
231 |     }
232 | 
233 |     function testThreeByte__Trailing() public {
234 |         assertEq(StrChar__.from(bytes32(hex"E0A0801111")).toCodePoint(), 0x800);
235 |         assertEq(StrChar__.from(bytes32(hex"E0A08000FF")).toCodePoint(), 0x800);
236 |     }
237 | 
238 |     /*//////////////////////////////////////////////////////////////////////////
239 |                                         4 BYTES
240 |     //////////////////////////////////////////////////////////////////////////*/
241 | 
242 |     function testFourByte() public {
243 |         // it's a ~million, don't really want to loop the whole thing (takes like 15 secs),
244 |         // so just take 65k from each side 
245 |         for (uint256 i = 0x10000; i < 0x20000; i++) {
246 |             StrChar char = StrChar__.fromCodePoint(i);
247 |             assertTrue(char.isValidUtf8());
248 |             assertEq(char.len(), 4);
249 |             assertEq(char.toCodePoint(), i);
250 |         }
251 |         for (uint256 i = 0x100000; i <= 0x10FFFF; i++) {
252 |             StrChar char = StrChar__.fromCodePoint(i);
253 |             assertTrue(char.isValidUtf8());
254 |             assertEq(char.len(), 4);
255 |             assertEq(char.toCodePoint(), i);
256 |         }
257 |     }
258 | 
259 |     function testFourByte__Manual() public {
260 |         // solidity's \u doesn't work with 4-byte code points :(
261 |         assertEq(StrChar__.fromCodePoint(0x10000).toBytes32(),  unicode"𐀀");
262 |         assertEq(StrChar__.fromCodePoint(0x10000).toString(),   unicode"𐀀");
263 |         assertEq(StrChar__.fromCodePoint(0x10001).toBytes32(),  unicode"𐀁");
264 |         assertEq(StrChar__.fromCodePoint(0x10001).toString(),   unicode"𐀁");
265 |         assertEq(StrChar__.fromCodePoint(0x20000).toBytes32(),  unicode"𠀀");
266 |         assertEq(StrChar__.fromCodePoint(0x20000).toString(),   unicode"𠀀");
267 |         assertEq(StrChar__.fromCodePoint(0x34567).toBytes32(),  unicode"𴕧");
268 |         assertEq(StrChar__.fromCodePoint(0x34567).toString(),   unicode"𴕧");
269 |         assertEq(StrChar__.fromCodePoint(0xF0000).toBytes32(),  unicode"󰀀");
270 |         assertEq(StrChar__.fromCodePoint(0xF0000).toString(),   unicode"󰀀");
271 |         assertEq(StrChar__.fromCodePoint(0xFFFFF).toBytes32(),  unicode"󿿿");
272 |         assertEq(StrChar__.fromCodePoint(0xFFFFF).toString(),   unicode"󿿿");
273 |         assertEq(StrChar__.fromCodePoint(0x100000).toBytes32(), unicode"􀀀");
274 |         assertEq(StrChar__.fromCodePoint(0x100000).toString(),  unicode"􀀀");
275 |         assertEq(StrChar__.fromCodePoint(0x10FFFF).toBytes32(), unicode"􏿿");
276 |         assertEq(StrChar__.fromCodePoint(0x10FFFF).toString(),  unicode"􏿿");
277 |     }
278 | 
279 |     function testFourByte__Invalid() public {
280 |         vm.expectRevert(StrChar__InvalidUTF8.selector);
281 |         revertHelper.from(bytes32(hex"F0000000")); // 2nd byte 0x00 is not a continuation byte
282 |         vm.expectRevert(StrChar__InvalidUTF8.selector);
283 |         revertHelper.from(bytes32(hex"F0808080")); // overlong: payload is below 0x10000
284 |         vm.expectRevert(StrChar__InvalidUTF8.selector);
285 |         revertHelper.from(bytes32(hex"FFFFFFFF")); // 0xFF is never a valid leading byte
286 |         vm.expectRevert(StrChar__InvalidUTF8.selector);
287 |         revertHelper.from(bytes32(hex"F08F8080")); // overlong: payload is below 0x10000
288 |         vm.expectRevert(StrChar__InvalidUTF8.selector);
289 |         revertHelper.from(bytes32(hex"F0C08080")); // 2nd byte 0xC0 is not a continuation byte
290 |         vm.expectRevert(StrChar__InvalidUTF8.selector);
291 |         revertHelper.from(bytes32(hex"F17F8080")); // 2nd byte 0x7F is not a continuation byte
292 |         vm.expectRevert(StrChar__InvalidUTF8.selector);
293 |         revertHelper.from(bytes32(hex"F4908080")); // payload is above 0x10FFFF
294 |         vm.expectRevert(StrChar__InvalidUTF8.selector);
295 |         revertHelper.from(bytes32(hex"F4BF8080")); // payload is above 0x10FFFF
296 |     }
297 | 
298 |     function testFourByte__Trailing() public {
299 |         assertEq(StrChar__.from(bytes32(hex"F09080801111")).toCodePoint(), 0x10000); // bytes after the 4-byte character must not affect the result
300 |         assertEq(StrChar__.from(bytes32(hex"F090808000FF")).toCodePoint(), 0x10000);
301 |     }
302 | 
303 |     /*//////////////////////////////////////////////////////////////////////////
304 |                                     ASCII
305 |     //////////////////////////////////////////////////////////////////////////*/
306 | 
307 |     function testIsAscii() public {
308 |         // every code point below 0x80 must be reported as ASCII
309 |         for (uint256 code = 0; code < 0x80; ++code) {
310 |             assertTrue(StrChar__.fromCodePoint(code).isAscii());
311 |         }
312 |         // nothing in [0x80, 0x20000) is ASCII; surrogate halves (0xD800..0xDFFF) are skipped
313 |         for (uint256 code = 0x80; code < 0x20000; ++code) {
314 |             if (code < 0xD800 || code > 0xDFFF) {
315 |                 assertFalse(StrChar__.fromCodePoint(code).isAscii());
316 |             }
317 |         }
318 |         // and neither is the largest code point used by these tests
319 |         assertFalse(StrChar__.fromCodePoint(0x10FFFF).isAscii());
320 |     }
321 | }
322 | 
323 | contract StrCharRevertHelper { // exposes the StrChar__ constructors as public functions of a separate contract
324 |     function from(bytes32 b) public pure returns (StrChar char) {
325 |         char = StrChar__.from(b);
326 |     }
327 | 
328 |     function fromCodePoint(uint256 code) public pure returns (StrChar char) {
329 |         char = StrChar__.fromCodePoint(code);
330 |     }
331 | }


--------------------------------------------------------------------------------
/src/StrSlice.sol:
--------------------------------------------------------------------------------
  1 | // SPDX-License-Identifier: MIT
  2 | 
  3 | pragma solidity ^0.8.17;
  4 | 
  5 | import { Slice, Slice__, Slice__OutOfBounds } from "./Slice.sol";
  6 | import { StrChar, StrChar__ } from "./StrChar.sol";
  7 | import { StrCharsIter, StrCharsIter__ } from "./StrCharsIter.sol";
  8 | import { isValidUtf8 } from "./utils/utf8.sol";
  9 | import { memIsAscii } from "./utils/memascii.sol";
 10 | import { PackPtrLen } from "./utils/PackPtrLen.sol";
 11 | 
 12 | /**
 13 |  * @title A string slice.
 14 |  * @dev String slices must always be valid UTF-8.
 15 |  * Internally `StrSlice` uses `Slice`, adding only UTF-8 related logic on top.
 16 |  */
 17 | type StrSlice is uint256;
 18 | 
 19 | /*//////////////////////////////////////////////////////////////////////////
 20 |                                 CUSTOM ERRORS
 21 | //////////////////////////////////////////////////////////////////////////*/
 22 | 
 23 | error StrSlice__InvalidCharBoundary();
 24 | 
 25 | /*//////////////////////////////////////////////////////////////////////////
 26 |                               STATIC FUNCTIONS
 27 | //////////////////////////////////////////////////////////////////////////*/
 28 | 
 29 | library StrSlice__ {
 30 |     /**
 31 |      * @dev Wraps a `string` in a `StrSlice` without copying it.
 32 |      * The resulting slice points into the memory of `string`,
 33 |      * at the data that follows the length word.
 34 |      */
 35 |     function from(string memory str) internal pure returns (StrSlice slice) {
 36 |         uint256 dataPtr;
 37 |         assembly {
 38 |             dataPtr := add(str, 0x20)
 39 |         }
 40 |         slice = fromRawParts(dataPtr, bytes(str).length);
 41 |     }
 42 | 
 43 |     /**
 44 |      * @dev Builds a `StrSlice` straight from a memory pointer and a byte length.
 45 |      * The caller MUST guarantee memory-safety.
 46 |      * Primarily intended for internal use.
 47 |      */
 48 |     function fromRawParts(uint256 _ptr, uint256 _len) internal pure returns (StrSlice slice) {
 49 |         // build the byte slice first, then reinterpret its packed value
 50 |         Slice raw = Slice__.fromRawParts(_ptr, _len);
 51 |         slice = StrSlice.wrap(Slice.unwrap(raw));
 52 |     }
 53 | 
 54 |     /**
 55 |      * @dev Tells whether the byte slice begins with a valid UTF-8 character.
 56 |      * Only the beginning is inspected, the whole slice is NOT validated.
 57 |      */
 58 |     function isBoundaryStart(Slice slice) internal pure returns (bool) {
 59 |         uint256 charLen = isValidUtf8(slice.toBytes32());
 60 |         return charLen > 0;
 61 |     }
 62 | }
 63 | 
 64 | /**
 65 |  * @dev Free-function alternative to StrSlice__.from()
 66 |  * To call it as a method, put this in your file (using for global is only for user-defined types):
 67 |  * ```
 68 |  * using { toSlice } for string;
 69 |  * ```
 70 |  */
 71 | function toSlice(string memory str) pure returns (StrSlice slice) {
 72 |     slice = StrSlice__.from(str);
 73 | }
 74 | 
 75 | /*//////////////////////////////////////////////////////////////////////////
 76 |                               GLOBAL FUNCTIONS
 77 | //////////////////////////////////////////////////////////////////////////*/
 78 | 
 79 | using {
 80 |     asSlice,
 81 |     ptr, len, isEmpty,
 82 |     // conversion
 83 |     toString,
 84 |     keccak,
 85 |     // concatenation
 86 |     add, join,
 87 |     // compare
 88 |     cmp, eq, ne, lt, lte, gt, gte,
 89 |     // index
 90 |     isCharBoundary,
 91 |     get,
 92 |     splitAt, getSubslice,
 93 |     // search
 94 |     find, rfind, contains,
 95 |     startsWith, endsWith,
 96 |     // modify
 97 |     stripPrefix, stripSuffix,
 98 |     splitOnce, rsplitOnce,
 99 |     replacen,
100 |     // iteration
101 |     chars,
102 |     // ascii
103 |     isAscii
104 | } for StrSlice global;
105 | 
106 | /// @dev Reinterprets the `StrSlice` as its underlying `Slice`.
107 | /// WARNING: manipulating `Slice`s can break UTF-8 for related `StrSlice`s!
108 | function asSlice(StrSlice self) pure returns (Slice) {
109 |     // the packed value is passed through unchanged
110 |     uint256 packed = StrSlice.unwrap(self);
111 |     return Slice.wrap(packed);
112 | }
113 | 
114 | /// @dev Returns the pointer to the start of an in-memory string slice.
115 | /// Primarily intended for internal use.
116 | function ptr(StrSlice self) pure returns (uint256) {
117 |     // the pointer is what remains after shifting out the lower 128 bits
118 |     uint256 packed = StrSlice.unwrap(self);
119 |     return packed >> 128;
120 | }
121 | 
122 | /// @dev Returns the length of the slice in bytes (not in code points).
123 | function len(StrSlice self) pure returns (uint256) {
124 |     // the length is stored in the bits selected by MASK_LEN
125 |     uint256 packed = StrSlice.unwrap(self);
126 |     return packed & PackPtrLen.MASK_LEN;
127 | }
128 | 
129 | /// @dev Returns true if the slice is 0 bytes long.
130 | function isEmpty(StrSlice self) pure returns (bool) {
131 |     // mask out the length bits first, then compare them with zero
132 |     uint256 lenBits = StrSlice.unwrap(self) & PackPtrLen.MASK_LEN;
133 |     return lenBits == 0;
134 | }
135 | 
136 | /// @dev Copies the bytes of `StrSlice` into a newly allocated string.
137 | /// The `StrSlice` itself will NOT point to the new string.
138 | function toString(StrSlice self) view returns (string memory) {
139 |     // copy as raw bytes, then reinterpret the copy as a string
140 |     bytes memory copied = self.asSlice().toBytes();
141 |     return string(copied);
142 | }
143 | 
144 | /**
145 |  * @dev Hashes all the bytes of `StrSlice` with keccak256.
146 |  * Note that for any `string memory b`, keccak256(b) == b.toSlice().keccak()
147 |  * (keccak256 does not include the length word of the string)
148 |  */
149 | function keccak(StrSlice self) pure returns (bytes32 result) {
150 |     result = self.asSlice().keccak();
151 | }
152 | 
153 | /// @dev Returns `self` followed by `other` as a newly allocated string.
154 | function add(StrSlice self, StrSlice other) view returns (string memory) {
155 |     // concatenate as raw bytes, then reinterpret the result as a string
156 |     bytes memory combined = self.asSlice().add(other.asSlice());
157 |     return string(combined);
158 | }
159 | 
160 | /**
161 |  * @dev Joins an array of `StrSlice`s into a single newly allocated string,
162 |  * with `self` placed as the separator between the elements.
163 |  */
164 | function join(StrSlice self, StrSlice[] memory strs) view returns (string memory) {
165 |     // reinterpret the very same array as Slice[] (no copy is made)
166 |     Slice[] memory rawSlices;
167 |     assembly { rawSlices := strs }
168 |     bytes memory joined = self.asSlice().join(rawSlices);
169 |     return string(joined);
170 | }
171 | 
172 | /**
173 |  * @dev Lexicographic comparison of two string slices.
174 |  * @return result 0 for equal, < 0 if `self` is less than `other`, > 0 if it is greater.
175 |  */
176 | function cmp(StrSlice self, StrSlice other) pure returns (int256 result) {
177 |     result = self.asSlice().cmp(other.asSlice());
178 | }
179 | 
180 | /// @dev `self` == `other`. Note this is more efficient than cmp.
181 | function eq(StrSlice self, StrSlice other) pure returns (bool) {
182 |     Slice lhs = self.asSlice();
183 |     return lhs.eq(other.asSlice());
184 | }
185 | 
186 | /// @dev `self` != `other`. Note this is more efficient than cmp.
187 | function ne(StrSlice self, StrSlice other) pure returns (bool) {
188 |     Slice lhs = self.asSlice();
189 |     return lhs.ne(other.asSlice());
190 | }
191 | 
192 | /// @dev Returns true if `self` < `other` (ordering as defined by `cmp`).
193 | function lt(StrSlice self, StrSlice other) pure returns (bool) {
194 |     return 0 > self.cmp(other);
195 | }
196 | 
197 | /// @dev Returns true if `self` <= `other` (ordering as defined by `cmp`).
198 | function lte(StrSlice self, StrSlice other) pure returns (bool) {
199 |     return !(self.cmp(other) > 0);
200 | }
201 | 
202 | /// @dev Returns true if `self` > `other` (ordering as defined by `cmp`).
203 | function gt(StrSlice self, StrSlice other) pure returns (bool) {
204 |     return 0 < self.cmp(other);
205 | }
206 | 
207 | /// @dev Returns true if `self` >= `other` (ordering as defined by `cmp`).
208 | function gte(StrSlice self, StrSlice other) pure returns (bool) {
209 |     return !(self.cmp(other) < 0);
210 | }
211 | 
212 | /**
213 |  * @dev Tells whether the `index`-th byte is a safe place to split the slice.
214 |  * The start and the end of the string (index == self.len()) are considered to be boundaries.
215 |  * Returns false for any index greater than self.len().
216 |  */
217 | function isCharBoundary(StrSlice self, uint256 index) pure returns (bool) {
218 |     uint256 selfLen = self.len();
219 |     // at or past the end: only the exact end counts as a boundary
220 |     if (index >= selfLen) {
221 |         return index == selfLen;
222 |     }
223 |     // inside the slice: a valid UTF-8 character has to start at `index`
224 |     return isValidUtf8(self.asSlice().getAfter(index).toBytes32()) != 0;
225 | }
226 | 
227 | /**
228 |  * @dev Returns the character that starts at byte `index`.
229 |  * Reverts if index is out of bounds, or if no valid UTF-8 character starts at `index`.
230 |  */
231 | function get(StrSlice self, uint256 index) pure returns (StrChar char) {
232 |     bytes32 head = self.asSlice().getAfterStrict(index).toBytes32();
233 |     uint256 headLen = isValidUtf8(head);
234 |     if (headLen == 0) revert StrSlice__InvalidCharBoundary();
235 |     char = StrChar__.fromUnchecked(head, headLen);
236 | }
237 | 
238 | /**
239 |  * @dev Splits one string slice in two at byte index `mid`.
240 |  * Reverts when splitting on a non-boundary (use isCharBoundary).
241 |  */
242 | function splitAt(StrSlice self, uint256 mid) pure returns (StrSlice, StrSlice) {
243 |     (Slice head, Slice tail) = self.asSlice().splitAt(mid);
244 |     // each of the two parts has to begin with a valid character
245 |     bool bothValid = StrSlice__.isBoundaryStart(head) && StrSlice__.isBoundaryStart(tail);
246 |     if (!bothValid) revert StrSlice__InvalidCharBoundary();
247 | 
248 |     StrSlice left = StrSlice.wrap(Slice.unwrap(head));
249 |     StrSlice right = StrSlice.wrap(Slice.unwrap(tail));
250 |     return (left, right);
251 | }
252 | 
253 | /// @dev Returns the subslice [start..end) of `self`.
254 | /// Reverts when slicing on a non-boundary (use isCharBoundary).
255 | function getSubslice(StrSlice self, uint256 start, uint256 end) pure returns (StrSlice) {
256 |     Slice whole = self.asSlice();
257 |     Slice sub = whole.getSubslice(start, end);
258 |     if (!StrSlice__.isBoundaryStart(sub)) revert StrSlice__InvalidCharBoundary();
259 |     // unless `end` is the end of `self`, whatever follows `end` must start a character too
260 |     if (end != self.len()) {
261 |         (, Slice rest) = whole.splitAt(end);
262 |         if (!StrSlice__.isBoundaryStart(rest)) revert StrSlice__InvalidCharBoundary();
263 |     }
264 |     return StrSlice.wrap(Slice.unwrap(sub));
265 | }
266 | 
267 | /// @dev Returns the byte index of the first slice of `self` that matches `pattern`.
268 | /// Returns type(uint256).max if there is no match.
269 | function find(StrSlice self, StrSlice pattern) pure returns (uint256) {
270 |     // delegates to `find` of the underlying `Slice`
271 |     Slice haystack = self.asSlice();
272 |     return haystack.find(pattern.asSlice());
273 | }
274 | 
275 | /// @dev Returns the byte index of the last slice of `self` that matches `pattern`.
276 | /// Returns type(uint256).max if there is no match.
277 | function rfind(StrSlice self, StrSlice pattern) pure returns (uint256) {
278 |     // delegates to `rfind` of the underlying `Slice`
279 |     Slice haystack = self.asSlice();
280 |     return haystack.rfind(pattern.asSlice());
281 | }
282 | 
283 | /// @dev Returns true if `pattern` matches some sub-slice of this string slice.
284 | function contains(StrSlice self, StrSlice pattern) pure returns (bool) {
285 |     // delegates to `contains` of the underlying `Slice`
286 |     Slice haystack = self.asSlice();
287 |     return haystack.contains(pattern.asSlice());
288 | }
289 | 
290 | /// @dev Returns true if `pattern` matches a prefix of this string slice.
291 | function startsWith(StrSlice self, StrSlice pattern) pure returns (bool) {
292 |     // delegates to `startsWith` of the underlying `Slice`
293 |     Slice haystack = self.asSlice();
294 |     return haystack.startsWith(pattern.asSlice());
295 | }
296 | 
297 | /// @dev Returns true if `pattern` matches a suffix of this string slice.
298 | function endsWith(StrSlice self, StrSlice pattern) pure returns (bool) {
299 |     // delegates to `endsWith` of the underlying `Slice`
300 |     Slice haystack = self.asSlice();
301 |     return haystack.endsWith(pattern.asSlice());
302 | }
303 | 
304 | /**
305 |  * @dev Returns a subslice with the prefix `pattern` removed.
306 |  * If `self` does not start with `pattern`, returns `self` unmodified.
307 |  */
308 | function stripPrefix(StrSlice self, StrSlice pattern) pure returns (StrSlice result) {
309 |     // strip on the underlying byte slices, then reinterpret the packed value
310 |     Slice stripped = self.asSlice().stripPrefix(pattern.asSlice());
311 |     result = StrSlice.wrap(Slice.unwrap(stripped));
312 | }
313 | 
314 | /**
315 |  * @dev Returns a subslice with the suffix `pattern` removed.
316 |  * If `self` does not end with `pattern`, returns `self` unmodified.
317 |  */
318 | function stripSuffix(StrSlice self, StrSlice pattern) pure returns (StrSlice result) {
319 |     // strip on the underlying byte slices, then reinterpret the packed value
320 |     Slice stripped = self.asSlice().stripSuffix(pattern.asSlice());
321 |     result = StrSlice.wrap(Slice.unwrap(stripped));
322 | }
323 | 
324 | /**
325 |  * @dev Splits a slice in 2 around the first match of `pattern`.
326 |  * If found == true, `prefix` and `suffix` are the parts strictly before and after the match.
327 |  * If found == false, `prefix` is the entire string and `suffix` is empty.
328 |  */
329 | function splitOnce(StrSlice self, StrSlice pattern)
330 |     pure
331 |     returns (bool found, StrSlice prefix, StrSlice suffix)
332 | {
333 |     uint256 matchIndex = self.asSlice().find(pattern.asSlice());
334 |     // found: cut the match out
335 |     if (matchIndex != type(uint256).max) {
336 |         return self._splitFound(matchIndex, pattern.len());
337 |     }
338 | 
339 |     // not found: everything stays in `prefix`
340 |     return (false, self, StrSlice.wrap(0));
341 | }
342 | 
343 | /**
344 |  * @dev Splits a slice in 2 around the last match of `pattern`.
345 |  * If found == true, `prefix` and `suffix` are the parts strictly before and after the match.
346 |  * If found == false, `prefix` is empty and `suffix` is the entire string.
347 |  */
348 | function rsplitOnce(StrSlice self, StrSlice pattern)
349 |     pure
350 |     returns (bool found, StrSlice prefix, StrSlice suffix)
351 | {
352 |     uint256 matchIndex = self.asSlice().rfind(pattern.asSlice());
353 |     // found: cut the match out
354 |     if (matchIndex != type(uint256).max) {
355 |         return self._splitFound(matchIndex, pattern.len());
356 |     }
357 | 
358 |     // not found: everything stays in `suffix`
359 |     return (false, StrSlice.wrap(0), self);
360 | }
361 | 
362 | /**
363 |  * *EXPERIMENTAL*
364 |  * @dev Replaces first `n` matches of a pattern with another string slice.
365 |  * Returns the result in a newly allocated string.
366 |  * Note this does not modify the string `self` is a slice of.
367 |  * WARNING: Requires pattern.len() > 0 and to.len() <= pattern.len() (both are checked with `assert`)
368 |  */
369 | function replacen(
370 |     StrSlice self,
371 |     StrSlice pattern,
372 |     StrSlice to,
373 |     uint256 n
374 | ) view returns (string memory str) {
375 |     uint256 patLen = pattern.len();
376 |     uint256 toLen = to.len();
377 |     // TODO dynamic string; atm length can be reduced but not increased
378 |     assert(patLen >= toLen);
379 |     assert(patLen > 0);
380 | 
381 |     str = new string(self.len());
382 |     Slice iterSlice = self.asSlice();
383 |     Slice resultSlice = Slice__.from(bytes(str));
384 | 
385 |     uint256 matchNum;
386 |     while (matchNum < n) {
387 |         uint256 index = iterSlice.find(pattern.asSlice());
388 |         // break if no more matches
389 |         if (index == type(uint256).max) break;
390 |         // copy prefix
391 |         if (index > 0) {
392 |             resultSlice
393 |                 .getBefore(index)
394 |                 .copyFromSlice(
395 |                     iterSlice.getBefore(index)
396 |                 );
397 |         }
398 | 
399 |         uint256 indexToEnd;
400 |         // TODO this is fine atm only because toLen <= patLen
401 |         unchecked {
402 |             indexToEnd = index + toLen;
403 |         }
404 | 
405 |         // copy replacement
406 |         resultSlice
407 |             .getSubslice(index, indexToEnd)
408 |             .copyFromSlice(to.asSlice());
409 | 
410 |         // advance slices past the match
411 |         iterSlice = iterSlice.getAfter(index + patLen);
412 |         resultSlice = resultSlice.getAfter(indexToEnd);
413 | 
414 |         // break if iterSlice is done
415 |         if (iterSlice.len() == 0) {
416 |             break;
417 |         }
418 |         // safe because of `while` condition
419 |         unchecked {
420 |             matchNum++;
421 |         }
422 |     }
423 | 
424 |     uint256 realLen = resultSlice.ptr() - StrSlice__.from(str).ptr();
425 |     // copy suffix
426 |     uint256 iterLen = iterSlice.len();
427 |     if (iterLen > 0) {
428 |         resultSlice
429 |             .getBefore(iterLen)
430 |             .copyFromSlice(iterSlice);
431 |         realLen += iterLen;
432 |     }
433 |     // remove extra length
434 |     if (bytes(str).length != realLen) {
435 |         // TODO atm only accepting toLen <= patLen, so the result can only be shorter than the allocation
436 |         assert(realLen <= bytes(str).length);
437 |         /// @solidity memory-safe-assembly
438 |         assembly {
439 |             mstore(str, realLen)
440 |         }
441 |     }
442 |     return str;
443 | }
444 | 
445 | /// @dev Returns a character iterator over the slice.
446 | /// The iterator yields items from either side.
447 | function chars(StrSlice self) pure returns (StrCharsIter memory) {
448 |     // the iterator is built from the slice's pointer and byte length
449 |     (uint256 selfPtr, uint256 selfLen) = (self.ptr(), self.len());
450 |     return StrCharsIter(selfPtr, selfLen);
451 | }
452 | 
453 | /**
454 |  * @dev Checks whether all characters are within the ASCII range.
455 |  * Note this does NOT explicitly validate UTF-8: ASCII certainly is valid UTF-8,
456 |  * whereas non-ASCII *could* be invalid UTF-8.
457 |  * Use `StrCharsIter` for explicit validation.
458 |  */
459 | function isAscii(StrSlice self) pure returns (bool) {
460 |     (uint256 selfPtr, uint256 selfLen) = (self.ptr(), self.len());
461 |     return memIsAscii(selfPtr, selfLen);
462 | }
463 | 
464 | /*//////////////////////////////////////////////////////////////////////////
465 |                               FILE FUNCTIONS
466 | //////////////////////////////////////////////////////////////////////////*/
467 | 
468 | using { _splitFound } for StrSlice;
469 | 
470 | /**
471 |  * @dev Splits a slice into [:index] and [index+patLen:].
472 |  * CALLER GUARANTEE: `index` < self.len()
473 |  * For internal use by splitOnce/rsplitOnce.
474 |  *
475 |  * This is mostly just a faster alternative to `getBefore`+`getAfter`.
476 |  */
477 | function _splitFound(StrSlice self, uint256 index, uint256 patLen)
478 |     pure
479 |     returns (bool, StrSlice prefix, StrSlice suffix)
480 | {
481 |     uint256 basePtr = self.ptr();
482 |     uint256 totalLen = self.len();
483 |     // end of the match; unchecked relies on the caller guaranteeing index < totalLen
484 |     uint256 matchEnd;
485 |     unchecked {
486 |         matchEnd = index + patLen;
487 |     }
488 |     if (matchEnd > totalLen) revert Slice__OutOfBounds();
489 | 
490 |     // [:index] (inlined `getBefore`)
491 |     Slice head = Slice__.fromUnchecked(basePtr, index);
492 |     // [(index+patLen):] (inlined `getAfter`)
493 |     Slice tail;
494 |     // safe because matchEnd <= totalLen
495 |     unchecked {
496 |         tail = Slice__.fromUnchecked(basePtr + matchEnd, totalLen - matchEnd);
497 |     }
498 |     prefix = StrSlice.wrap(Slice.unwrap(head));
499 |     suffix = StrSlice.wrap(Slice.unwrap(tail));
500 |     return (true, prefix, suffix);
501 | }


--------------------------------------------------------------------------------
/test/Slice.t.sol:
--------------------------------------------------------------------------------
  1 | // SPDX-License-Identifier: MIT
  2 | 
  3 | pragma solidity ^0.8.17;
  4 | 
  5 | import { PRBTest } from "@prb/test/src/PRBTest.sol";
  6 | import { SliceAssertions } from "../src/test/SliceAssertions.sol";
  7 | 
  8 | import { Slice, Slice__, toSlice } from "../src/Slice.sol";
  9 | import { Slice__OutOfBounds } from "../src/Slice.sol";
 10 | 
 11 | using { toSlice } for bytes;
 12 | 
 13 | contract SliceTest is PRBTest, SliceAssertions {
 14 |     function checkOffset(bytes memory b1, bytes memory b2, uint256 offset) internal {
 15 |         // asserts that the bytes of b2 appear in b1 starting at `offset`
 16 |         uint256 b2Len = b2.length;
 17 |         require(b2Len <= b1.length, "checkOffset expects b2.length <= b1.length");
 18 |         for (uint256 i = 0; i < b2Len; ++i) assertEq(b1[offset + i], b2[i]);
 19 |     }
 20 | 
 21 |     function testLen(bytes calldata _b) public {
 22 |         assertEq(_b.toSlice().len(), _b.length); // fuzz: the slice reports the length of its source bytes
 23 |     }
 24 | 
 25 |     function testIsEmpty() public {
 26 |         assertTrue(bytes("").toSlice().isEmpty()); // zero-length bytes
 27 |         assertFalse(new bytes(1).toSlice().isEmpty()); // one zero byte is still not empty
 28 |     }
 29 | 
 30 |     function testToBytes(bytes calldata _b) public {
 31 |         assertEq(_b, _b.toSlice().toBytes()); // fuzz: bytes -> slice -> bytes round-trips unchanged
 32 |     }
 33 | 
 34 |     function testToBytes32(bytes memory _b) public {
 35 |         // expected: the 32-byte word at the start of the data, or zero for empty bytes
 36 |         bytes32 expected;
 37 |         if (_b.length != 0) {
 38 |             /// @solidity memory-safe-assembly
 39 |             assembly { expected := mload(add(_b, 0x20)) }
 40 |         }
 41 | 
 42 |         assertEq(expected, _b.toSlice().toBytes32());
 43 |     }
 44 | 
 45 |     function testKeccak__Eq(bytes calldata _b) public {
 46 |         // two separate memory copies of the same calldata
 47 |         bytes memory copyA = _b;
 48 |         bytes memory copyB = _b;
 49 |         assertEq(copyA.toSlice().keccak(), copyB.toSlice().keccak());
 50 |         assertEq(keccak256(copyA), keccak256(copyB));
 51 |         assertEq(copyA.toSlice().keccak(), keccak256(copyA));
 52 |     }
 53 | 
 54 |     function testKeccak__NotEq(bytes calldata _b) public {
 55 |         vm.assume(_b.length > 0);
 56 |         bytes memory mutated = _b;
 57 |         bytes memory pristine = _b;
 58 |         // flip the lowest bit of one pseudorandomly chosen byte
 59 |         uint256 flipAt = uint256(keccak256(abi.encode(_b, "i"))) % _b.length;
 60 |         mutated[flipAt] ^= 0x01;
 61 |         assertEq(mutated.toSlice().keccak(), keccak256(mutated));
 62 |         assertNotEq(mutated.toSlice().keccak(), pristine.toSlice().keccak());
 63 |         assertNotEq(keccak256(mutated), keccak256(pristine));
 64 |     }
 65 | 
 66 |     /*//////////////////////////////////////////////////////////////////////////
 67 |                                     COMPARE
 68 |     //////////////////////////////////////////////////////////////////////////*/
 69 | 
 70 |     // use plain integer assertions here: slice assertions would be testing themselves with themselves
 71 |     function testCmp() public {
 72 |         assertGt(toSlice("1").cmp(toSlice("0")),  0);
 73 |         assertEq(toSlice("1").cmp(toSlice("1")),  0);
 74 |         assertLt(toSlice("0").cmp(toSlice("1")),  0);
 75 |         assertGt(toSlice("1").cmp(toSlice("")),   0);
 76 |         assertEq(toSlice("").cmp(toSlice("")),    0);
 77 |         assertLt(toSlice("").cmp(toSlice("1")),   0);
 78 |         assertGt(toSlice("12").cmp(toSlice("1")), 0);
 79 |         assertLt(toSlice("1").cmp(toSlice("12")), 0);
 80 |     }
 81 | 
 82 |     function testCmp__Long() public { // same cases as testCmp, but behind a long shared prefix
 83 |         bytes memory b0  = "1234567890______________________________________________________0";
 84 |         bytes memory b1  = "1234567890______________________________________________________1";
 85 |         bytes memory b12 = "1234567890______________________________________________________12";
 86 |         bytes memory bn  = "1234567890______________________________________________________";
 87 | 
 88 |         assertGt(toSlice(b1).cmp(toSlice(b0)),  0);
 89 |         assertEq(toSlice(b1).cmp(toSlice(b1)),  0);
 90 |         assertLt(toSlice(b0).cmp(toSlice(b1)),  0);
 91 |         assertGt(toSlice(b1).cmp(toSlice(bn)),  0);
 92 |         assertEq(toSlice(bn).cmp(toSlice(bn)),  0);
 93 |         assertLt(toSlice(bn).cmp(toSlice(b1)),  0);
 94 |         assertGt(toSlice(b12).cmp(toSlice(b1)), 0);
 95 |         assertLt(toSlice(b1).cmp(toSlice(b12)), 0);
 96 |     }
 97 | 
 98 |     // TODO more comparison tests for specialized funcs
 99 | 
100 |     /*//////////////////////////////////////////////////////////////////////////
101 |                                         COPY
102 |     //////////////////////////////////////////////////////////////////////////*/
103 | 
104 |     function _copyFromValue(uint256 length, bytes32 value) internal pure returns (Slice slice) {
105 |         // slice over a fresh buffer of `length` bytes, filled via copyFromValue
106 |         slice = (new bytes(length)).toSlice();
107 |         slice.copyFromValue(value, length);
108 |     }
109 | 
110 |     function _copyFromValueRightAligned(uint256 length, bytes32 value) internal pure returns (Slice slice) {
111 |         // slice over a fresh buffer of `length` bytes, filled via copyFromValueRightAligned
112 |         slice = (new bytes(length)).toSlice();
113 |         slice.copyFromValueRightAligned(value, length);
114 |     }
115 | 
116 |     function testCopyFromSlice(bytes calldata _b) public {
117 |         Slice src = _b.toSlice();
118 |         // destination: a fresh buffer of the same length
119 |         Slice dest = (new bytes(_b.length)).toSlice();
120 | 
121 |         dest.copyFromSlice(src);
122 | 
123 |         assertEq(dest, src);
124 |     }
125 | 
126 |     function testCopyFromValue__Fuzz(bytes32 value) public {
127 |         // copying all 32 bytes must reproduce `value` exactly
128 |         Slice slice = (new bytes(32)).toSlice();
129 | 
130 |         slice.copyFromValue(value, 32);
131 | 
132 |         assertEq(slice, abi.encodePacked(value));
133 |     }
134 | 
135 |     function testCopyFromValue__LeftAligned() public {
136 |         bytes1 v1 = "1"; // bytesN widened to bytes32 keeps its data in the leading bytes
137 |         assertEq(_copyFromValue(1, bytes32(v1)), abi.encodePacked(v1));
138 | 
139 |         bytes2 v2 = "22";
140 |         assertEq(_copyFromValue(2, bytes32(v2)), abi.encodePacked(v2));
141 | 
142 |         bytes16 v16 = "1234567890123456";
143 |         assertEq(_copyFromValue(16, bytes32(v16)), abi.encodePacked(v16));
144 | 
145 |         bytes25 v25 = "1234567890123456789012345";
146 |         assertEq(_copyFromValue(25, bytes32(v25)), abi.encodePacked(v25));
147 | 
148 |         bytes32 v32 = "12345678901234567890123456789012";
149 |         assertEq(_copyFromValue(32, bytes32(v32)), abi.encodePacked(v32));
150 |     }
151 | 
152 |     function testCopyFromValue__RightAligned() public {
153 |         uint8 v1 = 1; // integers widened to uint256 keep their data in the trailing bytes
154 |         assertEq(_copyFromValueRightAligned(1, bytes32(uint256(v1))), abi.encodePacked(v1));
155 | 
156 |         uint16 v2 = 1000;
157 |         assertEq(_copyFromValueRightAligned(2, bytes32(uint256(v2))), abi.encodePacked(v2));
158 | 
159 |         uint128 v16 = 2**15 + 1;
160 |         assertEq(_copyFromValueRightAligned(16, bytes32(uint256(v16))), abi.encodePacked(v16));
161 | 
162 |         uint200 v25 = 123;
163 |         assertEq(_copyFromValueRightAligned(25, bytes32(uint256(v25))), abi.encodePacked(v25));
164 | 
165 |         uint256 v32 = type(uint256).max;
166 |         assertEq(_copyFromValueRightAligned(32, bytes32(uint256(v32))), abi.encodePacked(v32));
167 |     }
168 | 
169 |     function testCopyFromValue__Multiple() public {
170 |         bytes memory b = new bytes(86); // 1 + 2 + 11 + 32 + 20 + 20 bytes, written piece by piece below
171 |         Slice slice = b.toSlice();
172 | 
173 |         slice.copyFromValueRightAligned(bytes32(uint256(1)), 1);
174 |         slice = slice.getAfter(1);
175 | 
176 |         slice.copyFromValueRightAligned(bytes32(uint256(1000)), 2);
177 |         slice = slice.getAfter(2);
178 | 
179 |         slice.copyFromValue("12345678901", 11);
180 |         slice = slice.getAfter(11);
181 | 
182 |         slice.copyFromValue("12345678901234567890123456789012", 32);
183 |         slice = slice.getAfter(32);
184 | 
185 |         // converting address to bytes20 shifts it, so the value is left-aligned within bytes32
186 |         slice.copyFromValue(bytes20(address(this)), 20);
187 |         slice = slice.getAfter(20);
188 | 
189 |         // try it without the shift too: the raw address word is right-aligned
190 |         address addr = address(this);
191 |         bytes32 addrRaw;
192 |         assembly {
193 |             addrRaw := addr
194 |         }
195 |         slice.copyFromValueRightAligned(addrRaw, 20);
196 |         slice = slice.getAfter(20);
197 | 
198 |         assertEq(
199 |             b,
200 |             abi.encodePacked(
201 |                 uint8(1),
202 |                 uint16(1000),
203 |                 bytes11("12345678901"),
204 |                 bytes32("12345678901234567890123456789012"),
205 |                 bytes20(address(this)),
206 |                 address(this)
207 |             )
208 |         );
209 |     }
210 | 
211 |     /*//////////////////////////////////////////////////////////////////////////
212 |                                     CONCATENATION
213 |     //////////////////////////////////////////////////////////////////////////*/
214 | 
215 |     function testAdd(bytes calldata _b) public {
216 |         uint256 mid = _b.length / 2;
217 |         bytes memory firstHalf = _b[:mid];
218 |         bytes memory secondHalf = _b[mid:];
219 |         assertEq(firstHalf.toSlice().add(secondHalf.toSlice()), _b); // the halves added back equal the input
220 |     }
221 | 
222 |     function testJoin__EmptySeparator(bytes calldata _b) public {
223 |         uint256 mid = _b.length / 2;
224 |         bytes memory firstHalf = _b[:mid];
225 |         bytes memory secondHalf = _b[mid:];
226 |         // left default-initialized, i.e. an empty separator
227 |         bytes memory emptySep;
228 |         Slice[] memory halves = new Slice[](2);
229 |         halves[0] = firstHalf.toSlice();
230 |         halves[1] = secondHalf.toSlice();
231 |         assertEq(emptySep.toSlice().join(halves), _b);
232 |     }
233 | 
234 |     function testJoin__RandomSeparator(bytes calldata _b) public {
235 |         // cut the input in quarters: three parts to join, the last quarter is the separator
236 |         (uint256 q1, uint256 q2, uint256 q3) = (_b.length * 1/4, _b.length * 2/4, _b.length * 3/4);
237 |         bytes memory b1 = _b[:q1];
238 |         bytes memory b2 = _b[q1:q2];
239 |         bytes memory b3 = _b[q2:q3];
240 |         bytes memory sep = _b[q3:];
241 |         Slice[] memory slices = new Slice[](3);
242 |         slices[0] = b1.toSlice();
243 |         slices[1] = b2.toSlice();
244 |         slices[2] = b3.toSlice();
245 |         assertEq(sep.toSlice().join(slices), abi.encodePacked(b1, sep, b2, sep, b3));
246 |     }
247 | 
248 |     function testJoin__ArrayLen1(bytes calldata _b) public {
249 |         bytes memory only = _b;
250 |         bytes memory sep = hex'ABCD';
251 |         // with a single element the separator must not show up in the result
252 |         Slice[] memory slices = new Slice[](1);
253 |         slices[0] = only.toSlice();
254 | 
255 |         assertEq(sep.toSlice().join(slices), abi.encodePacked(only));
256 |     }
257 | 
258 |     function testJoin__ArrayLen0() public {
259 |         bytes memory sep = hex'ABCD';
260 |         // left default-initialized, i.e. a zero-length array
261 |         Slice[] memory noSlices;
262 |         // joining nothing must give empty bytes
263 |         assertEq(sep.toSlice().join(noSlices), '');
264 |     }
265 | 
266 |     /*//////////////////////////////////////////////////////////////////////////
267 |                                         INDEX
268 |     //////////////////////////////////////////////////////////////////////////*/
269 | 
270 |     function testGet(bytes calldata _b) public {
271 |         Slice slice = _b.toSlice();
272 |         for (uint256 idx = 0; idx < _b.length; ++idx) {
273 |             assertEq(slice.get(idx), uint8(_b[idx])); // each byte equals the source byte at that index
274 |         }
275 |     }
276 | 
277 |     function testGet__RevertOutOfBounds(bytes calldata _b) public {
278 |         Slice s = _b.toSlice(); // built before the revert expectation is armed
279 |         vm.expectRevert(Slice__OutOfBounds.selector);
280 |         s.get(_b.length); // index == length is one past the last valid byte
281 |     }
282 | 
283 |     function testFirstLast(bytes calldata _b) public {
284 |         vm.assume(_b.length != 0); // first()/last() revert on an empty slice
285 |         Slice s = _b.toSlice();
286 |         assertEq(s.first(), uint8(_b[0]));
287 |         assertEq(s.last(), uint8(_b[_b.length - 1]));
288 |     }
289 | 
290 |     function testSplitAt(bytes calldata _b) public {
291 |         // splitAt must partition the slice:
292 |         // gluing both halves back together reproduces the input
293 |         uint256 mid = _b.length / 2;
294 |         (Slice head, Slice tail) = _b.toSlice().splitAt(mid);
295 |         bytes memory headBytes = head.toBytes();
296 |         bytes memory tailBytes = tail.toBytes();
297 |         bytes memory glued = abi.encodePacked(headBytes, tailBytes);
298 |         assertEq(glued, _b);
299 |     }
300 | 
301 |     function testSplitAt__0(bytes calldata _b) public {
302 |         // splitting at 0 leaves an empty left part and everything on the right
303 |         (Slice left, Slice right) = _b.toSlice().splitAt(0);
304 |         assertEq(right.toBytes(), _b);
305 |         assertEq(left.len(), 0);
306 |     }
307 | 
308 |     function testSplitAt__Length(bytes calldata _b) public {
309 |         // splitting at the length leaves everything on the left and an empty right part
310 |         (Slice left, Slice right) = _b.toSlice().splitAt(_b.length);
311 |         assertEq(left.toBytes(), _b);
312 |         assertEq(right.len(), 0);
313 |     }
314 | 
315 |     function testGetSubslice(bytes calldata _b) public {
316 |         // TODO fix self-referential pseudorandomness
317 |         // Bounds are drawn from [0, length] (not [0, length)), so `end == length` is exercised too.
318 |         uint256 start = uint256(keccak256(abi.encode(_b, "start"))) % (_b.length + 1);
319 |         uint256 end = uint256(keccak256(abi.encode(_b, "end"))) % (_b.length + 1);
320 |         if (start > end) (start, end) = (end, start); // order the bounds rather than rejecting the run
321 |         assertEq(_b.toSlice().getSubslice(start, end).toBytes(), _b[start:end]);
322 |     }
323 | 
324 |     function testGetSubslice__RevertStartAfterEnd(bytes calldata _b) public {
325 |         // TODO fix self-referential pseudorandomness
326 |         uint256 start = _b.length != 0 ? uint256(keccak256(abi.encode(_b, "start"))) % _b.length : 0;
327 |         uint256 end = _b.length != 0 ? uint256(keccak256(abi.encode(_b, "end"))) % _b.length : 0;
328 |         vm.assume(end < start); // keep only inverted ranges
329 |         vm.expectRevert(Slice__OutOfBounds.selector);
330 |         _b.toSlice().getSubslice(start, end);
331 |     }
332 | 
333 |     function testGetBefore(bytes calldata _b) public {
334 |         uint256 mid = _b.length / 2; // the first half must equal the calldata prefix
335 |         assertEq(_b.toSlice().getBefore(mid), _b[:mid]);
336 |     }
337 | 
338 |     function testGetBefore_RevertOutOfBounds() public {
339 |         bytes memory empty; // zero-length, so index 1 is out of range
340 |         vm.expectRevert(Slice__OutOfBounds.selector);
341 |         empty.toSlice().getBefore(1);
342 |     }
343 | 
344 |     function testGetAfter(bytes calldata _b) public {
345 |         uint256 mid = _b.length / 2; // the second half must equal the calldata suffix
346 |         assertEq(_b.toSlice().getAfter(mid), _b[mid:]);
347 |     }
348 | 
349 |     function testGetAfter_RevertOutOfBounds() public {
350 |         bytes memory empty; // zero-length, so index 1 is out of range
351 |         vm.expectRevert(Slice__OutOfBounds.selector);
352 |         empty.toSlice().getAfter(1);
353 |     }
354 | 
355 |     function testGetAfterStrict(bytes calldata _b) public {
356 |         vm.assume(_b.length != 0); // the strict variant rejects index == length, so empty input would revert
357 |         uint256 mid = _b.length / 2;
358 |         assertEq(_b.toSlice().getAfterStrict(mid), _b[mid:]);
359 |     }
360 | 
361 |     function testGetAfterStrict_RevertOutOfBounds() public {
362 |         bytes memory empty; // zero-length, and the strict variant rejects index == length
363 |         vm.expectRevert(Slice__OutOfBounds.selector);
364 |         empty.toSlice().getAfterStrict(0);
365 |     }
366 | 
367 |     /*//////////////////////////////////////////////////////////////////////////
368 |                                         FIND
369 |     //////////////////////////////////////////////////////////////////////////*/
370 | 
371 |     function testFind(bytes calldata _b) public {
372 |         // needle: the [1/8, 3/8) window of the haystack, so a match is guaranteed to exist
373 |         bytes memory haystack = _b;
374 |         bytes memory needle = _b[_b.length / 8 : _b.length * 3 / 8];
375 |         vm.assume(needle.length != 0);
376 |         uint256 foundAt = haystack.toSlice().find(needle.toSlice());
377 |         // don't use assertContains here, since that'd be testing find with find itself
378 |         checkOffset(haystack, needle, foundAt);
379 |     }
380 | 
381 |     function testFindEmpty(bytes calldata _b) public {
382 |         // an empty pattern is reported at offset 0 of any haystack
383 |         bytes memory haystack = _b;
384 |         bytes memory emptyPat;
385 |         Slice pat = emptyPat.toSlice();
386 |         assertEq(haystack.toSlice().find(pat), 0);
387 |     }
388 | 
389 |     function testFindEmptyInEmpty() public {
390 |         // an empty pattern in an empty haystack is still reported at offset 0
391 |         bytes memory haystack;
392 |         bytes memory emptyPat;
393 |         Slice pat = emptyPat.toSlice();
394 |         assertEq(haystack.toSlice().find(pat), 0);
395 |     }
396 | 
397 |     function testFindNotEmptyInEmpty(bytes calldata _b) public {
398 |         // a non-empty pattern can never occur in an empty haystack: expect the not-found sentinel
399 |         vm.assume(_b.length != 0);
400 |         bytes memory haystack;
401 |         bytes memory needle = _b;
402 |         uint256 notFound = type(uint256).max;
403 |         assertEq(haystack.toSlice().find(needle.toSlice()), notFound);
404 |     }
405 | 
406 |     /*//////////////////////////////////////////////////////////////////////////
407 |                                         RFIND
408 |     //////////////////////////////////////////////////////////////////////////*/
409 | 
410 |     function testRfind(bytes calldata _b) public {
411 |         // needle: the [5/8, 7/8) window of the haystack, so a match is guaranteed to exist
412 |         bytes memory haystack = _b;
413 |         bytes memory needle = _b[_b.length * 5 / 8 : _b.length * 7 / 8];
414 |         vm.assume(needle.length != 0);
415 |         uint256 foundAt = haystack.toSlice().rfind(needle.toSlice());
416 |         checkOffset(haystack, needle, foundAt);
417 |     }
418 | 
419 |     function testRfindEmpty(bytes calldata _b) public {
420 |         // an empty pattern is reported at offset 0 by rfind as well
421 |         bytes memory haystack = _b;
422 |         bytes memory emptyPat;
423 |         Slice pat = emptyPat.toSlice();
424 |         assertEq(haystack.toSlice().rfind(pat), 0);
425 |     }
426 | 
427 |     function testRfindEmptyInEmpty() public {
428 |         // an empty pattern in an empty haystack is still reported at offset 0
429 |         bytes memory haystack;
430 |         bytes memory emptyPat;
431 |         Slice pat = emptyPat.toSlice();
432 |         assertEq(haystack.toSlice().rfind(pat), 0);
433 |     }
434 | 
435 |     function testRfindNotEmptyInEmpty(bytes calldata _b) public {
436 |         // a non-empty pattern can never occur in an empty haystack: expect the not-found sentinel
437 |         vm.assume(_b.length != 0);
438 |         bytes memory haystack;
439 |         bytes memory needle = _b;
440 |         uint256 notFound = type(uint256).max;
441 |         assertEq(haystack.toSlice().rfind(needle.toSlice()), notFound);
442 |     }
443 | 
444 |     /*//////////////////////////////////////////////////////////////////////////
445 |                                         SEARCH
446 |     //////////////////////////////////////////////////////////////////////////*/
447 | 
448 |     function testContains(bytes calldata _b) public {
449 |         vm.assume(_b.length != 0);
450 |         bytes memory middleByte = _b[_b.length / 2:_b.length / 2 + 1]; // 1-byte pattern taken from the input itself
451 |         assertTrue(_b.toSlice().contains(middleByte.toSlice()));
452 |     }
453 | 
454 |     function testContains__NotFound() public {
455 |         bytes memory digits = "123456789";
456 |         bytes memory absent = "0"; // does not occur in `digits`
457 |         assertFalse(digits.toSlice().contains(absent.toSlice()));
458 |     }
459 | 
460 |     function testContains__EmptySelf() public {
461 |         bytes memory nothing = ""; // empty haystack
462 |         bytes memory pat = "0";
463 |         assertFalse(nothing.toSlice().contains(pat.toSlice()));
464 |     }
465 | 
466 |     function testContains__EmptyPat() public {
467 |         bytes memory digits = "123456789";
468 |         bytes memory emptyPat = ""; // the empty pattern counts as contained
469 |         assertTrue(digits.toSlice().contains(emptyPat.toSlice()));
470 |     }
471 | 
472 |     function testContains__EmptyBoth() public {
473 |         bytes memory nothing = "";
474 |         bytes memory emptyPat = ""; // the empty pattern counts as contained even in an empty haystack
475 |         assertTrue(nothing.toSlice().contains(emptyPat.toSlice()));
476 |     }
477 | 
478 |     function testStartsWith(bytes calldata _b) public {
479 |         uint256 i = uint256(keccak256(abi.encode(_b, "i"))) % (_b.length + 1); // i in [0, length]: the full input is a valid prefix too
480 |         bytes memory pat = _b[:i];
481 |         assertTrue(_b.toSlice().startsWith(pat.toSlice()));
482 |     }
483 | 
484 |     function testStartsWith__False() public {
485 |         Slice digits = bytes("123456789").toSlice();
486 |         assertFalse(digits.startsWith(bytes("2").toSlice())); // occurs, but not at the start
487 |         assertFalse(digits.startsWith(bytes("9").toSlice())); // a suffix, not a prefix
488 |     }
489 | 
490 |     function testEndsWith(bytes calldata _b) public {
491 |         uint256 i = uint256(keccak256(abi.encode(_b, "i"))) % (_b.length + 1); // i in [0, length]: i == length yields the empty suffix
492 |         bytes memory pat = _b[i:];
493 |         assertTrue(_b.toSlice().endsWith(pat.toSlice()));
494 |     }
495 | 
496 |     function testEndsWith__False() public {
497 |         Slice digits = bytes("123456789").toSlice();
498 |         assertFalse(digits.endsWith(bytes("1").toSlice())); // a prefix, not a suffix
499 |         assertFalse(digits.endsWith(bytes("8").toSlice())); // occurs, but not at the end
500 |     }
501 | 
502 |     /*//////////////////////////////////////////////////////////////////////////
503 |                                         MODIFY
504 |     //////////////////////////////////////////////////////////////////////////*/
505 | 
506 |     function testStripPrefix() public {
507 |         bytes memory src = "12345";
508 |         assertEq(src.toSlice().stripPrefix(bytes("123").toSlice()), bytes("45")); // a proper prefix is removed
509 |         assertEq(src.toSlice().stripPrefix(src.toSlice()), bytes("")); // the whole slice as prefix leaves nothing
510 |         assertEq(src.toSlice().stripPrefix(bytes("").toSlice()), src); // an empty prefix changes nothing
511 |         assertEq(src.toSlice().stripPrefix(bytes("5").toSlice()), src); // not a prefix: unchanged
512 |         assertEq(src.toSlice().stripPrefix(bytes("123456").toSlice()), src); // longer than the slice: unchanged
513 |     }
514 | 
515 |     function testStripPrefix__FromEmpty() public {
516 |         bytes memory empty; // stripping anything from an empty slice leaves it empty
517 |         assertEq(empty.toSlice().stripPrefix(bytes("1").toSlice()), empty);
518 |         assertEq(empty.toSlice().stripPrefix(bytes("").toSlice()), empty);
519 |     }
520 | 
521 |     function testStripSuffix() public {
522 |         bytes memory src = "12345";
523 |         assertEq(src.toSlice().stripSuffix(bytes("345").toSlice()), bytes("12")); // a proper suffix is removed
524 |         assertEq(src.toSlice().stripSuffix(src.toSlice()), bytes("")); // the whole slice as suffix leaves nothing
525 |         assertEq(src.toSlice().stripSuffix(bytes("").toSlice()), src); // an empty suffix changes nothing
526 |         assertEq(src.toSlice().stripSuffix(bytes("1").toSlice()), src); // not a suffix: unchanged
527 |         assertEq(src.toSlice().stripSuffix(bytes("123456").toSlice()), src); // longer than the slice: unchanged
528 |     }
529 | 
530 |     function testStripSuffix__FromEmpty() public {
531 |         bytes memory empty; // stripping anything from an empty slice leaves it empty
532 |         assertEq(empty.toSlice().stripSuffix(bytes("1").toSlice()), empty);
533 |         assertEq(empty.toSlice().stripSuffix(bytes("").toSlice()), empty);
534 |     }
535 | }


--------------------------------------------------------------------------------
/src/Slice.sol:
--------------------------------------------------------------------------------
  1 | // SPDX-License-Identifier: MIT
  2 | 
  3 | pragma solidity ^0.8.17;
  4 | 
  5 | import { mload8, memmove, memcmp, memeq, mstoreN, leftMask } from "./utils/mem.sol";
  6 | import { memchr, memrchr } from "./utils/memchr.sol";
  7 | import { PackPtrLen } from "./utils/PackPtrLen.sol";
  8 | 
  9 | import { SliceIter, SliceIter__ } from "./SliceIter.sol";
 10 | 
 11 | /**
 12 |  * @title A view into a contiguous sequence of 1-byte items.
 13 |  * @dev Packs a memory pointer (upper 128 bits, see `ptr`) and a byte length (bits selected by `PackPtrLen.MASK_LEN`, see `len`) into one word. */
 14 | type Slice is uint256;
 15 | 
 16 | /*//////////////////////////////////////////////////////////////////////////
 17 |                                 CUSTOM ERRORS
 18 | //////////////////////////////////////////////////////////////////////////*/
 19 | 
 20 | error Slice__OutOfBounds(); // an index, range bound or byte count lies outside the slice (or exceeds 32 bytes in copyFromValue*)
 21 | error Slice__LengthMismatch(); // copyFromSlice was given slices of different lengths
 22 | 
 23 | /*//////////////////////////////////////////////////////////////////////////
 24 |                               STATIC FUNCTIONS
 25 | //////////////////////////////////////////////////////////////////////////*/
 26 | 
 27 | library Slice__ {
 28 |     /**
 29 |      * @dev Wraps an in-memory `bytes` as a `Slice` without copying it.
 30 |      * The resulting `Slice` starts at the first data byte of `b`
 31 |      * (i.e. 32 bytes past `b`, skipping its length word) and spans `b.length` bytes.
 32 |      */
 33 |     function from(bytes memory b) internal pure returns (Slice slice) {
 34 |         uint256 dataPtr;
 35 |         uint256 dataLen = b.length;
 36 |         assembly { dataPtr := add(b, 0x20) }
 37 |         // delegate the packing to the checked constructor
 38 |         slice = fromRawParts(dataPtr, dataLen);
 39 |     }
 40 | 
 41 |     /**
 42 |      * @dev Builds a `Slice` from a raw memory pointer and a byte length,
 43 |      * packing them with `PackPtrLen.pack`.
 44 |      * The caller MUST guarantee memory-safety. Primarily for internal use.
 45 |      */
 46 |     function fromRawParts(uint256 _ptr, uint256 _len) internal pure returns (Slice slice) {
 47 |         slice = Slice.wrap(PackPtrLen.pack(_ptr, _len));
 48 |     }
 49 | 
 50 |     /**
 51 |      * @dev Same as `fromRawParts`, but packs the parts inline with NO validity checks:
 52 |      * `_ptr` is shifted into the upper 128 bits, `_len` is masked with `PackPtrLen.MASK_LEN`.
 53 |      * `_ptr` and `_len` MUST fit into uint128.
 54 |      * The caller MUST guarantee memory-safety. Primarily for internal use.
 55 |      */
 56 |     function fromUnchecked(uint256 _ptr, uint256 _len) internal pure returns (Slice slice) {
 57 |         uint256 hi = _ptr << 128;
 58 |         uint256 lo = _len & PackPtrLen.MASK_LEN;
 59 |         slice = Slice.wrap(hi | lo);
 60 |     }
 61 | }
 62 | 
 63 | /**
 64 |  * @dev Free-function shorthand for `Slice__.from(b)`.
 65 |  * To call it as `b.toSlice()`, add this to your file (global `using for` only works for user-defined types):
 66 |  * ```
 67 |  * using { toSlice } for bytes;
 68 |  * ```
 69 |  */
 70 | function toSlice(bytes memory b) pure returns (Slice slice) {
 71 |     slice = Slice__.from(b);
 72 | }
 73 | 
 74 | /*//////////////////////////////////////////////////////////////////////////
 75 |                               GLOBAL FUNCTIONS
 76 | //////////////////////////////////////////////////////////////////////////*/
 77 | 
 78 | using { // attaches the free functions of this file to `Slice` as member functions
 79 |     ptr, len, isEmpty, // accessors
 80 |     // conversion
 81 |     toBytes, toBytes32,
 82 |     keccak,
 83 |     // concatenation
 84 |     add, join,
 85 |     // copy
 86 |     copyFromSlice, copyFromValue, copyFromValueRightAligned,
 87 |     // compare
 88 |     cmp, eq, ne, lt, lte, gt, gte,
 89 |     // index
 90 |     get, first, last,
 91 |     splitAt, getSubslice, getBefore, getAfter, getAfterStrict,
 92 |     // search
 93 |     find, rfind, contains,
 94 |     startsWith, endsWith,
 95 |     // modify
 96 |     stripPrefix, stripSuffix,
 97 |     // iteration
 98 |     iter
 99 | } for Slice global;
100 | 
101 | /// @dev Returns the memory pointer to the slice's first byte
102 | /// (stored in the upper 128 bits of the packed word).
103 | function ptr(Slice self) pure returns (uint256) {
104 |     uint256 packed = Slice.unwrap(self);
105 |     return packed >> 128;
106 | }
107 | 
108 | /// @dev Returns the slice's length in bytes
109 | /// (the packed word masked with `PackPtrLen.MASK_LEN`).
110 | function len(Slice self) pure returns (uint256) {
111 |     uint256 packed = Slice.unwrap(self);
112 |     return packed & PackPtrLen.MASK_LEN;
113 | }
114 | 
115 | /// @dev Returns true if the slice has a length of 0.
116 | function isEmpty(Slice self) pure returns (bool) {
117 |     // same masking as `len()`, compared against zero
118 |     uint256 length = Slice.unwrap(self) & PackPtrLen.MASK_LEN;
119 |     return length == 0;
120 | }
121 | 
122 | /**
123 |  * @dev Allocates a fresh `bytes` of the same length and copies the slice's contents into it.
124 |  * `self` is left untouched: it keeps pointing at the original memory, not at the copy.
125 |  */
126 | function toBytes(Slice self) view returns (bytes memory b) {
127 |     uint256 length = self.len();
128 |     b = new bytes(length);
129 |     // destination = first data byte of `b` (just past its length word)
130 |     uint256 dst;
131 |     assembly {
132 |         dst := add(b, 0x20)
133 |     }
134 |     memmove(dst, self.ptr(), length);
135 | }
136 | 
137 | /**
138 |  * @dev Loads up to the first 32 bytes of the slice into a `bytes32` value.
139 |  * Bytes are laid out left to right (MSB first);
140 |  * when the slice is shorter than 32 bytes, the remaining low-order bytes are zero.
141 |  */
142 | function toBytes32(Slice self) pure returns (bytes32 b) {
143 |     // a full 32-byte word is always read at the slice pointer;
144 |     // and-ing it with `leftMask(length)` then removes any trailing bytes
145 |     // that lie past the end of the slice
146 |     uint256 keepMask = leftMask(self.len());
147 |     uint256 src = self.ptr();
148 |     /// @solidity memory-safe-assembly
149 |     assembly {
150 |         let word := mload(src)
151 |         b := and(word, keepMask)
152 |     }
153 |     return b;
154 | }
155 | 
156 | /**
157 |  * @dev Hashes exactly the bytes covered by the slice with keccak256.
158 |  * For any `bytes memory b`: keccak256(b) == b.toSlice().keccak()
159 |  * (keccak256 of `bytes` hashes only the data, not the length word)
160 |  */
161 | function keccak(Slice self) pure returns (bytes32 result) {
162 |     uint256 length = self.len();
163 |     uint256 start = self.ptr();
164 |     /// @solidity memory-safe-assembly
165 |     assembly {
166 |         result := keccak256(start, length)
167 |     }
168 | }
169 | 
170 | /**
171 |  * @dev Returns a newly allocated `bytes` holding the contents of `self`
172 |  * immediately followed by the contents of `other`. Neither input is modified.
173 |  */
174 | function add(Slice self, Slice other) view returns (bytes memory b) {
175 |     uint256 headLen = self.len();
176 |     uint256 tailLen = other.len();
177 |     b = new bytes(headLen + tailLen);
178 |     // write cursor, starting at the first data byte of `b`
179 |     uint256 cursor;
180 |     assembly {
181 |         cursor := add(b, 0x20)
182 |     }
183 |     memmove(cursor, self.ptr(), headLen);
184 |     cursor += headLen;
185 |     memmove(cursor, other.ptr(), tailLen);
186 | }
187 | 
188 | /**
189 |  * @dev Concatenates every element of `slices` into one newly allocated `bytes`,
190 |  * inserting `self` (the separator) between consecutive elements.
191 |  * An empty array yields empty bytes; a single element is copied without any separator.
192 |  *
193 |  * TODO this is the wrong place for this method, but there are no other places atm
194 |  * (since there's no proper chaining/reducers/anything)
195 |  */
196 | function join(Slice self, Slice[] memory slices) view returns (bytes memory b) {
197 |     uint256 count = slices.length;
198 |     if (count == 0) return "";
199 |     uint256 sepLen = self.len();
200 |     uint256 sepPtr = self.ptr();
201 |     // number of separators to write; count - 1 cannot underflow because count != 0
202 |     uint256 sepCount;
203 |     unchecked {
204 |         sepCount = count - 1;
205 |     }
206 |     // pass 1: size of the result = all separators + all elements
207 |     uint256 total = sepLen * sepCount;
208 |     for (uint256 i; i < count; i++) {
209 |         total += slices[i].len();
210 |     }
211 |     b = new bytes(total);
212 |     // write cursor, starting at the first data byte of `b`
213 |     uint256 cursor;
214 |     assembly {
215 |         cursor := add(b, 0x20)
216 |     }
217 |     // pass 2: copy each element, followed by the separator except after the last one
218 |     for (uint256 i; i < count; i++) {
219 |         Slice item = slices[i];
220 |         uint256 itemLen = item.len();
221 |         memmove(cursor, item.ptr(), itemLen);
222 |         cursor += itemLen;
223 |         // last element: no trailing separator
224 |         if (i == sepCount) break;
225 |         memmove(cursor, sepPtr, sepLen);
226 |         cursor += sepLen;
227 |     }
228 | }
229 | 
230 | /**
231 |  * @dev Overwrites the memory `self` points at with the contents of `src`.
232 |  * Reverts with `Slice__LengthMismatch` unless both slices have the same length.
233 |  */
234 | function copyFromSlice(Slice self, Slice src) view {
235 |     uint256 length = src.len();
236 |     if (self.len() != length) revert Slice__LengthMismatch();
237 |     // lengths match, so `length` bytes fit the destination exactly
238 |     memmove(self.ptr(), src.ptr(), length);
239 | }
240 | 
241 | /**
242 |  * @dev Writes the `length` most significant bytes of `value` to the start of `self`.
243 |  * Reverts with `Slice__OutOfBounds` if `length` exceeds 32 or the slice's length.
244 |  */
245 | function copyFromValue(Slice self, bytes32 value, uint256 length) pure {
246 |     bool fitsWord = length <= 32;
247 |     bool fitsSelf = length <= self.len();
248 |     if (!(fitsSelf && fitsWord)) revert Slice__OutOfBounds();
249 |     mstoreN(self.ptr(), value, length);
250 | }
251 | 
252 | /**
253 |  * @dev Variant of `copyFromValue` for right-aligned values (e.g. a uint32 held in a bytes32):
254 |  * `value` is first shifted left by (32 - `length`) bytes so its payload starts at the MSB,
255 |  * then `length` bytes are written to the start of `self`.
256 |  * Reverts with `Slice__OutOfBounds` if `length` exceeds 32 or the slice's length.
257 |  */
258 | function copyFromValueRightAligned(Slice self, bytes32 value, uint256 length) pure {
259 |     if (length > 32 || length > self.len()) revert Slice__OutOfBounds();
260 |     // a full 32-byte value is already left-aligned, nothing to shift
261 |     if (length != 32) {
262 |         // length < 32 here (checked above), so the subtraction cannot underflow
263 |         unchecked {
264 |             uint256 shiftBits = (32 - length) * 8;
265 |             value = value << shiftBits;
266 |         }
267 |     }
268 |     mstoreN(self.ptr(), value, length);
269 | }
270 | 
271 | /**
272 |  * @dev Lexicographic three-way comparison of two slices.
273 |  * The common-length prefix is compared with `memcmp`; if it is identical, the lengths decide.
274 |  * @return result 0 for equal, < 0 for less than and > 0 for greater than.
275 |  */
276 | function cmp(Slice self, Slice other) pure returns (int256 result) {
277 |     uint256 lhsLen = self.len();
278 |     uint256 rhsLen = other.len();
279 |     uint256 common;
280 |     if (rhsLen < lhsLen) {
281 |         common = rhsLen;
282 |     } else {
283 |         common = lhsLen;
284 |     }
285 |     result = memcmp(self.ptr(), other.ptr(), common);
286 |     // a difference inside the common part settles the order
287 |     if (result != 0) return result;
288 |     // otherwise one slice is a prefix of the other and the longer one is greater
289 |     // (lengths take only 16 bytes, so signed sub is safe)
290 |     unchecked {
291 |         result = int256(lhsLen) - int256(rhsLen);
292 |     }
293 | }
294 | 
295 | /// @dev `self` == `other`: same length and same bytes.
296 | /// Note: cheaper than `cmp` when only equality matters.
297 | function eq(Slice self, Slice other) pure returns (bool) {
298 |     uint256 length = other.len();
299 |     // the length test short-circuits, so `memeq` only runs on equal-length slices
300 |     return self.len() == length && memeq(self.ptr(), other.ptr(), length);
301 | }
302 | 
303 | /// @dev `self` != `other`: different length or different bytes.
304 | /// Note: cheaper than `cmp` when only inequality matters.
305 | function ne(Slice self, Slice other) pure returns (bool) {
306 |     uint256 length = other.len();
307 |     // the length test short-circuits, so `memeq` only runs on equal-length slices
308 |     return self.len() != length || !memeq(self.ptr(), other.ptr(), length);
309 | }
310 | 
311 | /// @dev `self` < `other` (lexicographic order, see `cmp`)
312 | function lt(Slice self, Slice other) pure returns (bool) {
313 |     return cmp(self, other) < 0;
314 | }
315 | 
316 | /// @dev `self` <= `other` (lexicographic order, see `cmp`)
317 | function lte(Slice self, Slice other) pure returns (bool) {
318 |     return cmp(self, other) <= 0;
319 | }
320 | 
321 | /// @dev `self` > `other` (lexicographic order, see `cmp`)
322 | function gt(Slice self, Slice other) pure returns (bool) {
323 |     return cmp(self, other) > 0;
324 | }
325 | 
326 | /// @dev `self` >= `other` (lexicographic order, see `cmp`)
327 | function gte(Slice self, Slice other) pure returns (bool) {
328 |     return cmp(self, other) >= 0;
329 | }
330 | 
331 | /**
332 |  * @dev Reads the single byte located `index` bytes into the slice.
333 |  * Reverts with `Slice__OutOfBounds` unless `index` < length.
334 |  */
335 | function get(Slice self, uint256 index) pure returns (uint8 item) {
336 |     if (!(index < self.len())) revert Slice__OutOfBounds();
337 |     // ptr and len are uint128 (because PackPtrLen); index < len,
338 |     // so the address computation cannot overflow
339 |     unchecked {
340 |         item = mload8(self.ptr() + index);
341 |     }
342 | }
343 | 
344 | /**
345 |  * @dev Reads the byte at the very start of the slice.
346 |  * Reverts with `Slice__OutOfBounds` if the slice is empty.
347 |  */
348 | function first(Slice self) pure returns (uint8 item) {
349 |     if (self.isEmpty()) revert Slice__OutOfBounds();
350 |     item = mload8(self.ptr());
351 | }
352 | 
353 | /**
354 |  * @dev Reads the byte at the very end of the slice.
355 |  * Reverts with `Slice__OutOfBounds` if the slice is empty.
356 |  */
357 | function last(Slice self) pure returns (uint8 item) {
358 |     uint256 length = self.len();
359 |     if (length == 0) revert Slice__OutOfBounds();
360 |     // length > 0, so length - 1 cannot underflow (ptr+len is implicitly safe)
361 |     unchecked {
362 |         item = mload8(self.ptr() + length - 1);
363 |     }
364 | }
365 | 
366 | /// @dev Divides one slice into two at `mid`: ([0:mid], [mid:length]).
367 | /// Reverts with `Slice__OutOfBounds` if `mid` > length.
368 | function splitAt(Slice self, uint256 mid) pure returns (Slice, Slice) {
369 |     uint256 length = self.len();
370 |     if (mid > length) revert Slice__OutOfBounds();
371 |     uint256 start = self.ptr();
372 |     Slice right = Slice__.fromUnchecked(start + mid, length - mid);
373 |     return (Slice__.fromUnchecked(start, mid), right);
374 | }
375 | 
376 | /**
377 |  * @dev Returns the subslice [start:end] of `self` (`end` is exclusive).
378 |  * Reverts with `Slice__OutOfBounds` unless start <= end <= length.
379 |  */
380 | function getSubslice(Slice self, uint256 start, uint256 end) pure returns (Slice) {
381 |     if (start > end || end > self.len()) revert Slice__OutOfBounds();
382 |     // start <= end <= length past this point, so neither the pointer offset
383 |     // nor the length difference can wrap (pointers are implicitly safe)
384 |     unchecked {
385 |         return Slice__.fromUnchecked(self.ptr() + start, end - start);
386 |     }
387 | }
388 | 
389 | /**
390 |  * @dev Returns the leading subslice [:index] of `self` (same pointer, length `index`).
391 |  * Reverts with `Slice__OutOfBounds` if `index` > length.
392 |  */
393 | function getBefore(Slice self, uint256 index) pure returns (Slice) {
394 |     if (index > self.len()) revert Slice__OutOfBounds();
395 |     // the start stays put; only the length shrinks to `index`
396 |     return Slice__.fromUnchecked(self.ptr(), index);
397 | }
398 | 
399 | /**
400 |  * @dev Returns the trailing subslice [index:] of `self`.
401 |  * Reverts with `Slice__OutOfBounds` if `index` > length (`index` == length gives an empty slice).
402 |  */
403 | function getAfter(Slice self, uint256 index) pure returns (Slice) {
404 |     uint256 length = self.len();
405 |     if (index > length) revert Slice__OutOfBounds();
406 |     // index <= length, so the remaining length cannot underflow (ptr+len is implicitly safe)
407 |     unchecked {
408 |         return Slice__.fromUnchecked(self.ptr() + index, length - index);
409 |     }
410 | }
411 | 
412 | /**
413 |  * @dev Returns the non-empty trailing subslice [index:] of `self`.
414 |  * Reverts with `Slice__OutOfBounds` if `index` >= length.
415 |  */
416 | function getAfterStrict(Slice self, uint256 index) pure returns (Slice) {
417 |     uint256 length = self.len();
418 |     if (!(index < length)) revert Slice__OutOfBounds();
419 |     // index < length, so the remaining length is at least 1 (ptr+len is implicitly safe)
420 |     unchecked {
421 |         return Slice__.fromUnchecked(self.ptr() + index, length - index);
422 |     }
423 | }
424 | 
425 | /**
426 |  * @dev Returns the byte index of the first occurrence of `pattern` in `self`.
427 |  * An empty `pattern` is reported at index 0.
428 |  * Returns type(uint256).max if the `pattern` does not match.
429 |  */
430 | function find(Slice self, Slice pattern) pure returns (uint256) {
431 |     uint256 patLen = pattern.len();
432 |     // an empty pattern matches right at the start
433 |     if (patLen == 0) return 0;
434 |     // `remaining` is the length of the not-yet-searched tail; initially the whole slice
435 |     uint256 remaining = self.len();
436 |     // also covers an empty `self`, because patLen >= 1 here
437 |     if (patLen > remaining) return type(uint256).max;
438 | 
439 |     uint256 selfPtr = self.ptr();
440 |     // `cursor` is the start of the not-yet-searched tail
441 |     uint256 cursor = selfPtr;
442 |     uint256 patPtr = pattern.ptr();
443 |     // low-level alternative to `first()` (safe because patLen != 0)
444 |     uint8 patFirst = mload8(patPtr);
445 | 
446 |     while (true) {
447 |         // jump to the next candidate: a byte equal to the pattern's first byte
448 |         uint256 skip = memchr(cursor, remaining, patFirst);
449 |         // no candidate left
450 |         if (skip == type(uint256).max) return type(uint256).max;
451 | 
452 |         // safe because skip < remaining (ptr+len is implicitly safe)
453 |         unchecked {
454 |             cursor += skip;
455 |             remaining -= skip;
456 |         }
457 |         // the pattern no longer fits into what is left
458 |         if (patLen > remaining) return type(uint256).max;
459 | 
460 |         // full comparison at the candidate position
461 |         if (memeq(cursor, patPtr, patLen)) {
462 |             return cursor - selfPtr;
463 |         }
464 |         // mismatch, and the candidate was the very last byte
465 |         if (remaining == 1) {
466 |             return type(uint256).max;
467 |         }
468 |         // mismatch, but there is more to search: step past the candidate,
469 |         // memchr shouldn't receive what it returned (otherwise infinite loop)
470 |         unchecked {
471 |             // safe because remaining > 1 here
472 |             cursor++;
473 |             remaining--;
474 |         }
475 |     }
476 |     return type(uint256).max;
477 | }
478 | 
479 | /**
480 |  * @dev Returns the byte index of the last occurrence of `pattern` in `self`.
481 |  * An empty `pattern` is reported at index 0.
482 |  * Returns type(uint256).max if the `pattern` does not match.
483 |  */
484 | function rfind(Slice self, Slice pattern) pure returns (uint256) {
485 |     uint256 patLen = pattern.len();
486 |     // an empty pattern is reported at index 0
487 |     if (patLen == 0) return 0;
488 |     // `searchLen` is the length of the prefix still to be searched; initially the whole slice
489 |     uint256 searchLen = self.len();
490 |     // also covers an empty `self`, because patLen >= 1 here
491 |     if (patLen > searchLen) return type(uint256).max;
492 | 
493 |     uint256 selfPtr = self.ptr();
494 |     uint256 patPtr = pattern.ptr();
495 |     // last byte of the pattern (patLen != 0, so `last()` cannot revert)
496 |     uint8 patLast = pattern.last();
497 |     // using indexes instead of lengths saves some gas on redundant increments/decrements
498 |     uint256 patLastIndex;
499 |     // safe because of patLen == 0 check earlier
500 |     unchecked {
501 |         patLastIndex = patLen - 1;
502 |     }
503 | 
504 |     while (true) {
505 |         // candidate: the rightmost byte (within the searched prefix) equal to the pattern's last byte
506 |         uint256 endIndex = memrchr(selfPtr, searchLen, patLast);
507 |         // no candidate left
508 |         if (endIndex == type(uint256).max) return type(uint256).max;
509 |         // the pattern would have to start before index 0, so it cannot fit
510 |         if (endIndex < patLastIndex) return type(uint256).max;
511 | 
512 |         // (endIndex - patLastIndex is safe because of the check just earlier)
513 |         // (selfPtr + startIndex is safe because startIndex <= endIndex < searchLen <= selfLen)
514 |         // (ptr+len is implicitly safe)
515 |         unchecked {
516 |             // need startIndex, but memrchr returns endIndex
517 |             uint256 startIndex = endIndex - patLastIndex;
518 |             // full comparison at the candidate position
519 |             if (memeq(selfPtr + startIndex, patPtr, patLen)) {
520 |                 return startIndex;
521 |             }
522 |             // mismatch, and the candidate was the very first byte
523 |             if (endIndex == 0) {
524 |                 return type(uint256).max;
525 |             }
526 |             // mismatch, but there is more to search:
527 |             // shrink the searched prefix so memrchr doesn't receive what it returned
528 |             // (endIndex is already the length of the prefix that excludes the candidate)
529 |             searchLen = endIndex;
530 |             // an explicit continue is better for optimization here
531 |             continue;
532 |         }
533 |     }
534 |     return type(uint256).max;
535 | }
536 | 
537 | /// @dev Returns true if `pattern` occurs anywhere in `self`
538 | /// (i.e. `find` does not return its not-found sentinel, type(uint256).max).
539 | function contains(Slice self, Slice pattern) pure returns (bool) {
540 |     uint256 index = self.find(pattern);
541 |     return index != type(uint256).max;
542 | }
543 | 
544 | /**
545 |  * @dev Returns true if `self` begins with `pattern`.
546 |  * A pattern longer than `self` never matches.
547 |  */
548 | function startsWith(Slice self, Slice pattern) pure returns (bool) {
549 |     uint256 patLen = pattern.len();
550 |     uint256 selfLen = self.len();
551 |     // a longer pattern cannot be a prefix
552 |     if (patLen > selfLen) return false;
553 |     // equal lengths: compare the slice as a whole
554 |     if (patLen == selfLen) return self.eq(pattern);
555 |     // otherwise compare only the leading patLen bytes
556 |     Slice head = self.getBefore(patLen);
557 |     return head.eq(pattern);
558 | }
559 | 
560 | /**
561 |  * @dev Returns true if `self` ends with `pattern`.
562 |  * A pattern longer than `self` never matches.
563 |  */
564 | function endsWith(Slice self, Slice pattern) pure returns (bool) {
565 |     uint256 patLen = pattern.len();
566 |     uint256 selfLen = self.len();
567 |     // a longer pattern cannot be a suffix
568 |     if (patLen > selfLen) return false;
569 |     // equal lengths: compare the slice as a whole
570 |     if (patLen == selfLen) return self.eq(pattern);
571 |     // otherwise compare only the trailing patLen bytes
572 |     Slice tail = self.getAfter(selfLen - patLen);
573 |     return tail.eq(pattern);
574 | }
575 | 
576 | /**
577 |  * @dev Returns the subslice of `self` that follows `pattern`, if `self` starts with `pattern`.
578 |  * Otherwise (including when `pattern` is longer than `self`) returns `self` unmodified.
579 |  */
580 | function stripPrefix(Slice self, Slice pattern) pure returns (Slice) {
581 |     uint256 patLen = pattern.len();
582 |     // a pattern longer than the slice cannot be its prefix
583 |     if (patLen > self.len()) return self;
584 |     // head = first patLen bytes (the candidate prefix), rest = everything after it
585 |     (Slice head, Slice rest) = self.splitAt(patLen);
586 |     if (!head.eq(pattern)) {
587 |         // no match: hand the input back as is
588 |         return self;
589 |     }
590 |     // match: drop the prefix
591 |     return rest;
592 | }
593 | 
594 | /**
595 |  * @dev Returns the subslice of `self` that precedes `pattern`, if `self` ends with `pattern`.
596 |  * Otherwise (including when `pattern` is longer than `self`) returns `self` unmodified.
597 |  */
598 | function stripSuffix(Slice self, Slice pattern) pure returns (Slice) {
599 |     uint256 patLen = pattern.len();
600 |     uint256 selfLen = self.len();
601 |     // a pattern longer than the slice cannot be its suffix
602 |     if (selfLen < patLen) return self;
603 |     uint256 cut;
604 |     // safe because selfLen >= patLen
605 |     unchecked {
606 |         cut = selfLen - patLen;
607 |     }
608 |     // rest = everything before the cut, tail = last patLen bytes (the candidate suffix)
609 |     (Slice rest, Slice tail) = self.splitAt(cut);
610 |     if (!tail.eq(pattern)) {
611 |         // no match: hand the input back as is
612 |         return self;
613 |     }
614 |     return rest;
615 | }
616 | 
617 | /// @dev Creates a `SliceIter` over this slice via `SliceIter__.from`.
618 | /// The iterator yields items from either side.
619 | function iter(Slice self) pure returns (SliceIter memory) {
620 |     // build first, then hand back the in-memory iterator
621 |     SliceIter memory it = SliceIter__.from(self);
622 |     return it;
623 | }


--------------------------------------------------------------------------------