├── README.rst
├── nimtorst
├── src
    └── strslice.nim
└── strslice.nimble


/README.rst:
--------------------------------------------------------------------------------
 1 | strslice
 2 | ===========
 3 | This is an implementation of string slices that works on a common underlying
 4 | string shared through a reference instead of copying parts of the string.
 5 | This has the benefit of not requiring the time and memory of copying parts
 6 | of the string over and over. The only thing that gets copied is the
 7 | reference of the underlying string, and two new indices for the start and
 8 | stop of the string slice. This means that by changing the original string,
 9 | any string slice that was created from it will be updated as well. The
10 | benefit of using string slices comes when copying parts of the string to
11 | pass on, for example in a combinatorial parser.
12 |
13 | This file is automatically generated from the documentation found in
14 | strslice.nim. Use ``nim doc strslice.nim`` to get the full documentation.
15 |


--------------------------------------------------------------------------------
/nimtorst:
--------------------------------------------------------------------------------
1 | (cat <(echo -e "## strslice\n## ===========\n") src/strslice.nim <(echo -e "##\n## This file is automatically generated from the documentation found in\n## strslice.nim. Use \`\`nim doc strslice.nim\`\` to get the full documentation.") | sed -n 's/^##\( \)\?//p') > README.rst
2 |


--------------------------------------------------------------------------------
/src/strslice.nim:
--------------------------------------------------------------------------------
  1 | ## This is an implementation of string slices that works on a common underlying
  2 | ## string shared through a reference instead of copying parts of the string.
  3 | ## This has the benefit of not requiring the time and memory of copying parts
  4 | ## of the string over and over. The only thing that gets copied is the
  5 | ## reference of the underlying string, and two new indices for the start and
  6 | ## stop of the string slice. This means that by changing the original string,
  7 | ## any string slice that was created from it will be updated as well. The
  8 | ## benefit of using string slices comes when copying parts of the string to
  9 | ## pass on, for example in a combinatorial parser.
 10 | import strutils
 11 |
 12 | type
 13 |   StringSlice* = ref object
 14 |     ## A view on a shared string. ``start`` and ``stop`` are the inclusive
 15 |     ## first and last index of the view in the underlying string ``str``.
 16 |     str*: ref string
 17 |     start*: int
 18 |     stop*: int
 19 |
 20 | proc `$`*(str: StringSlice): string =
 21 |   ## Converts a string slice to a string. A nil slice and a slice without an
 22 |   ## underlying string both give the empty string.
 23 |   if str.isNil or str.str.isNil: ""
 24 |   else: str.str[str.start .. str.stop]
 25 |
 26 | proc newStringSlice*(str: string): StringSlice {.noInit.} =
 27 |   ## Create a new string slice that covers all of ``str``. The content of
 28 |   ## ``str`` is copied once into a new reference; every slice that is later
 29 |   ## taken from the result shares that reference, so a change made through
 30 |   ## it is visible in all of them.
 31 |   new result
 32 |   new result.str
 33 |   result.str[] = str
 34 |   result.start = 0
 35 |   result.stop = str.len - 1
 36 |
 37 | converter toStringSlice*(str: string): StringSlice {.noInit.} =
 38 |   ## Automatic converter to create a string slice from a string
 39 |   newStringSlice(str)
 40 |
 41 | proc `[]`*(str: StringSlice,
 42 |            slc: HSlice[int, int or BackwardsIndex]): StringSlice {.noInit.} =
 43 |   ## Grab a slice of a string slice. This returns a new string slice that
 44 |   ## references the same underlying string. Both bounds are inclusive and
 45 |   ## relative to ``str``. Raises ``IndexError`` when the bounds do not lie
 46 |   ## inside ``str``, and ``RangeError`` when a backwards index reaches further
 47 |   ## back than the start of ``str``.
 48 |   if slc.a < 0:
 49 |     raise newException(IndexError, "index out of bounds")
 50 |   new result
 51 |   result.str = str.str
 52 |   result.start = str.start + slc.a
 53 |   when slc.b is BackwardsIndex:
 54 |     if slc.b.int > str.len + 1:
 55 |       raise newException(RangeError, "value out of range: " &
 56 |         $(str.len + 1 - slc.b.int))
 57 |     # ``^0`` points one past the end of ``str``, and a start behind the stop
 58 |     # gives a slice of negative length. Reject both, like the branch for
 59 |     # forward indices below does.
 60 |     if slc.b.int < 1 or slc.a > str.len - slc.b.int + 1:
 61 |       raise newException(IndexError, "index out of bounds")
 62 |     result.stop = str.stop - slc.b.int + 1
 63 |   else:
 64 |     if slc.b + 1 < slc.a or slc.b > str.high:
 65 |       raise newException(IndexError, "index out of bounds")
 66 |     result.stop = str.start + slc.b
 67 |
 68 | proc high*(str: StringSlice): int =
 69 |   ## Get the highest index of a string slice
 70 |   str.stop - str.start
 71 |
 72 | proc len*(str: StringSlice): int =
 73 |   ## Get the length of a string slice
 74 |   str.high + 1
 75 |
 76 | proc `&`*(sl1, sl2: StringSlice): StringSlice {.noInit.} =
 77 |   ## Concatenate two string slices like the regular `&` operator does for
 78 |   ## strings. WARNING: This creates a new underlying string.
 79 |   newStringSlice($sl1 & $sl2)
 80 |
 81 | proc startsWith*[T: StringSlice or string](str: StringSlice, sub: T): bool =
 82 |   ## Compares a string slice with a string or another string slice of shorter or
 83 |   ## equal length. Returns true if the first string slice starts with the next.
 84 |   if sub.len > str.len: return false
 85 |   when T is StringSlice:
 86 |     for i in sub.start..sub.stop:
 87 |       if str.str[i + str.start - sub.start] != sub.str[i]: return false
 88 |   else:
 89 |     for idx, c in sub:
 90 |       if str.str[idx + str.start] != c: return false
 91 |   return true
 92 |
 93 | proc `==`*[T: StringSlice or string](str: StringSlice, cmp: T): bool =
 94 |   ## Compare a string slice to a string or another string slice. Returns true
 95 |   ## if they are both identical.
 96 |   str.len == cmp.len and str.startsWith(cmp)
 97 |
 98 | proc searchStop(s: StringSlice, last: Natural): int =
 99 |   ## Translates ``last``, which is relative to the slice, into an index of the
100 |   ## underlying string. Like in ``strutils`` a ``last`` of 0 means "search to
101 |   ## the end", which here is the end of the slice. The result never lies
102 |   ## beyond the end of the slice.
103 |   if last == 0: s.stop
104 |   else: min(s.start + last, s.stop)
105 |
106 | proc sliceIndex(s: StringSlice, pos, subLen: int): int =
107 |   ## Translates ``pos``, the position of a match of length ``subLen`` in the
108 |   ## underlying string, into an index relative to the slice. Returns -1 when
109 |   ## there was no match or when the match is not completely inside the slice.
110 |   if pos < s.start or pos + subLen - 1 > s.stop: -1
111 |   else: pos - s.start
112 |
113 | proc find*(a: SkipTable, s: StringSlice, sub: string,
114 |            start: Natural = 0, last: Natural = 0): int =
115 |   ## Finds a string in a string slice with the help of a ``SkipTable`` that
116 |   ## was initialised for ``sub``. ``start`` and ``last`` are relative to the
117 |   ## slice, a ``last`` of 0 searches to the end of the slice. Returns the
118 |   ## index relative to the slice, or -1 when ``sub`` is not found.
119 |   if s.len <= 0: -1
120 |   else:
121 |     s.sliceIndex(strutils.find(a, s.str[], sub, s.start + start,
122 |                                s.searchStop(last)), sub.len)
123 |
124 | proc find*(s: StringSlice, sub: char,
125 |            start: Natural = 0, last: Natural = 0): int =
126 |   ## Finds a character in a string slice. ``start`` and ``last`` are relative
127 |   ## to the slice, a ``last`` of 0 searches to the end of the slice. Returns
128 |   ## the index relative to the slice, or -1 when ``sub`` is not found.
129 |   if s.len <= 0: -1
130 |   else:
131 |     s.sliceIndex(strutils.find(s.str[], sub, s.start + start,
132 |                                s.searchStop(last)), 1)
133 |
134 | proc find*(s: StringSlice, sub: string,
135 |            start: Natural = 0, last: Natural = 0): int =
136 |   ## Finds a string in a string slice. ``start`` and ``last`` are relative to
137 |   ## the slice, a ``last`` of 0 searches to the end of the slice. Returns the
138 |   ## index relative to the slice, or -1 when ``sub`` is not found.
139 |   if s.len <= 0: -1
140 |   else:
141 |     s.sliceIndex(strutils.find(s.str[], sub, s.start + start,
142 |                                s.searchStop(last)), sub.len)
143 |
144 | proc find*(s: StringSlice, sub: StringSlice,
145 |            start: Natural = 0, last: Natural = 0): int =
146 |   ## Finds a string slice in another string slice. This should be really fast
147 |   ## when both string slices are from the same base string, as it will compare
148 |   ## only the indices: the result is then the position of ``sub`` itself, or
149 |   ## -1 when ``sub`` does not lie inside the searched part of ``s``, even if
150 |   ## the same text occurs there. Otherwise it will convert the string slice
151 |   ## to find into a regular string and call the normal find operation.
152 |   if s.str == sub.str:
153 |     if sub.start >= s.start + start and sub.stop <= s.searchStop(last):
154 |       sub.start - s.start
155 |     else:
156 |       -1
157 |   else:
158 |     s.find($sub, start, last)
159 |
160 | proc strip*(s: StringSlice, first = true, last = true): StringSlice {.noInit.} =
161 |   ## Strips whitespace from both sides (controllable with the ``first`` and
162 |   ## ``last`` arguments) of the string slice and returns a new string slice
163 |   ## with the same underlying string.
164 |   new result
165 |   result.str = s.str
166 |   result.start = s.start
167 |   result.stop = s.stop
168 |   if first:
169 |     while result.start <= result.stop and
170 |         result.str[result.start] in Whitespace:
171 |       inc result.start
172 |   if last:
173 |     while result.stop >= result.start and
174 |         result.str[result.stop] in Whitespace:
175 |       dec result.stop
176 |
177 | iterator items*(a: StringSlice): char =
178 |   ## Iterate over each character in a string slice
179 |   for i in a.start..a.stop:
180 |     yield a.str[i]
181 |
182 | when isMainModule:
183 |   let
184 |     s1 = "Hello world"
185 |     s2 = newStringSlice("Hello world")
186 |     s3 = s2[6 .. ^1]
187 |     s4 = s2[2 .. ^1]
188 |
189 |   assert s2 == s1
190 |   assert $s3 == "world"
191 |   assert s1.find("world") == 6
192 |   assert s2.find("world") == 6
193 |   assert s3.find("world") == 0
194 |   assert s2.find(s3) == 6
195 |   assert s2.find(s3, last = 8) == s1.find($s3, last = 8)
196 |   assert s2.find(s3, start = 8) == s1.find($s3, start = 8)
197 |   assert s3.find(s4) == -1
198 |   assert s3.find('r') == 2
199 |
200 |   var table: SkipTable
201 |   initSkipTable(table, "rl")
202 |   assert table.find(s3, "rl") == 2
203 |
204 |   let
205 |     s = "0123456789"
206 |     ss = s.toStringSlice
207 |     upToFour = ss[0..4]
208 |     upToFive = ss[0..5]
209 |     upToSix = ss[0..6]
210 |     threeToFive = ss[3..5]
211 |
212 |   assert s.find("123", last = 5) == ss.find("123", last = 5)
213 |   assert s.find("456", last = 5) == ss.find("456", last = 5)
214 |   assert s.find("789", last = 5) == ss.find("789", last = 5)
215 |   assert s.find("123", start = 2) == ss.find("123", start = 2)
216 |   assert s.find("123", start = 2, last = 5) == ss.find("123", start = 2, last = 5)
217 |
218 |   assert s.find("456") != upToFive.find("456")
219 |   assert upToFive.find("456") == -1
220 |   assert s.find("456") == upToSix.find("456")
221 |
222 |   assert s.find("4") == threeToFive.find("4") + 3
223 |   assert upToFour.find(threeToFive) == -1
224 |   assert threeToFive.find("6", last = 5) == -1
225 |   assert ss.find(ss[1 .. 3], last = 8) == 1
226 |


--------------------------------------------------------------------------------
/strslice.nimble:
--------------------------------------------------------------------------------
 1 | # Package
 2 |
 3 | version = "0.2.1"
 4 | author = "Peter Munch-Ellingsen"
 5 | description = "Efficient string slices that works on a shared underlying string instead of copying"
 6 | license = "MIT"
 7 | srcDir = "src"
 8 |
 9 | # Dependencies
10 |
11 | requires "nim >= 0.18.0"
12 |


--------------------------------------------------------------------------------