// Repository layout:
//
//     ├── .gitignore
//     ├── README.md
//     └── string.zig
//
// /.gitignore:
// --------------------------------------------------------------------------------
//     zig-cache/
//     /build/
//     /build-*/
//     /docgen_tmp/
//
// /README.md:
// --------------------------------------------------------------------------------
//
// A String struct made for Zig.
//
// Inspired by this repo: https://github.com/clownpriest/strings/
//
// To test:
// ```
// $ cd zig-string/
// $ zig test string.zig
// ```
//
// Basic Usage:
// ```zig
// const std = @import("std");
// const String = @import("/some/path/string.zig").String;
//
// pub fn main() !void {
//     var buf: [1024]u8 = undefined;
//     var fba = std.heap.ThreadSafeFixedBufferAllocator.init(buf[0..]);
//     var s = try String.init(&fba.allocator, "hello, world");
//     defer s.deinit();
//     var matches = try s.findSubstringIndices(&fba.allocator, "hello");
//     defer fba.allocator.free(matches);
//     // Should print:
//     // 0
//     // hello, world
//     for (matches) |val| {
//         std.debug.warn("{}\n", val);
//     }
//     std.debug.warn("{}\n", s.toSliceConst());
// }
// ```
//
// /string.zig:
// --------------------------------------------------------------------------------
const std = @import("std");
const mem = std.mem;
const ascii = std.ascii;
const Allocator = mem.Allocator;
const Buffer = std.Buffer;
const ArrayList = std.ArrayList;
const testing = std.testing;

/// A byte string built on top of `std.Buffer`.
/// All indices and lengths are in bytes, not Unicode code points.
pub const String = struct {
    buffer: Buffer,

    /// Creates a String from `m` by calling `Buffer.init` with `allocator`.
    /// Release it with `deinit`.
    pub fn init(allocator: *Allocator, m: []const u8) !String {
        return String{ .buffer = try Buffer.init(allocator, m) };
    }

    /// Releases the underlying buffer.
    pub fn deinit(self: *String) void {
        self.buffer.deinit();
    }

    /// Returns true if the string begins with the bytes of `m`.
    pub fn startsWith(self: *const String, m: []const u8) bool {
        return self.buffer.startsWith(m);
    }

    /// Returns true if the string ends with the bytes of `m`.
    pub fn endsWith(self: *const String, m: []const u8) bool {
        return self.buffer.endsWith(m);
    }

    /// Returns true if the string has a length of zero bytes.
    pub fn isEmpty(self: *const String) bool {
        // Can't use Buffer.isNull because Buffer maintains a null byte at the
        // end. (e.g., string of "" in a Buffer is not null)
        return self.buffer.len() == 0;
    }

    /// Returns the length of the string in bytes.
    pub fn len(self: *const String) usize {
        return self.buffer.len();
    }

    /// Appends the bytes of `m` to the end of the string.
    pub fn append(self: *String, m: []const u8) !void {
        try self.buffer.append(m);
    }

    /// Returns true if the string's contents equal `m`.
    pub fn eql(self: *const String, m: []const u8) bool {
        return self.buffer.eql(m);
    }

    /// Reverses the string in place, byte by byte.
    /// Multi-byte UTF-8 sequences are reversed bytewise as well.
    pub fn reverse(self: *String) void {
        // Guard first: `self.len() - 1` below would underflow on "".
        if (self.len() <= 1) {
            return;
        }
        var i: usize = 0;
        var j: usize = self.len() - 1;
        while (i < j) {
            const temp = self.at(i);
            self.buffer.list.set(i, self.at(j));
            self.buffer.list.set(j, temp);
            i += 1;
            j -= 1;
        }
    }

    /// Returns the byte at index `i`.
    pub fn at(self: *const String, i: usize) u8 {
        return self.buffer.list.at(i);
    }

    /// Builds the Knuth-Morris-Pratt "longest proper prefix that is also a
    /// suffix" table for `pattern`; entry `k` describes `pattern[0..k + 1]`.
    /// Caller owns the returned memory
    fn computeLongestPrefixSuffixArray(self: *const String, allocator: *Allocator, pattern: []const u8) ![]usize {
        // One allocation of the final size instead of growing a list of zeros.
        const lps = try allocator.alloc(usize, pattern.len);
        mem.set(usize, lps, 0);

        // Left and right positions going through the pattern
        var left: usize = 0;
        var right: usize = 1;
        while (right < pattern.len) {
            if (pattern[right] == pattern[left]) {
                left += 1;
                lps[right] = left;
                right += 1;
            } else if (left != 0) {
                // Fall back to the next shorter candidate prefix.
                left = lps[left - 1];
            } else {
                // No prefix matches here; lps[right] is already 0.
                right += 1;
            }
        }
        return lps;
    }

    /// Return an array of indices containing substring matches for a given pattern
    /// Uses Knuth-Morris-Pratt Algorithm for string searching
    /// https://en.wikipedia.org/wiki/Knuth–Morris–Pratt_algorithm
    /// Matches are reported left to right and do not overlap: after a match the
    /// search restarts at the byte following it. An empty pattern, or a pattern
    /// longer than the string, yields an empty result.
    /// Caller owns the returned memory
    pub fn findSubstringIndices(self: *const String, allocator: *Allocator, pattern: []const u8) ![]usize {
        var indices = ArrayList(usize).init(allocator);
        defer indices.deinit();
        // `pattern.len > self.len()` also covers the empty-string case.
        if (pattern.len < 1 or pattern.len > self.len()) {
            // Hand over ownership instead of returning a view of a list that
            // the deferred deinit is about to release.
            return indices.toOwnedSlice();
        }

        const lps = try self.computeLongestPrefixSuffixArray(allocator, pattern);
        defer allocator.free(lps);

        var str_index: usize = 0;
        var pat_index: usize = 0;
        // pat_index is always < pattern.len here because it is reset on a match.
        while (str_index < self.len()) {
            if (self.at(str_index) == pattern[pat_index]) {
                str_index += 1;
                pat_index += 1;
            } else if (pat_index != 0) {
                pat_index = lps[pat_index - 1];
            } else {
                str_index += 1;
            }
            if (pat_index == pattern.len) {
                try indices.append(str_index - pattern.len);
                pat_index = 0;
            }
        }
        return indices.toOwnedSlice();
    }

    /// Returns true if `pattern` occurs in the string.
    /// Uses `allocator` for temporary storage that is freed before returning.
    pub fn contains(self: *const String, allocator: *Allocator, pattern: []const u8) !bool {
        const matches = try self.findSubstringIndices(allocator, pattern);
        defer allocator.free(matches);
        return matches.len > 0;
    }

    /// Returns the contents as a mutable slice.
    pub fn toSlice(self: *const String) []u8 {
        return self.buffer.toSlice();
    }

    /// Returns the contents as a read-only slice.
    pub fn toSliceConst(self: *const String) []const u8 {
        return self.buffer.toSliceConst();
    }

    /// Strips every leading and trailing byte that appears in `trim_pattern`.
    pub fn trim(self: *String, trim_pattern: []const u8) !void {
        const trimmed_str = mem.trim(u8, self.toSliceConst(), trim_pattern);
        try self.setTrimmedStr(trimmed_str);
    }

    /// Strips every leading byte that appears in `trim_pattern`.
    pub fn trimLeft(self: *String, trim_pattern: []const u8) !void {
        const trimmed_str = mem.trimLeft(u8, self.toSliceConst(), trim_pattern);
        try self.setTrimmedStr(trimmed_str);
    }

    /// Strips every trailing byte that appears in `trim_pattern`.
    pub fn trimRight(self: *String, trim_pattern: []const u8) !void {
        const trimmed_str = mem.trimRight(u8, self.toSliceConst(), trim_pattern);
        try self.setTrimmedStr(trimmed_str);
    }

    /// Moves `trimmed_str` (a sub-slice of this string's own contents) to the
    /// front of the buffer and shrinks the string to its length.
    fn setTrimmedStr(self: *String, trimmed_str: []const u8) !void {
        const m = trimmed_str.len;
        std.debug.assert(self.len() >= m); // this should always be true
        // Source never starts before the destination, so a front-to-back copy
        // is safe even though the two ranges may overlap.
        for (trimmed_str) |v, i| {
            self.buffer.list.set(i, v);
        }
        try self.buffer.resize(m);
    }

    /// Returns an iterator over the pieces of the string separated by
    /// `delimiter`, as produced by `mem.separate`.
    pub fn split(self: *const String, delimiter: []const u8) mem.SplitIterator {
        return mem.separate(self.toSliceConst(), delimiter);
    }

    /// Replaces every non-overlapping occurrence of `old` with `new`, in place.
    /// Does nothing when the string or `old` is empty, or when `old` does not
    /// occur. `allocator` is used for temporary storage only.
    pub fn replace(self: *String, allocator: *Allocator, old: []const u8, new: []const u8) !void {
        if (self.len() < 1 or old.len < 1) {
            return;
        }

        const matches = try self.findSubstringIndices(allocator, old);
        defer allocator.free(matches);
        if (matches.len < 1) {
            return;
        }
        var new_contents = ArrayList(u8).init(allocator);
        defer new_contents.deinit();

        const contents = self.toSliceConst();
        var orig_index: usize = 0;
        for (matches) |match_index| {
            // Copy the untouched text before this match, then the replacement.
            try new_contents.appendSlice(contents[orig_index..match_index]);
            try new_contents.appendSlice(new);
            orig_index = match_index + old.len;
        }
        // Append end of string if match does not end original string
        try new_contents.appendSlice(contents[orig_index..]);
        try self.buffer.replaceContents(new_contents.toSliceConst());
    }

    /// Returns the number of non-overlapping occurrences of `pattern`.
    /// Uses `allocator` for temporary storage that is freed before returning.
    pub fn count(self: *const String, allocator: *Allocator, pattern: []const u8) !usize {
        const matches = try self.findSubstringIndices(allocator, pattern);
        defer allocator.free(matches);
        return matches.len;
    }

    /// Only makes ASCII characters lowercase
    pub fn toLower(self: *String) void {
        for (self.toSlice()) |*c| {
            c.* = ascii.toLower(c.*);
        }
    }

    /// Only makes ASCII characters uppercase
    pub fn toUpper(self: *String) void {
        for (self.toSlice()) |*c| {
            c.* = ascii.toUpper(c.*);
        }
    }

    /// Returns the pointer exposed by the underlying `Buffer`.
    pub fn ptr(self: *const String) [*]u8 {
        return self.buffer.ptr();
    }
};

test ".startsWith" {
    var buf: [256]u8 = undefined;
    var fba = std.heap.FixedBufferAllocator.init(&buf);
    const allocator = &fba.allocator;
    var s = try String.init(allocator, "hello world");
    defer s.deinit();

    testing.expect(s.startsWith("hel"));
}

test ".endsWith" {
    var buf: [256]u8 = undefined;
    var fba = std.heap.FixedBufferAllocator.init(&buf);
    const allocator = &fba.allocator;
    var s = try String.init(allocator, "hello world");
    defer s.deinit();

    testing.expect(s.endsWith("orld"));
}

test ".isEmpty" {
    var buf: [256]u8 = undefined;
    var fba = std.heap.FixedBufferAllocator.init(&buf);
    const allocator = &fba.allocator;
    var s = try String.init(allocator, "");
    defer s.deinit();

    testing.expect(s.isEmpty());
    try s.append("hello");
    std.testing.expect(!s.isEmpty());
}

test ".len" {
    var buf: [256]u8 = undefined;
    var fba = std.heap.FixedBufferAllocator.init(&buf);
    const allocator = &fba.allocator;
    var s = try String.init(allocator, "");
    defer s.deinit();

    testing.expect(s.len() == 0);
    try s.append("hello");
    std.testing.expect(s.len() == 5);
}

test ".eql" {
    var buf: [256]u8 = undefined;
    var fba = std.heap.FixedBufferAllocator.init(&buf);
    const allocator = &fba.allocator;
    var s = try String.init(allocator, "hello world");
    defer s.deinit();

    testing.expect(s.eql("hello world"));
}

test ".reverse" {
    var buf: [256]u8 = undefined;
    var fba = std.heap.FixedBufferAllocator.init(&buf);
    const allocator = &fba.allocator;
    var s = try String.init(allocator, "");
    defer s.deinit();

    s.reverse();
    testing.expect(s.eql(""));

    try s.append("h");
    s.reverse();
    testing.expect(s.eql("h"));

    try s.append("e");
    s.reverse();
    testing.expect(s.eql("eh"));

    try s.buffer.replaceContents("hello");
    s.reverse();
    testing.expect(s.eql("olleh"));
}

test ".findSubstringIndices" {
    var buf: [1024]u8 = undefined;
    var fba = std.heap.FixedBufferAllocator.init(&buf);
    const allocator = &fba.allocator;
    var s = try String.init(allocator, "Mississippi");
    defer s.deinit();

    const m1 = try s.findSubstringIndices(allocator, "i");
    defer allocator.free(m1);
    testing.expect(mem.eql(usize, m1, [_]usize{ 1, 4, 7, 10 }));

    const m2 = try s.findSubstringIndices(allocator, "iss");
    defer allocator.free(m2);
    testing.expect(mem.eql(usize, m2, [_]usize{ 1, 4 }));

    const m3 = try s.findSubstringIndices(allocator, "z");
    defer allocator.free(m3);
    testing.expect(mem.eql(usize, m3, [_]usize{}));

    const m4 = try s.findSubstringIndices(allocator, "Mississippi");
    defer allocator.free(m4);
    testing.expect(mem.eql(usize, m4, [_]usize{0}));

    var s2 = try String.init(allocator, "的中对不起我的中文不好");
    defer s2.deinit();
    const m5 = try s2.findSubstringIndices(allocator, "的中");
    defer allocator.free(m5);
    testing.expect(mem.eql(usize, m5, [_]usize{ 0, 18 }));
}

test ".contains" {
    var buf: [1024]u8 = undefined;
    var fba = std.heap.FixedBufferAllocator.init(&buf);
    const allocator = &fba.allocator;
    var s = try String.init(allocator, "Mississippi");
    defer s.deinit();

    const m1 = try s.contains(allocator, "i");
    testing.expect(m1 == true);

    const m2 = try s.contains(allocator, "iss");
    testing.expect(m2 == true);

    const m3 = try s.contains(allocator, "z");
    testing.expect(m3 == false);

    const m4 = try s.contains(allocator, "Mississippi");
    testing.expect(m4 == true);
}

test ".toSlice" {
    var buf: [256]u8 = undefined;
    var fba = std.heap.FixedBufferAllocator.init(&buf);
    const allocator = &fba.allocator;
    var s = try String.init(allocator, "hello world");
    defer s.deinit();
    testing.expect(mem.eql(u8, "hello world", s.toSlice()));
}

test ".toSliceConst" {
    var buf: [256]u8 = undefined;
    var fba = std.heap.FixedBufferAllocator.init(&buf);
    const allocator = &fba.allocator;
    var s = try String.init(allocator, "hello world");
    defer s.deinit();
    testing.expect(mem.eql(u8, "hello world", s.toSliceConst()));
}

test ".trim" {
    var buf: [256]u8 = undefined;
    var fba = std.heap.FixedBufferAllocator.init(&buf);
    const allocator = &fba.allocator;
    var s = try String.init(allocator, " foo\n ");
    defer s.deinit();
    try s.trim(" \n");
    testing.expectEqualSlices(u8, "foo", s.toSliceConst());
    testing.expect(3 == s.len());
    try s.trim(" \n");
    testing.expectEqualSlices(u8, "foo", s.toSliceConst());
}

test ".trimLeft" {
    var buf: [256]u8 = undefined;
    var fba = std.heap.FixedBufferAllocator.init(&buf);
    const allocator = &fba.allocator;
    var s = try String.init(allocator, " foo\n ");
    defer s.deinit();
    try s.trimLeft(" \n");
    testing.expectEqualSlices(u8, "foo\n ", s.toSliceConst());
}

test ".trimRight" {
    var buf: [256]u8 = undefined;
    var fba = std.heap.FixedBufferAllocator.init(&buf);
    const allocator = &fba.allocator;
    var s = try String.init(allocator, " foo\n ");
    defer s.deinit();
    try s.trimRight(" \n");
    testing.expectEqualSlices(u8, " foo", s.toSliceConst());
}

test ".split" {
    var buf: [256]u8 = undefined;
    var fba = std.heap.FixedBufferAllocator.init(&buf);
    const allocator = &fba.allocator;
    var s = try String.init(allocator, "abc|def||ghi");
    defer s.deinit();

    // All of these tests are from std/mem.zig
    var it = s.split("|");
    testing.expect(mem.eql(u8, it.next().?, "abc"));
    testing.expect(mem.eql(u8, it.next().?, "def"));
    testing.expect(mem.eql(u8, it.next().?, ""));
    testing.expect(mem.eql(u8, it.next().?, "ghi"));
    testing.expect(it.next() == null);

    try s.buffer.replaceContents("");
    it = s.split("|");
    testing.expect(mem.eql(u8, it.next().?, ""));
    testing.expect(it.next() == null);

    try s.buffer.replaceContents("|");
    it = s.split("|");
    testing.expect(mem.eql(u8, it.next().?, ""));
    testing.expect(mem.eql(u8, it.next().?, ""));
    testing.expect(it.next() == null);
}

test ".replace" {
    var buf: [1024]u8 = undefined;
    var fba = std.heap.FixedBufferAllocator.init(&buf);
    const allocator = &fba.allocator;
    var s = try String.init(allocator, "Mississippi");
    defer s.deinit();
    try s.replace(allocator, "iss", "e");
    testing.expectEqualSlices(u8, "Meeippi", s.toSliceConst());

    try s.buffer.replaceContents("Mississippi");
    try s.replace(allocator, "iss", "issi");
    testing.expectEqualSlices(u8, "Missiissiippi", s.toSliceConst());

    try s.buffer.replaceContents("Mississippi");
    try s.replace(allocator, "i", "a");
    testing.expectEqualSlices(u8, "Massassappa", s.toSliceConst());

    try s.buffer.replaceContents("Mississippi");
    try s.replace(allocator, "iss", "");
    testing.expectEqualSlices(u8, "Mippi", s.toSliceConst());

    try s.buffer.replaceContents("Mississippi");
    try s.replace(allocator, s.toSliceConst(), "Foo");
    testing.expectEqualSlices(u8, "Foo", s.toSliceConst());
}

test ".count" {
    var buf: [1024]u8 = undefined;
    var fba = std.heap.FixedBufferAllocator.init(&buf);
    const allocator = &fba.allocator;
    var s = try String.init(allocator, "Mississippi");
    defer s.deinit();
    const c1 = try s.count(allocator, "i");
    testing.expect(c1 == 4);

    const c2 = try s.count(allocator, "M");
    testing.expect(c2 == 1);

    const c3 = try s.count(allocator, "abc");
    testing.expect(c3 == 0);

    const c4 = try s.count(allocator, "iss");
    testing.expect(c4 == 2);
}

test ".toLower" {
    var buf: [256]u8 = undefined;
    var fba = std.heap.FixedBufferAllocator.init(&buf);
    const allocator = &fba.allocator;
    var s = try String.init(allocator, "ABCDEF");
    defer s.deinit();
    s.toLower();
    testing.expectEqualSlices(u8, "abcdef", s.toSliceConst());

    try s.buffer.replaceContents("的ABcdEF中");
    s.toLower();
    testing.expectEqualSlices(u8, "的abcdef中", s.toSliceConst());

    try s.buffer.replaceContents("AB的cd中EF");
    s.toLower();
    testing.expectEqualSlices(u8, "ab的cd中ef", s.toSliceConst());
}

test ".toUpper" {
    var buf: [256]u8 = undefined;
    var fba = std.heap.FixedBufferAllocator.init(&buf);
    const allocator = &fba.allocator;
    var s = try String.init(allocator, "abcdef");
    defer s.deinit();
    s.toUpper();
    testing.expectEqualSlices(u8, "ABCDEF", s.toSliceConst());

    try s.buffer.replaceContents("的abCDef中");
    s.toUpper();
    testing.expectEqualSlices(u8, "的ABCDEF中", s.toSliceConst());

    try s.buffer.replaceContents("ab的CD中ef");
    s.toUpper();
    testing.expectEqualSlices(u8, "AB的CD中EF", s.toSliceConst());
}

test ".ptr" {
    var buf: [256]u8 = undefined;
    var fba = std.heap.FixedBufferAllocator.init(&buf);
    const allocator = &fba.allocator;
    var s = try String.init(allocator, "abcdef");
    defer s.deinit();
    testing.expect(mem.eql(u8, mem.toSliceConst(u8, s.ptr()), s.toSliceConst()));
}