├── .gitattributes ├── .github └── workflows │ └── main.yml ├── .gitignore ├── DiffMatchPatch.zig ├── LICENSE ├── README.md ├── build.zig └── build.zig.zon /.gitattributes: -------------------------------------------------------------------------------- 1 | *.zig text=auto eol=lf 2 | *.zon text=auto eol=lf 3 | -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | branches: 9 | - main 10 | workflow_dispatch: 11 | 12 | jobs: 13 | build: 14 | strategy: 15 | fail-fast: false 16 | matrix: 17 | zig-version: [master] 18 | os: [ubuntu-latest, macos-latest, windows-latest] 19 | include: 20 | - zig-version: "0.14.0" 21 | os: ubuntu-latest 22 | runs-on: ${{ matrix.os }} 23 | steps: 24 | - name: Checkout 25 | uses: actions/checkout@v4 26 | 27 | - name: Setup Zig 28 | uses: mlugg/setup-zig@v1 29 | with: 30 | version: ${{ matrix.zig-version }} 31 | 32 | - name: Check Formatting 33 | run: zig fmt --ast-check --check . 
34 | 35 | - name: Run Tests 36 | run: zig build test --summary all 37 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .zig-cache 2 | zig-cache 3 | zig-out 4 | -------------------------------------------------------------------------------- /DiffMatchPatch.zig: --------------------------------------------------------------------------------
const DiffMatchPatch = @This();

const std = @import("std");
const testing = std.testing;
const Allocator = std.mem.Allocator;

/// DMP with default configuration options
pub const default: DiffMatchPatch = .{};

/// One edit in a diff: an operation plus the text it applies to.
pub const Diff = struct {
    /// The kind of edit a `Diff` describes.
    pub const Operation = enum {
        insert,
        delete,
        equal,
    };

    operation: Operation,
    text: []const u8,

    /// Write the diff as `(<op>, "<text>")`, where `<op>` is `+` for an
    /// insertion, `-` for a deletion and `=` for an equality.
    pub fn format(value: Diff, _: anytype, _: anytype, writer: anytype) !void {
        const symbol: []const u8 = switch (value.operation) {
            .insert => "+",
            .delete => "-",
            .equal => "=",
        };
        try writer.print("({s}, \"{s}\")", .{ symbol, value.text });
    }

    /// Two diffs are equal when they have the same operation and the same
    /// text contents (compared byte by byte, not by pointer).
    pub fn eql(a: Diff, b: Diff) bool {
        if (a.operation != b.operation) return false;
        return std.mem.eql(u8, a.text, b.text);
    }

    test eql {
        const equal_a: Diff = .{ .operation = .equal, .text = "a" };
        const insert_a: Diff = .{ .operation = .insert, .text = "a" };
        const equal_b: Diff = .{ .operation = .equal, .text = "b" };
        const delete_b: Diff = .{ .operation = .delete, .text = "b" };

        // Same operation, same text.
        try testing.expect(equal_a.eql(equal_a));
        // Same text, different operation.
        try testing.expect(!insert_a.eql(equal_a));
        // Same operation, different text.
        try testing.expect(!equal_a.eql(equal_b));
        // Both differ.
        try testing.expect(!equal_a.eql(delete_b));
    }
};

/// Number of milliseconds to map a diff before giving up (0 for infinity).
diff_timeout: u64 = 1000,
/// Cost of an empty edit operation in terms of edit characters.
diff_edit_cost: u16 = 4,
/// Number of bytes in each string needed to trigger a line-based diff
diff_check_lines_over: u64 = 100,

/// At what point is no match declared (0.0 = perfection, 1.0 = very loose).
match_threshold: f32 = 0.5,
/// How far to search for a match (0 = exact location, 1000+ = broad match).
/// A match this many characters away from the expected location will add
/// 1.0 to the score (0.0 is a perfect match).
match_distance: u32 = 1000,
/// The number of bits in an int.
match_max_bits: u16 = 32,

/// When deleting a large block of text (over ~64 characters), how close
/// do the contents have to be to match the expected contents. (0.0 =
/// perfection, 1.0 = very loose). Note that `match_threshold` controls
/// how closely the end points of a delete need to match.
patch_delete_threshold: f32 = 0.5,
/// Chunk size for context length.
patch_margin: u16 = 4,

/// Errors a diff computation can return.
pub const DiffError = error{OutOfMemory};

/// Find the differences between two texts.
///
/// When `dmp.diff_timeout` is zero no deadline is applied; otherwise the
/// deadline is the current millisecond timestamp plus the timeout (saturating,
/// so a very large timeout behaves like "no deadline" instead of overflowing).
///
/// The returned list owns the text of every `Diff` in it and must be freed
/// with `deinitDiffList(allocator, &diffs)`.
pub fn diff(
    dmp: DiffMatchPatch,
    allocator: std.mem.Allocator,
    /// Old string to be diffed.
    before: []const u8,
    /// New string to be diffed.
    after: []const u8,
    /// If false, then don't run a line-level diff first
    /// to identify the changed areas. If true, then run
    /// a faster slightly less optimal diff.
    check_lines: bool,
) DiffError!DiffList {
    const deadline: u64 = if (dmp.diff_timeout == 0)
        std.math.maxInt(u64)
    else deadline: {
        // Clamp a (theoretically) negative clock reading to zero so the cast
        // cannot trap, and saturate the addition for huge timeouts.
        const now_ms: u64 = @intCast(@max(0, std.time.milliTimestamp()));
        break :deadline now_ms +| dmp.diff_timeout;
    };
    return dmp.diffInternal(allocator, before, after, check_lines, deadline);
}

/// The list type returned by `diff` and consumed by `deinitDiffList`.
pub const DiffList = std.ArrayListUnmanaged(Diff);

/// Deinit an `std.ArrayListUnmanaged(Diff)` and the allocated slices of
/// text in each `Diff`.
pub fn deinitDiffList(allocator: Allocator, diffs: *DiffList) void {
    defer diffs.deinit(allocator);
    for (diffs.items) |d| {
        allocator.free(d.text);
    }
}

/// Free the text of the `len` diffs starting at index `start`.
/// The entries themselves stay in the list; the caller is expected to remove
/// or overwrite them.
fn freeRangeDiffList(
    allocator: Allocator,
    diffs: *DiffList,
    start: usize,
    len: usize,
) void {
    const range = diffs.items[start..][0..len];
    for (range) |d| {
        allocator.free(d.text);
    }
}

/// Diff `before` against `after` with an explicit `deadline`.
///
/// Strips the common prefix and suffix, diffs the remaining middle with
/// `diffCompute`, re-attaches the prefix and suffix as equalities and finally
/// runs `diffCleanupMerge` over the result. The returned list owns its texts.
fn diffInternal(
    dmp: DiffMatchPatch,
    allocator: std.mem.Allocator,
    before: []const u8,
    after: []const u8,
    check_lines: bool,
    deadline: u64,
) DiffError!DiffList {
    // Trim off common prefix (speedup).
    const common_prefix_length = std.mem.indexOfDiff(u8, before, after) orelse {
        // The texts are identical: the result is a single equality, or an
        // empty list when both texts are empty.
        var diffs: DiffList = .empty;
        errdefer deinitDiffList(allocator, &diffs);
        if (before.len != 0) {
            try diffs.ensureUnusedCapacity(allocator, 1);
            diffs.appendAssumeCapacity(.{
                .operation = .equal,
                .text = try allocator.dupe(u8, before),
            });
        }
        return diffs;
    };

    const common_prefix = before[0..common_prefix_length];
    var trimmed_before = before[common_prefix_length..];
    var trimmed_after = after[common_prefix_length..];

    // Trim off common suffix (speedup).
    const common_suffix_length = diffCommonSuffix(trimmed_before, trimmed_after);
    const common_suffix = trimmed_before[trimmed_before.len - common_suffix_length ..];
    trimmed_before = trimmed_before[0 .. trimmed_before.len - common_suffix_length];
    trimmed_after = trimmed_after[0 .. trimmed_after.len - common_suffix_length];

    // Compute the diff on the middle block.
    var diffs = try dmp.diffCompute(allocator, trimmed_before, trimmed_after, check_lines, deadline);
    errdefer deinitDiffList(allocator, &diffs);

    // Restore the prefix and suffix.
    // Capacity is reserved first so that the freshly duplicated text can never
    // be leaked by a failing insert.
    if (common_prefix.len != 0) {
        try diffs.ensureUnusedCapacity(allocator, 1);
        diffs.insertAssumeCapacity(0, .{
            .operation = .equal,
            .text = try allocator.dupe(u8, common_prefix),
        });
    }
    if (common_suffix.len != 0) {
        try diffs.ensureUnusedCapacity(allocator, 1);
        diffs.appendAssumeCapacity(.{
            .operation = .equal,
            .text = try allocator.dupe(u8, common_suffix),
        });
    }

    try diffCleanupMerge(allocator, &diffs);
    return diffs;
}

/// Index of the first element at which `a` and `b` differ.
/// Returns null when the slices are equal, and the shorter length when one
/// slice is a strict prefix of the other.
/// Thin wrapper over `std.mem.indexOfDiff`, which `diffInternal` uses directly.
fn indexOfDiff(comptime T: type, a: []const T, b: []const T) ?usize {
    return std.mem.indexOfDiff(T, a, b);
}

/// Number of leading bytes that `before` and `after` have in common.
fn diffCommonPrefix(before: []const u8, after: []const u8) usize {
    return indexOfDiff(u8, before, after) orelse @min(before.len, after.len);
}

/// Number of trailing bytes that `before` and `after` have in common.
fn diffCommonSuffix(before: []const u8, after: []const u8) usize {
    const n = @min(before.len, after.len);
    var i: usize = 1;

    // `i` counts bytes from the end, so `i - 1` bytes matched when byte `i`
    // is the first mismatch.
    while (i <= n) : (i += 1) {
        if (before[before.len - i] != after[after.len - i]) {
            return i - 1;
        }
    }

    return n;
}

/// Find the differences between two texts.
/// Assumes that the texts do not
/// have any common prefix or suffix.
///
/// A series of cheap special cases is tried first (one side empty, one text
/// contained in the other, a single-byte short text, a half-match split).
/// Only when none applies does the function fall back to a line-mode diff
/// (if `check_lines` is set and both texts are longer than
/// `dmp.diff_check_lines_over`) or to a bisecting diff.
/// `deadline` is passed through to every recursive call.
/// The returned list owns the text of each of its diffs.
fn diffCompute(
    dmp: DiffMatchPatch,
    allocator: std.mem.Allocator,
    before: []const u8,
    after: []const u8,
    check_lines: bool,
    deadline: u64,
) DiffError!DiffList {
    if (before.len == 0 or after.len == 0) {
        // One side is empty, so the other side is a single edit (speedup).
        // An empty `before` takes precedence, exactly as if it were tested
        // first.
        const nothing_before = before.len == 0;
        const op: Diff.Operation = if (nothing_before) .insert else .delete;
        const edited = if (nothing_before) after else before;

        var result: DiffList = .empty;
        errdefer deinitDiffList(allocator, &result);
        try result.ensureUnusedCapacity(allocator, 1);
        result.appendAssumeCapacity(.{
            .operation = op,
            .text = try allocator.dupe(u8, edited),
        });
        return result;
    }

    const before_is_longer = before.len > after.len;
    const long_text = if (before_is_longer) before else after;
    const short_text = if (before_is_longer) after else before;

    if (std.mem.indexOf(u8, long_text, short_text)) |index| {
        // Shorter text is inside the longer text (speedup): everything around
        // it was either deleted or inserted.
        const op: Diff.Operation = if (before_is_longer) .delete else .insert;
        const match_end = index + short_text.len;

        var result: DiffList = .empty;
        errdefer deinitDiffList(allocator, &result);
        try result.ensureUnusedCapacity(allocator, 3);
        result.appendAssumeCapacity(.{
            .operation = op,
            .text = try allocator.dupe(u8, long_text[0..index]),
        });
        result.appendAssumeCapacity(.{
            .operation = .equal,
            .text = try allocator.dupe(u8, short_text),
        });
        result.appendAssumeCapacity(.{
            .operation = op,
            .text = try allocator.dupe(u8, long_text[match_end..]),
        });
        return result;
    }

    if (short_text.len == 1) {
        // Single character string.
        // After the previous speedup, the character can't be an equality.
        var result: DiffList = .empty;
        errdefer deinitDiffList(allocator, &result);
        try result.ensureUnusedCapacity(allocator, 2);
        result.appendAssumeCapacity(.{
            .operation = .delete,
            .text = try allocator.dupe(u8, before),
        });
        result.appendAssumeCapacity(.{
            .operation = .insert,
            .text = try allocator.dupe(u8, after),
        });
        return result;
    }

    // Check to see if the problem can be split in two.
    if (try dmp.diffHalfMatch(allocator, before, after)) |half_match| {
        defer half_match.deinit(allocator);

        // Diff the parts in front of and behind the common middle separately.
        var head = try dmp.diffInternal(
            allocator,
            half_match.prefix_before,
            half_match.prefix_after,
            check_lines,
            deadline,
        );
        errdefer deinitDiffList(allocator, &head);

        var tail = try dmp.diffInternal(
            allocator,
            half_match.suffix_before,
            half_match.suffix_after,
            check_lines,
            deadline,
        );
        // The backing array of `tail` is always released here. Its texts are
        // moved into `head` on success, so they are only freed on failure.
        defer tail.deinit(allocator);
        errdefer for (tail.items) |d| allocator.free(d.text);

        // Merge the results: head, common middle, tail.
        try head.ensureUnusedCapacity(allocator, 1);
        head.appendAssumeCapacity(.{
            .operation = .equal,
            .text = try allocator.dupe(u8, half_match.common_middle),
        });
        try head.appendSlice(allocator, tail.items);
        return head;
    }

    const long_enough_for_lines = before.len > dmp.diff_check_lines_over and
        after.len > dmp.diff_check_lines_over;
    if (check_lines and long_enough_for_lines) {
        return dmp.diffLineMode(allocator, before, after, deadline);
    }

    return dmp.diffBisect(allocator, before, after, deadline);
}

/// The five pieces produced by a successful half-match. Every slice is owned
/// by the result and released by `deinit`.
const HalfMatchResult = struct {
    prefix_before: []const u8,
    suffix_before: []const u8,
    prefix_after: []const u8,
    suffix_after: []const u8,
    common_middle: []const u8,

    /// Free all five slices with `alloc`.
    pub fn deinit(hmr: HalfMatchResult, alloc: Allocator) void {
        const owned = [_][]const u8{
            hmr.prefix_before,
            hmr.suffix_before,
            hmr.prefix_after,
            hmr.suffix_after,
            hmr.common_middle,
        };
        for (owned) |slice| alloc.free(slice);
    }
};

/// Do the two texts share a Substring which is at least half the length of
/// the longer text?
/// This speedup can produce non-minimal diffs.
/// @param before First string.
/// @param after Second string.
/// @return A `HalfMatchResult` holding the prefix of `before`, the suffix of
///     `before`, the prefix of `after`, the suffix of `after` and the common
///     middle, or null if there was no match. The caller owns a non-null
///     result and releases it with `HalfMatchResult.deinit`.
fn diffHalfMatch(
    dmp: DiffMatchPatch,
    allocator: std.mem.Allocator,
    before: []const u8,
    after: []const u8,
) DiffError!?HalfMatchResult {
    if (dmp.diff_timeout == 0) {
        // Don't risk returning a non-optimal diff if we have unlimited time.
        return null;
    }
    const before_is_longer = before.len > after.len;
    const long_text = if (before_is_longer) before else after;
    const short_text = if (before_is_longer) after else before;

    if (long_text.len < 4 or short_text.len * 2 < long_text.len) {
        return null; // Pointless.
    }

    // First check if the second quarter is the seed for a half-match.
    const half_match_1 = try dmp.diffHalfMatchInternal(allocator, long_text, short_text, (long_text.len + 3) / 4);
    // Only the second search below can still fail; release the first result
    // if it does.
    errdefer if (half_match_1) |h_m| h_m.deinit(allocator);
    // Check again based on the third quarter.
    const half_match_2 = try dmp.diffHalfMatchInternal(allocator, long_text, short_text, (long_text.len + 1) / 2);

    // Pick the only match, or the one with the longer common middle when both
    // matched (ties go to the second). The losing match is freed.
    const half_match: HalfMatchResult = chosen: {
        const first = half_match_1 orelse {
            break :chosen half_match_2 orelse return null;
        };
        const second = half_match_2 orelse break :chosen first;
        if (first.common_middle.len > second.common_middle.len) {
            second.deinit(allocator);
            break :chosen first;
        }
        first.deinit(allocator);
        break :chosen second;
    };

    // A half-match was found, sort out the return data.
    // The internal search reports (long, short); map that back to
    // (before, after).
    if (before_is_longer) return half_match;

    // Transfers ownership of all memory to the new, permuted, result.
    return .{
        .prefix_before = half_match.prefix_after,
        .suffix_before = half_match.suffix_after,
        .prefix_after = half_match.prefix_before,
        .suffix_after = half_match.suffix_before,
        .common_middle = half_match.common_middle,
    };
}

/// Does a substring of `short_text` exist within `long_text` such that the
/// substring is at least half the length of `long_text`?
///
/// `i` is the start index of the quarter-length seed within `long_text`.
/// Every occurrence of the seed in `short_text` is extended as far as
/// possible in both directions and the longest extension wins (the earliest
/// one on ties).
///
/// Returns the prefix of `long_text`, the suffix of `long_text`, the prefix of
/// `short_text`, the suffix of `short_text` and the common middle, each as a
/// freshly allocated slice owned by the caller, or null if there was no match.
fn diffHalfMatchInternal(
    _: DiffMatchPatch,
    allocator: std.mem.Allocator,
    long_text: []const u8,
    short_text: []const u8,
    i: usize,
) DiffError!?HalfMatchResult {
    // Start with a 1/4 length substring at position i as a seed.
    const seed = long_text[i .. i + long_text.len / 4];

    // While searching, the best candidate is tracked as views into the input
    // texts; nothing is copied until a qualifying match is known.
    var best_common: []const u8 = "";
    var best_long_text_a: []const u8 = "";
    var best_long_text_b: []const u8 = "";
    var best_short_text_a: []const u8 = "";
    var best_short_text_b: []const u8 = "";

    var search_from: usize = 0;
    while (search_from <= short_text.len) {
        const j = std.mem.indexOfPos(u8, short_text, search_from, seed) orelse break;
        search_from = j + 1;

        // Grow the match forwards from the seed start and backwards from it.
        const prefix_length = diffCommonPrefix(long_text[i..], short_text[j..]);
        const suffix_length = diffCommonSuffix(long_text[0..i], short_text[0..j]);
        if (best_common.len < suffix_length + prefix_length) {
            best_common = short_text[j - suffix_length .. j + prefix_length];
            best_long_text_a = long_text[0 .. i - suffix_length];
            best_long_text_b = long_text[i + prefix_length ..];
            best_short_text_a = short_text[0 .. j - suffix_length];
            best_short_text_b = short_text[j + prefix_length ..];
        }
    }

    // The common part must cover at least half of the longer text.
    if (best_common.len * 2 < long_text.len) return null;

    const prefix_before = try allocator.dupe(u8, best_long_text_a);
    errdefer allocator.free(prefix_before);
    const suffix_before = try allocator.dupe(u8, best_long_text_b);
    errdefer allocator.free(suffix_before);
    const prefix_after = try allocator.dupe(u8, best_short_text_a);
    errdefer allocator.free(prefix_after);
    const suffix_after = try allocator.dupe(u8, best_short_text_b);
    errdefer allocator.free(suffix_after);
    const common_middle = try allocator.dupe(u8, best_common);
    return .{
        .prefix_before = prefix_before,
        .suffix_before = suffix_before,
        .prefix_after = prefix_after,
        .suffix_after = suffix_after,
        .common_middle = common_middle,
    };
}

/// Find the 'middle snake' of a diff, split the problem in two
/// and return the recursively constructed diff.
/// See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations.
/// @param before Old string to be diffed.
/// @param after New string to be diffed.
/// @param deadline Time at which to bail if not yet complete.
/// @return List of Diff objects.
fn diffBisect(
    dmp: DiffMatchPatch,
    allocator: std.mem.Allocator,
    before: []const u8,
    after: []const u8,
    deadline: u64,
) DiffError!DiffList {
    const before_length: isize = @intCast(before.len);
    const after_length: isize = @intCast(after.len);
    const max_d: isize = @intCast((before.len + after.len + 1) / 2);
    const v_offset = max_d;
    const v_length = 2 * max_d;

    // Furthest-reaching x per diagonal for the forward (v1) and reverse (v2)
    // searches, indexed by `v_offset + k`. -1 marks "not reached yet".
    const v1 = try allocator.alloc(isize, @intCast(v_length));
    defer allocator.free(v1);
    const v2 = try allocator.alloc(isize, @intCast(v_length));
    defer allocator.free(v2);
    @memset(v1, -1);
    @memset(v2, -1);
    v1[@intCast(v_offset + 1)] = 0;
    v2[@intCast(v_offset + 1)] = 0;

    const delta = before_length - after_length;
    // If the total number of characters is odd, then the front path will
    // collide with the reverse path.
    const front = (@mod(delta, 2) != 0);
    // Offsets for start and end of k loop.
    // Prevents mapping of space beyond the grid.
    var k1start: isize = 0;
    var k1end: isize = 0;
    var k2start: isize = 0;
    var k2end: isize = 0;

    var d: isize = 0;
    while (d < max_d) : (d += 1) {
        // Bail out if deadline is reached.
        if (@as(u64, @intCast(std.time.milliTimestamp())) > deadline) {
            break;
        }

        // Walk the front path one step.
        var k1 = -d + k1start;
        while (k1 <= d - k1end) : (k1 += 2) {
            const k1_offset = v_offset + k1;
            // Step down from diagonal k1 + 1, or right from diagonal k1 - 1,
            // whichever reaches further.
            var x1: isize = if (k1 == -d or (k1 != d and
                v1[@intCast(k1_offset - 1)] < v1[@intCast(k1_offset + 1)]))
                v1[@intCast(k1_offset + 1)]
            else
                v1[@intCast(k1_offset - 1)] + 1;
            var y1 = x1 - k1;
            // Follow the diagonal while the texts agree.
            while (x1 < before_length and
                y1 < after_length and before[@intCast(x1)] == after[@intCast(y1)])
            {
                x1 += 1;
                y1 += 1;
            }
            v1[@intCast(k1_offset)] = x1;
            if (x1 > before_length) {
                // Ran off the right of the graph.
                k1end += 2;
            } else if (y1 > after_length) {
                // Ran off the bottom of the graph.
                k1start += 2;
            } else if (front) {
                const k2_offset = v_offset + delta - k1;
                if (k2_offset >= 0 and k2_offset < v_length and v2[@intCast(k2_offset)] != -1) {
                    // Mirror x2 onto top-left coordinate system.
                    const x2 = before_length - v2[@intCast(k2_offset)];
                    if (x1 >= x2) {
                        // Overlap detected.
                        return dmp.diffBisectSplit(allocator, before, after, x1, y1, deadline);
                    }
                }
            }
        }

        // Walk the reverse path one step.
        var k2: isize = -d + k2start;
        while (k2 <= d - k2end) : (k2 += 2) {
            const k2_offset = v_offset + k2;
            var x2: isize = if (k2 == -d or (k2 != d and
                v2[@intCast(k2_offset - 1)] < v2[@intCast(k2_offset + 1)]))
                v2[@intCast(k2_offset + 1)]
            else
                v2[@intCast(k2_offset - 1)] + 1;
            var y2: isize = x2 - k2;
            // Same as above, but comparing from the ends of both texts.
            while (x2 < before_length and y2 < after_length and
                before[@intCast(before_length - x2 - 1)] ==
                    after[@intCast(after_length - y2 - 1)])
            {
                x2 += 1;
                y2 += 1;
            }
            v2[@intCast(k2_offset)] = x2;
            if (x2 > before_length) {
                // Ran off the left of the graph.
                k2end += 2;
            } else if (y2 > after_length) {
                // Ran off the top of the graph.
                k2start += 2;
            } else if (!front) {
                const k1_offset = v_offset + delta - k2;
                if (k1_offset >= 0 and k1_offset < v_length and v1[@intCast(k1_offset)] != -1) {
                    const x1 = v1[@intCast(k1_offset)];
                    const y1 = v_offset + x1 - k1_offset;
                    // Mirror x2 onto top-left coordinate system.
                    x2 = before_length - v2[@intCast(k2_offset)];
                    if (x1 >= x2) {
                        // Overlap detected.
                        return dmp.diffBisectSplit(allocator, before, after, x1, y1, deadline);
                    }
                }
            }
        }
    }

    // Diff took too long and hit the deadline or
    // number of diffs equals number of characters, no commonality at all.
    var diffs: DiffList = .empty;
    errdefer deinitDiffList(allocator, &diffs);
    try diffs.ensureUnusedCapacity(allocator, 2);
    diffs.appendAssumeCapacity(.{
        .operation = .delete,
        .text = try allocator.dupe(u8, before),
    });
    diffs.appendAssumeCapacity(.{
        .operation = .insert,
        .text = try allocator.dupe(u8, after),
    });
    return diffs;
}

/// Given the location of the 'middle snake', split the diff in two parts
/// and recurse.
/// @param text1 Old string to be diffed.
/// @param text2 New string to be diffed.
/// @param x Index of split point in text1.
/// @param y Index of split point in text2.
/// @param deadline Time at which to bail if not yet complete.
/// @return List of Diff objects: the diff of the two front halves followed
///     by the diff of the two back halves. The list owns its texts.
fn diffBisectSplit(
    dmp: DiffMatchPatch,
    allocator: std.mem.Allocator,
    text1: []const u8,
    text2: []const u8,
    x: isize,
    y: isize,
    deadline: u64,
) DiffError!DiffList {
    const text1a = text1[0..@intCast(x)];
    const text2a = text2[0..@intCast(y)];
    const text1b = text1[@intCast(x)..];
    const text2b = text2[@intCast(y)..];

    // Compute both diffs serially.
    var diffs = try dmp.diffInternal(allocator, text1a, text2a, false, deadline);
    errdefer deinitDiffList(allocator, &diffs);
    var diffs_b = try dmp.diffInternal(allocator, text1b, text2b, false, deadline);
    // Free the list, but not the contents: on success the texts now belong to
    // `diffs`, so they are only freed when the append below fails.
    defer diffs_b.deinit(allocator);
    errdefer {
        for (diffs_b.items) |d| {
            allocator.free(d.text);
        }
    }
    try diffs.appendSlice(allocator, diffs_b.items);
    return diffs;
}

/// Do a quick line-level diff on both strings, then rediff the parts for
/// greater accuracy.
/// This speedup can produce non-minimal diffs.
/// @param text1 Old string to be diffed.
/// @param text2 New string to be diffed.
/// @param deadline Time when the diff should be complete by.
/// @return List of Diff objects. The list owns the text of each diff.
fn diffLineMode(
    dmp: DiffMatchPatch,
    allocator: std.mem.Allocator,
    text1_in: []const u8,
    text2_in: []const u8,
    deadline: u64,
) DiffError!DiffList {
    // Scan the text on a line-by-line basis first.
    var encoded = try diffLinesToChars(allocator, text1_in, text2_in);
    defer encoded.deinit(allocator);

    var diffs: DiffList = line_level: {
        // Diff the encoded texts, where every byte stands for one whole line.
        var char_diffs: DiffList = try dmp.diffInternal(
            allocator,
            encoded.chars_1,
            encoded.chars_2,
            false,
            deadline,
        );
        defer deinitDiffList(allocator, &char_diffs);
        // Convert the diff back to original text.
        break :line_level try diffCharsToLines(allocator, &char_diffs, encoded.line_array.items);
    };
    errdefer deinitDiffList(allocator, &diffs);

    // Eliminate freak matches (e.g. blank lines)
    try diffCleanupSemantic(allocator, &diffs);

    // Rediff any replacement blocks, this time character-by-character.
    // Add a dummy entry at the end so the last run of edits is flushed too.
    try diffs.append(allocator, .{ .operation = .equal, .text = "" });

    var count_delete: usize = 0;
    var count_insert: usize = 0;
    var text_delete: std.ArrayListUnmanaged(u8) = .empty;
    defer text_delete.deinit(allocator);
    var text_insert: std.ArrayListUnmanaged(u8) = .empty;
    defer text_insert.deinit(allocator);

    var pointer: usize = 0;
    while (pointer < diffs.items.len) : (pointer += 1) {
        const current = diffs.items[pointer];
        switch (current.operation) {
            .insert => {
                count_insert += 1;
                try text_insert.appendSlice(allocator, current.text);
            },
            .delete => {
                count_delete += 1;
                try text_delete.appendSlice(allocator, current.text);
            },
            .equal => {
                // Upon reaching an equality, check for prior redundancies.
                if (count_delete >= 1 and count_insert >= 1) {
                    // Delete the offending records and add the merged ones.
                    const edit_count = count_delete + count_insert;
                    const edit_start = pointer - edit_count;
                    freeRangeDiffList(allocator, &diffs, edit_start, edit_count);
                    diffs.replaceRangeAssumeCapacity(edit_start, edit_count, &.{});
                    pointer = edit_start;

                    var sub_diff = try dmp.diffInternal(
                        allocator,
                        text_delete.items,
                        text_insert.items,
                        false,
                        deadline,
                    );
                    {
                        // Until room has been reserved in `diffs`, `sub_diff`
                        // still owns its texts.
                        errdefer deinitDiffList(allocator, &sub_diff);
                        try diffs.ensureUnusedCapacity(allocator, sub_diff.items.len);
                    }
                    // From here on only the backing array of `sub_diff` is
                    // released; its texts move into `diffs`.
                    defer sub_diff.deinit(allocator);
                    const new_diff = diffs.addManyAtAssumeCapacity(pointer, sub_diff.items.len);
                    @memcpy(new_diff, sub_diff.items);
                    pointer = pointer + sub_diff.items.len;
                }
                count_insert = 0;
                count_delete = 0;
                text_delete.clearRetainingCapacity();
                text_insert.clearRetainingCapacity();
            },
        }
    }
    diffs.items.len -= 1; // Remove the dummy entry at the end.

    return diffs;
}

/// Output of `diffLinesToChars`: both texts encoded as one byte per line, and
/// the table that maps each byte back to its line.
const LinesToCharsResult = struct {
    chars_1: []const u8,
    chars_2: []const u8,
    line_array: std.ArrayListUnmanaged([]const u8),

    /// Free both encoded texts and the line table itself. The lines stored in
    /// the table are not freed.
    pub fn deinit(self: *LinesToCharsResult, allocator: Allocator) void {
        self.line_array.deinit(allocator);
        allocator.free(self.chars_2);
        allocator.free(self.chars_1);
    }
};

/// Split two texts into a list of strings. Reduce the texts to a string of
/// hashes where each byte represents one line.
/// @param text1 First string.
/// @param text2 Second string.
/// @return The encoded text1, the encoded text2 and the list of unique
///     strings. The zeroth element of the list of unique strings is
///     intentionally blank. Release the result with
///     `LinesToCharsResult.deinit`.
fn diffLinesToChars(
    allocator: std.mem.Allocator,
    text1: []const u8,
    text2: []const u8,
) DiffError!LinesToCharsResult {
    // e.g. unique_lines[4] == "Hello\n"
    // e.g. index_of_line.get("Hello\n") == 4
    var unique_lines: std.ArrayListUnmanaged([]const u8) = .empty;
    errdefer unique_lines.deinit(allocator);
    var index_of_line: std.StringHashMapUnmanaged(usize) = .empty;
    defer index_of_line.deinit(allocator);

    // "\x00" is a valid character, but various debuggers don't like it.
    // So we'll insert a junk entry to avoid generating a null character.
    try unique_lines.append(allocator, "");

    // Allocate 2/3rds of the space for text1, the rest for text2.
    const encoded_1 = try diffLinesToCharsMunge(allocator, text1, &unique_lines, &index_of_line, 170);
    errdefer allocator.free(encoded_1);
    const encoded_2 = try diffLinesToCharsMunge(allocator, text2, &unique_lines, &index_of_line, 255);

    return .{
        .chars_1 = encoded_1,
        .chars_2 = encoded_2,
        .line_array = unique_lines,
    };
}

/// Split a text into a list of strings.
/// Reduce the texts to a string of
/// hashes where each byte represents one line.
///
/// Every line of `text` (including its trailing "\n", if any) is looked up in
/// `line_hash`. Unknown lines are appended to `line_array` and registered in
/// `line_hash` under their new index. The index of the line is then emitted
/// as one byte of the result.
///
/// Once `line_array` already holds `max_lines` entries, the whole remainder of
/// `text` is stored as a single final "line" so that indices keep fitting in
/// a byte. `max_lines` must therefore not exceed 255.
///
/// The slices stored in `line_array` and used as keys of `line_hash` point
/// into `text`; they are not copied. The caller owns the returned slice.
fn diffLinesToCharsMunge(
    allocator: std.mem.Allocator,
    text: []const u8,
    line_array: *std.ArrayListUnmanaged([]const u8),
    line_hash: *std.StringHashMapUnmanaged(usize),
    max_lines: usize,
) DiffError![]const u8 {
    var chars: std.ArrayListUnmanaged(u8) = .empty;
    defer chars.deinit(allocator);

    // Walk the text, pulling out a substring for each line.
    var line_start: usize = 0;
    while (line_start < text.len) {
        // One past the end of the current line: just behind its "\n", or the
        // end of the text when the last line is unterminated.
        var line_end: usize = if (std.mem.indexOfScalarPos(u8, text, line_start, '\n')) |newline|
            newline + 1
        else
            text.len;
        var line = text[line_start..line_end];

        if (line_hash.get(line)) |value| {
            try chars.append(allocator, @intCast(value));
        } else {
            if (line_array.items.len == max_lines) {
                // Bail out at 255 because char 256 == char 0.
                line = text[line_start..];
                line_end = text.len;
            }
            try line_array.append(allocator, line);
            const index = line_array.items.len - 1;
            try line_hash.put(allocator, line, index);
            try chars.append(allocator, @intCast(index));
        }
        line_start = line_end;
    }
    return try chars.toOwnedSlice(allocator);
}

/// Rehydrate the text in a diff from a string of line hashes to real lines
/// of text.
/// @param char_diffs Diffs whose text is a string of line indices, one byte
///     per line.
/// @param line_array List of unique lines, indexed by those bytes.
/// @return A new list with the same operations and the real line text. The
///     input list is not modified. On error the partially built list is
///     released through `deinitDiffList`.
fn diffCharsToLines(
    allocator: std.mem.Allocator,
    char_diffs: *DiffList,
    line_array: []const []const u8,
) DiffError!DiffList {
    var diffs: DiffList = .empty;
    errdefer deinitDiffList(allocator, &diffs);
    try diffs.ensureTotalCapacity(allocator, char_diffs.items.len);

    // Scratch buffer; `toOwnedSlice` leaves it empty for the next diff.
    var text: std.ArrayListUnmanaged(u8) = .empty;
    defer text.deinit(allocator);

    for (char_diffs.items) |d| {
        for (d.text) |line_index| {
            try text.appendSlice(allocator, line_array[line_index]);
        }
        diffs.appendAssumeCapacity(.{
            .operation = d.operation,
            .text = try text.toOwnedSlice(allocator),
        });
    }
    return diffs;
}

/// Reorder and merge like edit sections. Merge equalities.
/// Any edit section can move as long as it doesn't cross an equality.
/// The list is rewritten in place; replaced texts are freed with `allocator`.
/// @param diffs List of Diff objects.
fn diffCleanupMerge(allocator: std.mem.Allocator, diffs: *DiffList) DiffError!void {
    // Add a dummy entry at the end so that a trailing run of edits is
    // flushed by the `.equal` branch below.
    try diffs.append(allocator, .{ .operation = .equal, .text = "" });
    var pointer: usize = 0;
    var count_delete: usize = 0;
    var count_insert: usize = 0;

    var text_delete: std.ArrayListUnmanaged(u8) = .empty;
    defer text_delete.deinit(allocator);

    var text_insert: std.ArrayListUnmanaged(u8) = .empty;
    defer text_insert.deinit(allocator);

    while (pointer < diffs.items.len) {
        switch (diffs.items[pointer].operation) {
            .insert => {
                count_insert += 1;
                try text_insert.appendSlice(allocator, diffs.items[pointer].text);
                pointer += 1;
            },
            .delete => {
                count_delete += 1;
                try text_delete.appendSlice(allocator, diffs.items[pointer].text);
                pointer += 1;
            },
            .equal => {
                // Upon reaching an equality, check for prior redundancies.
                const run_length = count_delete + count_insert;
                if (run_length > 1) {
                    if (count_delete != 0 and count_insert != 0) {
                        // Factor out any common prefixes.
                        const prefix_length = diffCommonPrefix(text_insert.items, text_delete.items);
                        if (prefix_length != 0) {
                            const prefix = text_insert.items[0..prefix_length];
                            const run_start = pointer - run_length;
                            if (run_start > 0 and diffs.items[run_start - 1].operation == .equal) {
                                // Append the prefix to the equality in front of the run.
                                const old_text = diffs.items[run_start - 1].text;
                                diffs.items[run_start - 1].text =
                                    try std.mem.concat(allocator, u8, &.{ old_text, prefix });
                                allocator.free(old_text);
                            } else {
                                // No equality in front of the run: create one at the head.
                                try diffs.ensureUnusedCapacity(allocator, 1);
                                const text = try allocator.dupe(u8, prefix);
                                diffs.insertAssumeCapacity(0, .{ .operation = .equal, .text = text });
                                pointer += 1;
                            }
                            text_insert.replaceRangeAssumeCapacity(0, prefix_length, &.{});
                            text_delete.replaceRangeAssumeCapacity(0, prefix_length, &.{});
                        }
                        // Factor out any common suffixes: the shared tail is
                        // moved to the front of the equality that ended the run.
                        const suffix_length = diffCommonSuffix(text_insert.items, text_delete.items);
                        if (suffix_length != 0) {
                            const old_text = diffs.items[pointer].text;
                            diffs.items[pointer].text = try std.mem.concat(allocator, u8, &.{
                                text_insert.items[text_insert.items.len - suffix_length ..],
                                old_text,
                            });
                            allocator.free(old_text);
                            text_insert.items.len -= suffix_length;
                            text_delete.items.len -= suffix_length;
                        }
                    }
                    // Delete the offending records and add the merged ones.
                    pointer -= run_length;
                    freeRangeDiffList(allocator, diffs, pointer, run_length);
                    diffs.replaceRangeAssumeCapacity(pointer, run_length, &.{});

                    if (text_delete.items.len != 0) {
                        try diffs.ensureUnusedCapacity(allocator, 1);
                        diffs.insertAssumeCapacity(pointer, .{
                            .operation = .delete,
                            .text = try allocator.dupe(u8, text_delete.items),
                        });
                        pointer += 1;
                    }
                    if (text_insert.items.len != 0) {
                        try diffs.ensureUnusedCapacity(allocator, 1);
                        diffs.insertAssumeCapacity(pointer, .{
                            .operation = .insert,
                            .text = try allocator.dupe(u8, text_insert.items),
                        });
                        pointer += 1;
                    }
                    pointer += 1;
                } else if (pointer != 0 and diffs.items[pointer - 1].operation == .equal) {
                    // Merge this equality with the previous one.
                    const old_text = diffs.items[pointer - 1].text;
                    diffs.items[pointer - 1].text = try std.mem.concat(allocator, u8, &.{
                        old_text,
                        diffs.items[pointer].text,
                    });
                    allocator.free(old_text);
                    const dead_diff = diffs.orderedRemove(pointer);
                    allocator.free(dead_diff.text);
                } else {
                    pointer += 1;
                }
                count_insert = 0;
                count_delete = 0;
                text_delete.clearRetainingCapacity();
                text_insert.clearRetainingCapacity();
            },
        }
    }
    // Drop the trailing entry again if it is still empty.
    if (diffs.items[diffs.items.len - 1].text.len == 0) {
        diffs.items.len -= 1;
    }

    // Second pass: look for single edits surrounded on both sides by
    // equalities which can be shifted sideways to eliminate an equality.
    // e.g: =A +BA =C  ->  +AB =AC
    var changes = false;
    pointer = 1;
    // Intentionally ignore the first and last element (don't need checking).
    // Written as `pointer + 1 < len` so an empty list cannot underflow.
    while (pointer + 1 < diffs.items.len) {
        if (diffs.items[pointer - 1].operation == .equal and
            diffs.items[pointer + 1].operation == .equal)
        {
            // This is a single edit surrounded by equalities.
            if (std.mem.endsWith(u8, diffs.items[pointer].text, diffs.items[pointer - 1].text)) {
                // Shift the edit over the previous equality.
                const old_pt = diffs.items[pointer].text;
                const pt = try std.mem.concat(allocator, u8, &.{
                    diffs.items[pointer - 1].text,
                    diffs.items[pointer].text[0 .. diffs.items[pointer].text.len -
                        diffs.items[pointer - 1].text.len],
                });
                allocator.free(old_pt);
                diffs.items[pointer].text = pt;
                const old_pt1t = diffs.items[pointer + 1].text;
                const p1t = try std.mem.concat(allocator, u8, &.{
                    diffs.items[pointer - 1].text,
                    diffs.items[pointer + 1].text,
                });
                allocator.free(old_pt1t);
                diffs.items[pointer + 1].text = p1t;
                freeRangeDiffList(allocator, diffs, pointer - 1, 1);
                diffs.replaceRangeAssumeCapacity(pointer - 1, 1, &.{});
                changes = true;
            } else if (std.mem.startsWith(u8, diffs.items[pointer].text, diffs.items[pointer + 1].text)) {
                // Shift the edit over the next equality.
                const old_ptm1 = diffs.items[pointer - 1].text;
                const pm1t = try std.mem.concat(allocator, u8, &.{
                    diffs.items[pointer - 1].text,
                    diffs.items[pointer + 1].text,
                });
                allocator.free(old_ptm1);
                diffs.items[pointer - 1].text = pm1t;
                const old_pt = diffs.items[pointer].text;
                const pt = try std.mem.concat(allocator, u8, &.{
                    diffs.items[pointer].text[diffs.items[pointer + 1].text.len..],
                    diffs.items[pointer + 1].text,
                });
                allocator.free(old_pt);
                diffs.items[pointer].text = pt;
                freeRangeDiffList(allocator, diffs, pointer + 1, 1);
                diffs.replaceRangeAssumeCapacity(pointer + 1, 1, &.{});
                changes = true;
            }
        }
        pointer += 1;
    }
    // If shifts were made, the diff needs reordering and another shift sweep.
    if (changes) {
        try diffCleanupMerge(allocator, diffs);
    }
}

/// Reduce the number of edits by eliminating semantically trivial
/// equalities.
/// The list is rewritten in place; replaced texts are freed with `allocator`.
/// @param diffs List of Diff objects.
pub fn diffCleanupSemantic(allocator: std.mem.Allocator, diffs: *DiffList) DiffError!void {
    var changes = false;
    // Stack of indices where equalities are found.
    var equalities: std.ArrayListUnmanaged(isize) = .empty;
    defer equalities.deinit(allocator);
    // Text of the equality on top of the stack, or null when there is none.
    var last_equality: ?[]const u8 = null;
    // Index of current position. Signed because a rewind to the start of
    // the list is expressed as -1 followed by the increment at the loop end.
    var pointer: isize = 0;
    // Number of characters that changed prior to the equality.
    var length_insertions1: usize = 0;
    var length_deletions1: usize = 0;
    // Number of characters that changed after the equality.
    var length_insertions2: usize = 0;
    var length_deletions2: usize = 0;
    while (pointer < diffs.items.len) {
        if (diffs.items[@intCast(pointer)].operation == .equal) { // Equality found.
            try equalities.append(allocator, pointer);
            length_insertions1 = length_insertions2;
            length_deletions1 = length_deletions2;
            length_insertions2 = 0;
            length_deletions2 = 0;
            last_equality = diffs.items[@intCast(pointer)].text;
        } else { // an insertion or deletion
            if (diffs.items[@intCast(pointer)].operation == .insert) {
                length_insertions2 += diffs.items[@intCast(pointer)].text.len;
            } else {
                length_deletions2 += diffs.items[@intCast(pointer)].text.len;
            }
            // Eliminate an equality that is smaller or equal to the edits on both
            // sides of it.
            if (last_equality != null and
                (last_equality.?.len <= @max(length_insertions1, length_deletions1)) and
                (last_equality.?.len <= @max(length_insertions2, length_deletions2)))
            {
                // Duplicate record.
                try diffs.ensureUnusedCapacity(allocator, 1);
                diffs.insertAssumeCapacity(
                    @intCast(equalities.items[equalities.items.len - 1]),
                    .{
                        .operation = .delete,
                        .text = try allocator.dupe(u8, last_equality.?),
                    },
                );
                // Change second copy to insert.
                diffs.items[@intCast(equalities.items[equalities.items.len - 1] + 1)].operation = .insert;
                // Throw away the equality we just deleted.
                _ = equalities.pop();
                // Throw away the previous equality too; it needs to be reevaluated.
                if (equalities.items.len > 0) {
                    _ = equalities.pop();
                }
                pointer = if (equalities.items.len > 0) equalities.items[equalities.items.len - 1] else -1;
                length_insertions1 = 0; // Reset the counters.
                length_deletions1 = 0;
                length_insertions2 = 0;
                length_deletions2 = 0;
                last_equality = null;
                changes = true;
            }
        }
        pointer += 1;
    }

    // Normalize the diff.
    if (changes) {
        try diffCleanupMerge(allocator, diffs);
    }
    try diffCleanupSemanticLossless(allocator, diffs);

    // Find any overlaps between deletions and insertions.
    // e.g: -abcxxx +xxxdef
    //   -> -abc =xxx +def
    // e.g: -xxxabc +defxxx
    //   -> +def =xxx -abc
    // Only extract an overlap if it is as big as the edit ahead or behind it.
    var index: usize = 1;
    while (index < diffs.items.len) {
        if (diffs.items[index - 1].operation == .delete and
            diffs.items[index].operation == .insert)
        {
            const deletion = diffs.items[index - 1].text;
            const insertion = diffs.items[index].text;
            const overlap_length1: usize = diffCommonOverlap(deletion, insertion);
            const overlap_length2: usize = diffCommonOverlap(insertion, deletion);
            if (overlap_length1 >= overlap_length2) {
                // "At least half of either edit", in exact integer arithmetic.
                if (overlap_length1 * 2 >= deletion.len or
                    overlap_length1 * 2 >= insertion.len)
                {
                    // Overlap found.
                    // Insert an equality and trim the surrounding edits.
                    try diffs.ensureUnusedCapacity(allocator, 1);
                    diffs.insertAssumeCapacity(index, .{
                        .operation = .equal,
                        .text = try allocator.dupe(u8, insertion[0..overlap_length1]),
                    });
                    diffs.items[index - 1].text =
                        try allocator.dupe(u8, deletion[0 .. deletion.len - overlap_length1]);
                    allocator.free(deletion);
                    diffs.items[index + 1].text =
                        try allocator.dupe(u8, insertion[overlap_length1..]);
                    allocator.free(insertion);
                    index += 1;
                }
            } else {
                if (overlap_length2 * 2 >= deletion.len or
                    overlap_length2 * 2 >= insertion.len)
                {
                    // Reverse overlap found.
                    // Insert an equality and swap and trim the surrounding edits.
                    try diffs.ensureUnusedCapacity(allocator, 1);
                    diffs.insertAssumeCapacity(index, .{
                        .operation = .equal,
                        .text = try allocator.dupe(u8, deletion[0..overlap_length2]),
                    });
                    const new_minus = try allocator.dupe(u8, insertion[0 .. insertion.len - overlap_length2]);
                    errdefer allocator.free(new_minus); // necessary due to swap
                    const new_plus = try allocator.dupe(u8, deletion[overlap_length2..]);
                    allocator.free(deletion);
                    allocator.free(insertion);
                    diffs.items[index - 1].operation = .insert;
                    diffs.items[index - 1].text = new_minus;
                    diffs.items[index + 1].operation = .delete;
                    diffs.items[index + 1].text = new_plus;
                    index += 1;
                }
            }
            index += 1;
        }
        index += 1;
    }
}

/// Look for single edits surrounded on both sides by equalities
/// which can be shifted sideways to align the edit to a word boundary.
/// e.g: =The c +at c =ame.  ->  =The  +cat  =came.
/// The list is rewritten in place; replaced texts are freed with `allocator`.
pub fn diffCleanupSemanticLossless(
    allocator: std.mem.Allocator,
    diffs: *DiffList,
) DiffError!void {
    var pointer: usize = 1;
    // Intentionally ignore the first and last element (don't need checking).
    while (pointer + 1 < diffs.items.len) {
        if (diffs.items[pointer - 1].operation == .equal and
            diffs.items[pointer + 1].operation == .equal)
        {
            // This is a single edit surrounded by equalities.
            // Work on private copies so the list is only touched once the
            // best split is known.
            var equality_1: std.ArrayListUnmanaged(u8) = .empty;
            defer equality_1.deinit(allocator);
            try equality_1.appendSlice(allocator, diffs.items[pointer - 1].text);

            var edit: std.ArrayListUnmanaged(u8) = .empty;
            defer edit.deinit(allocator);
            try edit.appendSlice(allocator, diffs.items[pointer].text);

            var equality_2: std.ArrayListUnmanaged(u8) = .empty;
            defer equality_2.deinit(allocator);
            try equality_2.appendSlice(allocator, diffs.items[pointer + 1].text);

            // First, shift the edit as far left as possible.
            const common_offset = diffCommonSuffix(equality_1.items, edit.items);
            if (common_offset > 0) {
                // TODO: Use buffer
                const common_string = try allocator.dupe(u8, edit.items[edit.items.len - common_offset ..]);
                defer allocator.free(common_string);

                equality_1.items.len = equality_1.items.len - common_offset;

                const not_common = try allocator.dupe(u8, edit.items[0 .. edit.items.len - common_offset]);
                defer allocator.free(not_common);

                // edit = common_string ++ not_common
                edit.clearRetainingCapacity();
                try edit.appendSlice(allocator, common_string);
                try edit.appendSlice(allocator, not_common);

                try equality_2.insertSlice(allocator, 0, common_string);
            }

            // Second, step character by character right,
            // looking for the best fit.
            var best_equality_1: std.ArrayListUnmanaged(u8) = .empty;
            defer best_equality_1.deinit(allocator);
            try best_equality_1.appendSlice(allocator, equality_1.items);

            var best_edit: std.ArrayListUnmanaged(u8) = .empty;
            defer best_edit.deinit(allocator);
            try best_edit.appendSlice(allocator, edit.items);

            var best_equality_2: std.ArrayListUnmanaged(u8) = .empty;
            defer best_equality_2.deinit(allocator);
            try best_equality_2.appendSlice(allocator, equality_2.items);

            var best_score = diffCleanupSemanticScore(equality_1.items, edit.items) +
                diffCleanupSemanticScore(edit.items, equality_2.items);

            while (edit.items.len != 0 and equality_2.items.len != 0 and edit.items[0] == equality_2.items[0]) {
                // Move one byte from the front of the edit to the end of
                // equality_1, and one from the front of equality_2 to the
                // end of the edit.
                try equality_1.append(allocator, edit.items[0]);

                _ = edit.orderedRemove(0);
                try edit.append(allocator, equality_2.items[0]);

                _ = equality_2.orderedRemove(0);

                const score = diffCleanupSemanticScore(equality_1.items, edit.items) +
                    diffCleanupSemanticScore(edit.items, equality_2.items);
                // The >= encourages trailing rather than leading whitespace on
                // edits.
                if (score >= best_score) {
                    best_score = score;

                    best_equality_1.clearRetainingCapacity();
                    try best_equality_1.appendSlice(allocator, equality_1.items);

                    best_edit.clearRetainingCapacity();
                    try best_edit.appendSlice(allocator, edit.items);

                    best_equality_2.clearRetainingCapacity();
                    try best_equality_2.appendSlice(allocator, equality_2.items);
                }
            }

            if (!std.mem.eql(u8, diffs.items[pointer - 1].text, best_equality_1.items)) {
                // We have an improvement, save it back to the diff.
                if (best_equality_1.items.len != 0) {
                    const old_text = diffs.items[pointer - 1].text;
                    diffs.items[pointer - 1].text = try allocator.dupe(u8, best_equality_1.items);
                    allocator.free(old_text);
                } else {
                    // The leading equality was consumed entirely; the edit
                    // moves down one slot.
                    const old_diff = diffs.orderedRemove(pointer - 1);
                    allocator.free(old_diff.text);
                    pointer -= 1;
                }
                const old_text1 = diffs.items[pointer].text;
                diffs.items[pointer].text = try allocator.dupe(u8, best_edit.items);
                defer allocator.free(old_text1);
                if (best_equality_2.items.len != 0) {
                    const old_text2 = diffs.items[pointer + 1].text;
                    diffs.items[pointer + 1].text = try allocator.dupe(u8, best_equality_2.items);
                    allocator.free(old_text2);
                } else {
                    const old_diff = diffs.orderedRemove(pointer + 1);
                    allocator.free(old_diff.text);
                    pointer -= 1;
                }
            }
        }
        pointer += 1;
    }
}

/// Given two strings, compute a score representing whether the internal
/// boundary falls on logical boundaries.
/// Scores range from 6 (best) to 0 (worst).
/// @param one First string.
/// @param two Second string.
/// @return The score.
fn diffCleanupSemanticScore(one: []const u8, two: []const u8) usize {
    // An empty side means the boundary is at an edge; edges are the best.
    if (one.len == 0 or two.len == 0) return 6;

    // Each port of this function behaves slightly differently due to
    // subtle differences in each language's definition of things like
    // 'whitespace'. Since this function's purpose is largely cosmetic,
    // the choice has been made to use each language's native features
    // rather than force total conformity.
    const last_of_one = one[one.len - 1];
    const first_of_two = two[0];

    // Each category is a refinement of the previous one.
    const non_alnum_1 = !std.ascii.isAlphanumeric(last_of_one);
    const non_alnum_2 = !std.ascii.isAlphanumeric(first_of_two);
    const space_1 = non_alnum_1 and std.ascii.isWhitespace(last_of_one);
    const space_2 = non_alnum_2 and std.ascii.isWhitespace(first_of_two);
    const break_1 = space_1 and std.ascii.isControl(last_of_one);
    const break_2 = space_2 and std.ascii.isControl(first_of_two);

    // `one` ends with a blank line: "\n\n" or "\n\r\n".
    const blank_1 = break_1 and
        (std.mem.endsWith(u8, one, "\n\n") or std.mem.endsWith(u8, one, "\n\r\n"));
    // `two` starts with a blank line: two line breaks, each "\n" or "\r\n".
    const blank_2 = break_2 and
        (std.mem.startsWith(u8, two, "\n\n") or
            std.mem.startsWith(u8, two, "\r\n\n") or
            std.mem.startsWith(u8, two, "\n\r\n") or
            std.mem.startsWith(u8, two, "\r\n\r\n"));

    // Five points for blank lines.
    if (blank_1 or blank_2) return 5;
    // Four points for line breaks.
    if (break_1 or break_2) return 4;
    // Three points for end of sentences.
    if (non_alnum_1 and !space_1 and space_2) return 3;
    // Two points for whitespace.
    if (space_1 or space_2) return 2;
    // One point for non-alphanumeric.
    if (non_alnum_1 or non_alnum_2) return 1;
    return 0;
}

/// Reduce the number of edits by eliminating operationally trivial
/// equalities.
/// An equality shorter than `dmp.diff_edit_cost` that follows an edit is
/// turned into a delete/insert pair when edits of both kinds sit on both
/// sides of it, or when it is shorter than half the edit cost and exactly
/// three of those four edits are present. The list is rewritten in place and
/// normalised with `diffCleanupMerge` if anything changed.
pub fn diffCleanupEfficiency(
    dmp: DiffMatchPatch,
    allocator: std.mem.Allocator,
    diffs: *DiffList,
) DiffError!void {
    var merged_any = false;
    // Stack of indices of equalities that may still be split.
    var candidates: std.ArrayListUnmanaged(usize) = .empty;
    defer candidates.deinit(allocator);
    // Text of the candidate on top of the stack; empty when there is none.
    var candidate_text: []const u8 = "";
    // Whether an insertion / deletion occurs before the last candidate.
    var ins_before = false;
    var del_before = false;
    // Whether an insertion / deletion occurs after the last candidate.
    var ins_after = false;
    var del_after = false;

    var index: usize = 0;
    while (index < diffs.items.len) {
        // Where to continue; overwritten when a split forces a rewind.
        var next_index = index + 1;
        const current = diffs.items[index];
        switch (current.operation) {
            .equal => {
                if (current.text.len < dmp.diff_edit_cost and (ins_after or del_after)) {
                    // Candidate found.
                    try candidates.append(allocator, index);
                    ins_before = ins_after;
                    del_before = del_after;
                    candidate_text = current.text;
                } else {
                    // Not a candidate, and can never become one.
                    candidates.clearRetainingCapacity();
                    candidate_text = "";
                }
                ins_after = false;
                del_after = false;
            },
            .insert, .delete => {
                if (current.operation == .delete) del_after = true else ins_after = true;

                var flags_set: u8 = 0;
                for ([_]bool{ ins_before, del_before, ins_after, del_after }) |flag| {
                    if (flag) flags_set += 1;
                }
                const fully_surrounded = ins_before and del_before and ins_after and del_after;
                const cheap_and_mostly_surrounded =
                    candidate_text.len < dmp.diff_edit_cost / 2 and flags_set == 3;

                if (candidate_text.len != 0 and (fully_surrounded or cheap_and_mostly_surrounded)) {
                    const split_at = candidates.items[candidates.items.len - 1];
                    // Duplicate record.
                    try diffs.ensureUnusedCapacity(allocator, 1);
                    const deleted_copy = try allocator.dupe(u8, candidate_text);
                    diffs.insertAssumeCapacity(split_at, .{
                        .operation = .delete,
                        .text = deleted_copy,
                    });
                    // Change second copy to insert.
                    diffs.items[split_at + 1].operation = .insert;
                    _ = candidates.pop(); // Throw away the equality we just deleted.
                    candidate_text = "";
                    if (ins_before and del_before) {
                        // No changes made which could affect previous entry, keep going.
                        ins_after = true;
                        del_after = true;
                        candidates.clearRetainingCapacity();
                    } else {
                        if (candidates.items.len > 0) _ = candidates.pop();

                        // Resume just after the previous candidate, or at
                        // the start of the list when none is left.
                        next_index = if (candidates.items.len > 0)
                            candidates.items[candidates.items.len - 1] + 1
                        else
                            0;
                        ins_after = false;
                        del_after = false;
                    }
                    merged_any = true;
                }
            },
        }
        index = next_index;
    }

    if (merged_any) {
        try diffCleanupMerge(allocator, diffs);
    }
}

/// Determine if the suffix of one string is the prefix of another.
/// @param text1 First string.
/// @param text2 Second string.
/// @return The number of characters common to the end of the first
/// string and the start of the second string.
1466 | fn diffCommonOverlap(text1_in: []const u8, text2_in: []const u8) usize { 1467 | var text1 = text1_in; 1468 | var text2 = text2_in; 1469 | 1470 | // Cache the text lengths to prevent multiple calls. 1471 | const text1_length = text1.len; 1472 | const text2_length = text2.len; 1473 | // Eliminate the null case. 1474 | if (text1_length == 0 or text2_length == 0) { 1475 | return 0; 1476 | } 1477 | // Truncate the longer string. 1478 | if (text1_length > text2_length) { 1479 | text1 = text1[text1_length - text2_length ..]; 1480 | } else if (text1_length < text2_length) { 1481 | text2 = text2[0..text1_length]; 1482 | } 1483 | const text_length = @min(text1_length, text2_length); 1484 | // Quick check for the worst case. 1485 | if (std.mem.eql(u8, text1, text2)) { 1486 | return text_length; 1487 | } 1488 | 1489 | // Start by looking for a single character match 1490 | // and increase length until no match is found. 1491 | // Performance analysis: https://neil.fraser.name/news/2010/11/04/ 1492 | var best: usize = 0; 1493 | var length: usize = 1; 1494 | while (true) { 1495 | const pattern = text1[text_length - length ..]; 1496 | const found = std.mem.indexOf(u8, text2, pattern) orelse 1497 | return best; 1498 | 1499 | length += found; 1500 | 1501 | if (found == 0 or std.mem.eql(u8, text1[text_length - length ..], text2[0..length])) { 1502 | best = length; 1503 | length += 1; 1504 | } 1505 | } 1506 | } 1507 | 1508 | // DONE [✅]: Allocate all text in diffs to 1509 | // not cause segfault while freeing 1510 | 1511 | test diffCommonPrefix { 1512 | // Detect any common suffix. 1513 | try testing.expectEqual(@as(usize, 0), diffCommonPrefix("abc", "xyz")); // Null case 1514 | try testing.expectEqual(@as(usize, 4), diffCommonPrefix("1234abcdef", "1234xyz")); // Non-null case 1515 | try testing.expectEqual(@as(usize, 4), diffCommonPrefix("1234", "1234xyz")); // Whole case 1516 | } 1517 | 1518 | test diffCommonSuffix { 1519 | // Detect any common suffix. 
1520 | try testing.expectEqual(@as(usize, 0), diffCommonSuffix("abc", "xyz")); // Null case 1521 | try testing.expectEqual(@as(usize, 4), diffCommonSuffix("abcdef1234", "xyz1234")); // Non-null case 1522 | try testing.expectEqual(@as(usize, 4), diffCommonSuffix("1234", "xyz1234")); // Whole case 1523 | } 1524 | 1525 | test diffCommonOverlap { 1526 | // Detect any suffix/prefix overlap. 1527 | try testing.expectEqual(@as(usize, 0), diffCommonOverlap("", "abcd")); // Null case 1528 | try testing.expectEqual(@as(usize, 3), diffCommonOverlap("abc", "abcd")); // Whole case 1529 | try testing.expectEqual(@as(usize, 0), diffCommonOverlap("123456", "abcd")); // No overlap 1530 | try testing.expectEqual(@as(usize, 3), diffCommonOverlap("123456xxx", "xxxabcd")); // Overlap 1531 | 1532 | // Some overly clever languages (C#) may treat ligatures as equal to their 1533 | // component letters. E.g. U+FB01 == 'fi' 1534 | try testing.expectEqual(@as(usize, 0), diffCommonOverlap("fi", "\u{fb01}")); // Unicode 1535 | } 1536 | 1537 | fn testDiffHalfMatch( 1538 | allocator: std.mem.Allocator, 1539 | params: struct { 1540 | dmp: DiffMatchPatch, 1541 | before: []const u8, 1542 | after: []const u8, 1543 | expected: ?HalfMatchResult, 1544 | }, 1545 | ) !void { 1546 | const maybe_result = try params.dmp.diffHalfMatch(allocator, params.before, params.after); 1547 | defer if (maybe_result) |result| result.deinit(allocator); 1548 | try testing.expectEqualDeep(params.expected, maybe_result); 1549 | } 1550 | 1551 | test diffHalfMatch { 1552 | const one_timeout: DiffMatchPatch = .{ .diff_timeout = 1 }; 1553 | 1554 | // No match #1 1555 | try checkAllAllocationFailures(testing.allocator, testDiffHalfMatch, .{.{ 1556 | .dmp = one_timeout, 1557 | .before = "1234567890", 1558 | .after = "abcdef", 1559 | .expected = null, 1560 | }}); 1561 | 1562 | // No match #2 1563 | try checkAllAllocationFailures(testing.allocator, testDiffHalfMatch, .{.{ 1564 | .dmp = one_timeout, 1565 | .before = "12345", 1566 | 
.after = "23", 1567 | .expected = null, 1568 | }}); 1569 | 1570 | // Single matches 1571 | try checkAllAllocationFailures(testing.allocator, testDiffHalfMatch, .{.{ 1572 | .dmp = one_timeout, 1573 | .before = "1234567890", 1574 | .after = "a345678z", 1575 | .expected = .{ 1576 | .prefix_before = "12", 1577 | .suffix_before = "90", 1578 | .prefix_after = "a", 1579 | .suffix_after = "z", 1580 | .common_middle = "345678", 1581 | }, 1582 | }}); 1583 | 1584 | // Single Match #2 1585 | try checkAllAllocationFailures(testing.allocator, testDiffHalfMatch, .{.{ 1586 | .dmp = one_timeout, 1587 | .before = "a345678z", 1588 | .after = "1234567890", 1589 | .expected = .{ 1590 | .prefix_before = "a", 1591 | .suffix_before = "z", 1592 | .prefix_after = "12", 1593 | .suffix_after = "90", 1594 | .common_middle = "345678", 1595 | }, 1596 | }}); 1597 | 1598 | // Single Match #3 1599 | try checkAllAllocationFailures(testing.allocator, testDiffHalfMatch, .{.{ 1600 | .dmp = one_timeout, 1601 | .before = "abc56789z", 1602 | .after = "1234567890", 1603 | .expected = .{ 1604 | .prefix_before = "abc", 1605 | .suffix_before = "z", 1606 | .prefix_after = "1234", 1607 | .suffix_after = "0", 1608 | .common_middle = "56789", 1609 | }, 1610 | }}); 1611 | 1612 | // Single Match #4 1613 | try checkAllAllocationFailures(testing.allocator, testDiffHalfMatch, .{.{ 1614 | .dmp = one_timeout, 1615 | .before = "a23456xyz", 1616 | .after = "1234567890", 1617 | .expected = .{ 1618 | .prefix_before = "a", 1619 | .suffix_before = "xyz", 1620 | .prefix_after = "1", 1621 | .suffix_after = "7890", 1622 | .common_middle = "23456", 1623 | }, 1624 | }}); 1625 | 1626 | // Multiple matches #1 1627 | try checkAllAllocationFailures(testing.allocator, testDiffHalfMatch, .{.{ 1628 | .dmp = one_timeout, 1629 | .before = "121231234123451234123121", 1630 | .after = "a1234123451234z", 1631 | .expected = .{ 1632 | .prefix_before = "12123", 1633 | .suffix_before = "123121", 1634 | .prefix_after = "a", 1635 | .suffix_after = 
"z", 1636 | .common_middle = "1234123451234", 1637 | }, 1638 | }}); 1639 | 1640 | // Multiple Matches #2 1641 | try checkAllAllocationFailures(testing.allocator, testDiffHalfMatch, .{.{ 1642 | .dmp = one_timeout, 1643 | .before = "x-=-=-=-=-=-=-=-=-=-=-=-=", 1644 | .after = "xx-=-=-=-=-=-=-=", 1645 | .expected = .{ 1646 | .prefix_before = "", 1647 | .suffix_before = "-=-=-=-=-=", 1648 | .prefix_after = "x", 1649 | .suffix_after = "", 1650 | .common_middle = "x-=-=-=-=-=-=-=", 1651 | }, 1652 | }}); 1653 | 1654 | // Multiple Matches #3 1655 | try checkAllAllocationFailures(testing.allocator, testDiffHalfMatch, .{.{ 1656 | .dmp = one_timeout, 1657 | .before = "-=-=-=-=-=-=-=-=-=-=-=-=y", 1658 | .after = "-=-=-=-=-=-=-=yy", 1659 | .expected = .{ 1660 | .prefix_before = "-=-=-=-=-=", 1661 | .suffix_before = "", 1662 | .prefix_after = "", 1663 | .suffix_after = "y", 1664 | .common_middle = "-=-=-=-=-=-=-=y", 1665 | }, 1666 | }}); 1667 | 1668 | // Other cases 1669 | 1670 | // Optimal diff would be -q+x=H-i+e=lloHe+Hu=llo-Hew+y not -qHillo+x=HelloHe-w+Hulloy 1671 | // Non-optimal halfmatch 1672 | try checkAllAllocationFailures(testing.allocator, testDiffHalfMatch, .{.{ 1673 | .dmp = one_timeout, 1674 | .before = "qHilloHelloHew", 1675 | .after = "xHelloHeHulloy", 1676 | .expected = .{ 1677 | .prefix_before = "qHillo", 1678 | .suffix_before = "w", 1679 | .prefix_after = "x", 1680 | .suffix_after = "Hulloy", 1681 | .common_middle = "HelloHe", 1682 | }, 1683 | }}); 1684 | 1685 | // Non-optimal halfmatch 1686 | try checkAllAllocationFailures(testing.allocator, testDiffHalfMatch, .{.{ 1687 | .dmp = .{ .diff_timeout = 0 }, 1688 | .before = "qHilloHelloHew", 1689 | .after = "xHelloHeHulloy", 1690 | .expected = null, 1691 | }}); 1692 | } 1693 | 1694 | test diffLinesToChars { 1695 | const allocator = testing.allocator; 1696 | // Convert lines down to characters. 
1697 | var tmp_array_list: std.ArrayListUnmanaged([]const u8) = .empty; 1698 | defer tmp_array_list.deinit(allocator); 1699 | try tmp_array_list.append(allocator, ""); 1700 | try tmp_array_list.append(allocator, "alpha\n"); 1701 | try tmp_array_list.append(allocator, "beta\n"); 1702 | 1703 | var result = try diffLinesToChars(allocator, "alpha\nbeta\nalpha\n", "beta\nalpha\nbeta\n"); 1704 | try testing.expectEqualStrings("\u{0001}\u{0002}\u{0001}", result.chars_1); // Shared lines #1 1705 | try testing.expectEqualStrings("\u{0002}\u{0001}\u{0002}", result.chars_2); // Shared lines #2 1706 | try testing.expectEqualDeep(tmp_array_list.items, result.line_array.items); // Shared lines #3 1707 | 1708 | tmp_array_list.clearRetainingCapacity(); 1709 | try tmp_array_list.append(allocator, ""); 1710 | try tmp_array_list.append(allocator, "alpha\r\n"); 1711 | try tmp_array_list.append(allocator, "beta\r\n"); 1712 | try tmp_array_list.append(allocator, "\r\n"); 1713 | result.deinit(allocator); 1714 | 1715 | result = try diffLinesToChars(allocator, "", "alpha\r\nbeta\r\n\r\n\r\n"); 1716 | try testing.expectEqualStrings("", result.chars_1); // Empty string and blank lines #1 1717 | try testing.expectEqualStrings("\u{0001}\u{0002}\u{0003}\u{0003}", result.chars_2); // Empty string and blank lines #2 1718 | try testing.expectEqualDeep(tmp_array_list.items, result.line_array.items); // Empty string and blank lines #3 1719 | 1720 | tmp_array_list.clearRetainingCapacity(); 1721 | try tmp_array_list.append(allocator, ""); 1722 | try tmp_array_list.append(allocator, "a"); 1723 | try tmp_array_list.append(allocator, "b"); 1724 | result.deinit(allocator); 1725 | 1726 | result = try diffLinesToChars(allocator, "a", "b"); 1727 | try testing.expectEqualStrings("\u{0001}", result.chars_1); // No linebreaks #1. 1728 | try testing.expectEqualStrings("\u{0002}", result.chars_2); // No linebreaks #2. 
1729 | try testing.expectEqualDeep(tmp_array_list.items, result.line_array.items); // No linebreaks #3. 1730 | result.deinit(allocator); 1731 | 1732 | // TODO: More than 256 to reveal any 8-bit limitations but this requires 1733 | // some unicode logic that I don't want to deal with 1734 | // 1735 | // Casting to Unicode is straightforward and should sort correctly, I'm 1736 | // more concerned about the weird behavior when the 'char' is equal to a 1737 | // newline. Uncomment the EqualSlices below to see what I mean. 1738 | // I think there's some cleanup logic in the actual linediff that should 1739 | // take care of the problem, but I don't like it. 1740 | 1741 | const n: u8 = 255; 1742 | tmp_array_list.clearRetainingCapacity(); 1743 | 1744 | var line_list: std.ArrayListUnmanaged(u8) = .empty; 1745 | defer line_list.deinit(allocator); 1746 | var char_list: std.ArrayListUnmanaged(u8) = .empty; 1747 | defer char_list.deinit(allocator); 1748 | 1749 | var i: u8 = 1; 1750 | while (i < n) : (i += 1) { 1751 | try tmp_array_list.append(allocator, &.{ i, '\n' }); 1752 | try line_list.appendSlice(allocator, &.{ i, '\n' }); 1753 | try char_list.append(allocator, i); 1754 | } 1755 | try testing.expectEqual(@as(usize, n - 1), tmp_array_list.items.len); // Test initialization fail #1 1756 | try testing.expectEqual(@as(usize, n - 1), char_list.items.len); // Test initialization fail #2 1757 | try tmp_array_list.insert(allocator, 0, ""); 1758 | result = try diffLinesToChars(allocator, line_list.items, ""); 1759 | defer result.deinit(allocator); 1760 | // TODO: This isn't equal, should it be? 1761 | // try testing.expectEqualSlices(u8, char_list.items, result.chars_1); 1762 | try testing.expectEqualStrings("", result.chars_2); 1763 | // TODO this is wrong because of the max_value I think? 
1764 | // try testing.expectEqualDeep(tmp_array_list.items, result.line_array.items); 1765 | }

/// Test body for `diffCharsToLines`, shaped so it can be driven by
/// `checkAllAllocationFailures`.
///
/// Copies `params.diffs` into a list owned by `allocator` (through
/// `sliceToDiffList`), passes that list and `params.line_array` to
/// `diffCharsToLines`, and compares the list it returns with
/// `params.expected`. Both lists are handed to `deinitDiffList` on every exit
/// path, including error returns.
fn testDiffCharsToLines(
    allocator: std.mem.Allocator,
    params: struct {
        diffs: []const Diff,
        line_array: []const []const u8,
        expected: []const Diff,
    },
) !void {
    // Use the shared helper instead of repeating its copy loop here.
    var char_diffs = try sliceToDiffList(allocator, params.diffs);
    defer deinitDiffList(allocator, &char_diffs);

    var line_diffs = try diffCharsToLines(allocator, &char_diffs, params.line_array);
    defer deinitDiffList(allocator, &line_diffs);

    try testing.expectEqualDeep(params.expected, line_diffs.items);
}

test diffCharsToLines {
    // Convert chars up to lines.
    //
    // The fixture is passed as a literal slice. `testDiffCharsToLines` makes
    // its own owned copy of every entry, so the test body does not need to
    // allocate anything itself (the previous version duplicated the strings
    // inside a single argument list, which leaked the first copy whenever the
    // second duplication failed).
    try checkAllAllocationFailures(testing.allocator, testDiffCharsToLines, .{.{
        .diffs = &.{
            .{ .operation = .equal, .text = "\u{0001}\u{0002}\u{0001}" },
            .{ .operation = .insert, .text = "\u{0002}\u{0001}\u{0002}" },
        },
        .line_array = &[_][]const u8{
            "",
            "alpha\n",
            "beta\n",
        },
        .expected = &.{
            .{ .operation = .equal, .text = "alpha\nbeta\nalpha\n" },
            .{ .operation = .insert, .text = "beta\nalpha\nbeta\n" },
        },
    }});

    // TODO: Implement exhaustive tests
}

/// Test body for `diffCleanupMerge`, shaped so it can be driven by
/// `checkAllAllocationFailures`.
///
/// Copies `params.input` into a list owned by `allocator`, runs
/// `diffCleanupMerge` on that list in place, and compares the list contents
/// with `params.expected`.
fn testDiffCleanupMerge(allocator: std.mem.Allocator, params: struct {
    input: []const Diff,
    expected: []const Diff,
}) !void {
    // Use the shared helper instead of repeating its copy loop here.
    var diffs = try sliceToDiffList(allocator, params.input);
    defer deinitDiffList(allocator, &diffs);

    try diffCleanupMerge(allocator, &diffs);

    try testing.expectEqualDeep(params.expected, diffs.items);
}

1829 | test diffCleanupMerge { 1830 | // Cleanup a messy diff. 1831 | 1832 | // No change case 1833 | try checkAllAllocationFailures(testing.allocator, testDiffCleanupMerge, .{.{ 1834 | .input = &.{ 1835 | .{ .operation = .equal, .text = "a" }, 1836 | .{ .operation = .delete, .text = "b" }, 1837 | .{ .operation = .insert, .text = "c" }, 1838 | }, 1839 | .expected = &.{ 1840 | .{ .operation = .equal, .text = "a" }, 1841 | .{ .operation = .delete, .text = "b" }, 1842 | .{ .operation = .insert, .text = "c" }, 1843 | }, 1844 | }}); 1845 | 1846 | // Merge equalities 1847 | try checkAllAllocationFailures(testing.allocator, testDiffCleanupMerge, .{.{ 1848 | .input = &.{ 1849 | .{ .operation = .equal, .text = "a" }, 1850 | .{ .operation = .equal, .text = "b" }, 1851 | .{ .operation = .equal, .text = "c" }, 1852 | }, 1853 | .expected = &.{ 1854 | .{ .operation = .equal, .text = "abc" }, 1855 | }, 1856 | }}); 1857 | 1858 | // Merge deletions 1859 | try checkAllAllocationFailures(testing.allocator, testDiffCleanupMerge, .{.{ 1860 | .input = &.{ 1861 | .{ .operation = .delete, .text = "a" }, 1862 | .{ .operation = .delete, .text = "b" }, 1863 | .{ .operation = .delete, .text = "c" }, 1864 | }, 1865 | .expected = &.{ 1866 | .{ .operation = .delete, .text = "abc" }, 1867 | }, 1868 | }}); 1869 | 1870 | // Merge insertions 1871 | try checkAllAllocationFailures(testing.allocator, testDiffCleanupMerge, .{.{ 1872 | .input = &.{ 1873 | .{ .operation = .insert, .text = "a" }, 1874 | .{ .operation = .insert, .text = "b" }, 1875 | .{ .operation = .insert, .text = "c" }, 1876 | }, 1877 | .expected
= &.{ 1878 | .{ .operation = .insert, .text = "abc" }, 1879 | }, 1880 | }}); 1881 | 1882 | // Merge interweave 1883 | try checkAllAllocationFailures(testing.allocator, testDiffCleanupMerge, .{.{ 1884 | .input = &.{ 1885 | .{ .operation = .delete, .text = "a" }, 1886 | .{ .operation = .insert, .text = "b" }, 1887 | .{ .operation = .delete, .text = "c" }, 1888 | .{ .operation = .insert, .text = "d" }, 1889 | .{ .operation = .equal, .text = "e" }, 1890 | .{ .operation = .equal, .text = "f" }, 1891 | }, 1892 | .expected = &.{ 1893 | .{ .operation = .delete, .text = "ac" }, 1894 | .{ .operation = .insert, .text = "bd" }, 1895 | .{ .operation = .equal, .text = "ef" }, 1896 | }, 1897 | }}); 1898 | 1899 | // Prefix and suffix detection 1900 | try checkAllAllocationFailures(testing.allocator, testDiffCleanupMerge, .{.{ 1901 | .input = &.{ 1902 | .{ .operation = .delete, .text = "a" }, 1903 | .{ .operation = .insert, .text = "abc" }, 1904 | .{ .operation = .delete, .text = "dc" }, 1905 | }, 1906 | .expected = &.{ 1907 | .{ .operation = .equal, .text = "a" }, 1908 | .{ .operation = .delete, .text = "d" }, 1909 | .{ .operation = .insert, .text = "b" }, 1910 | .{ .operation = .equal, .text = "c" }, 1911 | }, 1912 | }}); 1913 | 1914 | // Prefix and suffix detection with equalities 1915 | try checkAllAllocationFailures(testing.allocator, testDiffCleanupMerge, .{.{ 1916 | .input = &.{ 1917 | .{ .operation = .equal, .text = "x" }, 1918 | .{ .operation = .delete, .text = "a" }, 1919 | .{ .operation = .insert, .text = "abc" }, 1920 | .{ .operation = .delete, .text = "dc" }, 1921 | .{ .operation = .equal, .text = "y" }, 1922 | }, 1923 | .expected = &.{ 1924 | .{ .operation = .equal, .text = "xa" }, 1925 | .{ .operation = .delete, .text = "d" }, 1926 | .{ .operation = .insert, .text = "b" }, 1927 | .{ .operation = .equal, .text = "cy" }, 1928 | }, 1929 | }}); 1930 | 1931 | // Slide edit left 1932 | try checkAllAllocationFailures(testing.allocator, testDiffCleanupMerge, .{.{ 1933 | 
.input = &.{ 1934 | .{ .operation = .equal, .text = "a" }, 1935 | .{ .operation = .insert, .text = "ba" }, 1936 | .{ .operation = .equal, .text = "c" }, 1937 | }, 1938 | .expected = &.{ 1939 | .{ .operation = .insert, .text = "ab" }, 1940 | .{ .operation = .equal, .text = "ac" }, 1941 | }, 1942 | }}); 1943 | 1944 | // Slide edit right 1945 | try checkAllAllocationFailures(testing.allocator, testDiffCleanupMerge, .{.{ 1946 | .input = &.{ 1947 | .{ .operation = .equal, .text = "c" }, 1948 | .{ .operation = .insert, .text = "ab" }, 1949 | .{ .operation = .equal, .text = "a" }, 1950 | }, 1951 | .expected = &.{ 1952 | .{ .operation = .equal, .text = "ca" }, 1953 | .{ .operation = .insert, .text = "ba" }, 1954 | }, 1955 | }}); 1956 | 1957 | // Slide edit left recursive 1958 | try checkAllAllocationFailures(testing.allocator, testDiffCleanupMerge, .{.{ 1959 | .input = &.{ 1960 | .{ .operation = .equal, .text = "a" }, 1961 | .{ .operation = .delete, .text = "b" }, 1962 | .{ .operation = .equal, .text = "c" }, 1963 | .{ .operation = .delete, .text = "ac" }, 1964 | .{ .operation = .equal, .text = "x" }, 1965 | }, 1966 | .expected = &.{ 1967 | .{ .operation = .delete, .text = "abc" }, 1968 | .{ .operation = .equal, .text = "acx" }, 1969 | }, 1970 | }}); 1971 | 1972 | // Slide edit right recursive 1973 | try checkAllAllocationFailures(testing.allocator, testDiffCleanupMerge, .{.{ 1974 | .input = &.{ 1975 | .{ .operation = .equal, .text = "x" }, 1976 | .{ .operation = .delete, .text = "ca" }, 1977 | .{ .operation = .equal, .text = "c" }, 1978 | .{ .operation = .delete, .text = "b" }, 1979 | .{ .operation = .equal, .text = "a" }, 1980 | }, 1981 | .expected = &.{ 1982 | .{ .operation = .equal, .text = "xca" }, 1983 | .{ .operation = .delete, .text = "cba" }, 1984 | }, 1985 | }}); 1986 | 1987 | // Empty merge 1988 | try checkAllAllocationFailures(testing.allocator, testDiffCleanupMerge, .{.{ 1989 | .input = &.{ 1990 | .{ .operation = .delete, .text = "b" }, 1991 | .{ .operation = 
.insert, .text = "ab" }, 1992 | .{ .operation = .equal, .text = "c" }, 1993 | }, 1994 | .expected = &.{ 1995 | .{ .operation = .insert, .text = "a" }, 1996 | .{ .operation = .equal, .text = "bc" }, 1997 | }, 1998 | }}); 1999 | 2000 | // Empty equality 2001 | try checkAllAllocationFailures(testing.allocator, testDiffCleanupMerge, .{.{ 2002 | .input = &.{ 2003 | .{ .operation = .equal, .text = "" }, 2004 | .{ .operation = .insert, .text = "a" }, 2005 | .{ .operation = .equal, .text = "b" }, 2006 | }, 2007 | .expected = &.{ 2008 | .{ .operation = .insert, .text = "a" }, 2009 | .{ .operation = .equal, .text = "b" }, 2010 | }, 2011 | }}); 2012 | }

/// Test body for `diffCleanupSemanticLossless`, shaped so it can be driven by
/// `checkAllAllocationFailures`.
///
/// Copies `params.input` into a list owned by `allocator`, runs
/// `diffCleanupSemanticLossless` on that list in place, and compares the list
/// contents with `params.expected`.
fn testDiffCleanupSemanticLossless(
    allocator: std.mem.Allocator,
    params: struct {
        input: []const Diff,
        expected: []const Diff,
    },
) !void {
    // Use the shared helper instead of repeating its copy loop here.
    var diffs = try sliceToDiffList(allocator, params.input);
    defer deinitDiffList(allocator, &diffs);

    try diffCleanupSemanticLossless(allocator, &diffs);

    try testing.expectEqualDeep(params.expected, diffs.items);
}

/// Builds a `DiffList` holding a copy of every entry in `diff_slice`.
///
/// Each entry's `text` is duplicated with `allocator`, so the returned list
/// does not alias the input. The caller owns the result; the call sites in
/// this file release it with `deinitDiffList`. If any allocation fails, the
/// partially built list is passed to `deinitDiffList` before the error is
/// returned.
fn sliceToDiffList(allocator: Allocator, diff_slice: []const Diff) !DiffList {
    var diff_list: DiffList = .empty;
    errdefer deinitDiffList(allocator, &diff_list);

    // Reserve room for every entry up front so the appends below cannot fail.
    try diff_list.ensureTotalCapacity(allocator, diff_slice.len);
    for (diff_slice) |source| {
        // Duplicate first: if this fails, nothing half-initialised is in the list.
        const text_copy = try allocator.dupe(u8, source.text);
        diff_list.appendAssumeCapacity(.{
            .operation = source.operation,
            .text = text_copy,
        });
    }
    return diff_list;
}

2046 | test diffCleanupSemanticLossless { 2047 | // Null case 2048 | try checkAllAllocationFailures(testing.allocator, testDiffCleanupSemanticLossless, .{.{ 2049 | .input = &[_]Diff{}, 2050 | .expected = &[_]Diff{}, 2051 | }}); 2052 | 2053 | 
//defer deinitDiffList(allocator, &diffs); 2054 | try checkAllAllocationFailures(testing.allocator, testDiffCleanupSemanticLossless, .{.{ 2055 | .input = &.{ 2056 | .{ .operation = .equal, .text = "AAA\r\n\r\nBBB" }, 2057 | .{ .operation = .insert, .text = "\r\nDDD\r\n\r\nBBB" }, 2058 | .{ .operation = .equal, .text = "\r\nEEE" }, 2059 | }, 2060 | .expected = &.{ 2061 | .{ .operation = .equal, .text = "AAA\r\n\r\n" }, 2062 | .{ .operation = .insert, .text = "BBB\r\nDDD\r\n\r\n" }, 2063 | .{ .operation = .equal, .text = "BBB\r\nEEE" }, 2064 | }, 2065 | }}); 2066 | 2067 | try checkAllAllocationFailures(testing.allocator, testDiffCleanupSemanticLossless, .{.{ 2068 | .input = &.{ 2069 | .{ .operation = .equal, .text = "AAA\r\nBBB" }, 2070 | .{ .operation = .insert, .text = " DDD\r\nBBB" }, 2071 | .{ .operation = .equal, .text = " EEE" }, 2072 | }, 2073 | .expected = &.{ 2074 | .{ .operation = .equal, .text = "AAA\r\n" }, 2075 | .{ .operation = .insert, .text = "BBB DDD\r\n" }, 2076 | .{ .operation = .equal, .text = "BBB EEE" }, 2077 | }, 2078 | }}); 2079 | 2080 | try checkAllAllocationFailures(testing.allocator, testDiffCleanupSemanticLossless, .{.{ 2081 | .input = &.{ 2082 | .{ .operation = .equal, .text = "The c" }, 2083 | .{ .operation = .insert, .text = "ow and the c" }, 2084 | .{ .operation = .equal, .text = "at." }, 2085 | }, 2086 | .expected = &.{ 2087 | .{ .operation = .equal, .text = "The " }, 2088 | .{ .operation = .insert, .text = "cow and the " }, 2089 | .{ .operation = .equal, .text = "cat." }, 2090 | }, 2091 | }}); 2092 | 2093 | try checkAllAllocationFailures(testing.allocator, testDiffCleanupSemanticLossless, .{.{ 2094 | .input = &.{ 2095 | .{ .operation = .equal, .text = "The-c" }, 2096 | .{ .operation = .insert, .text = "ow-and-the-c" }, 2097 | .{ .operation = .equal, .text = "at." 
}, 2098 | }, 2099 | .expected = &.{ 2100 | .{ .operation = .equal, .text = "The-" }, 2101 | .{ .operation = .insert, .text = "cow-and-the-" }, 2102 | .{ .operation = .equal, .text = "cat." }, 2103 | }, 2104 | }}); 2105 | 2106 | try checkAllAllocationFailures(testing.allocator, testDiffCleanupSemanticLossless, .{.{ 2107 | .input = &.{ 2108 | .{ .operation = .equal, .text = "a" }, 2109 | .{ .operation = .delete, .text = "a" }, 2110 | .{ .operation = .equal, .text = "ax" }, 2111 | }, 2112 | .expected = &.{ 2113 | .{ .operation = .delete, .text = "a" }, 2114 | .{ .operation = .equal, .text = "aax" }, 2115 | }, 2116 | }}); 2117 | 2118 | try checkAllAllocationFailures(testing.allocator, testDiffCleanupSemanticLossless, .{.{ 2119 | .input = &.{ 2120 | .{ .operation = .equal, .text = "xa" }, 2121 | .{ .operation = .delete, .text = "a" }, 2122 | .{ .operation = .equal, .text = "a" }, 2123 | }, 2124 | .expected = &.{ 2125 | .{ .operation = .equal, .text = "xaa" }, 2126 | .{ .operation = .delete, .text = "a" }, 2127 | }, 2128 | }}); 2129 | 2130 | try checkAllAllocationFailures(testing.allocator, testDiffCleanupSemanticLossless, .{.{ 2131 | .input = &.{ 2132 | .{ .operation = .equal, .text = "The xxx. The " }, 2133 | .{ .operation = .insert, .text = "zzz. The " }, 2134 | .{ .operation = .equal, .text = "yyy." }, 2135 | }, 2136 | .expected = &.{ 2137 | .{ .operation = .equal, .text = "The xxx." }, 2138 | .{ .operation = .insert, .text = " The zzz." }, 2139 | .{ .operation = .equal, .text = " The yyy." 
}, 2140 | }, 2141 | }}); 2142 | }

/// Rebuilds the two texts that `diffs` describes.
///
/// Index 0 of the result is the concatenation of every diff that is not an
/// insert; index 1 is the concatenation of every diff that is not a delete.
/// The caller owns both slices and must free them with `allocator`.
fn rebuildtexts(allocator: std.mem.Allocator, diffs: DiffList) ![2][]const u8 {
    var before: std.ArrayListUnmanaged(u8) = .empty;
    errdefer before.deinit(allocator);
    var after: std.ArrayListUnmanaged(u8) = .empty;
    errdefer after.deinit(allocator);

    for (diffs.items) |entry| {
        switch (entry.operation) {
            .delete => try before.appendSlice(allocator, entry.text),
            .insert => try after.appendSlice(allocator, entry.text),
            .equal => {
                try before.appendSlice(allocator, entry.text);
                try after.appendSlice(allocator, entry.text);
            },
        }
    }

    const before_text = try before.toOwnedSlice(allocator);
    // Once converted, the bytes are no longer held by `before`, so they need
    // their own cleanup in case the second conversion fails.
    errdefer allocator.free(before_text);
    const after_text = try after.toOwnedSlice(allocator);

    return .{ before_text, after_text };
}

/// Test body for `rebuildtexts`, shaped so it can be driven by
/// `checkAllAllocationFailures`.
///
/// Rebuilds both texts from `diffs` and compares them with `params.before`
/// and `params.after`. The rebuilt slices are freed on every exit path.
fn testRebuildTexts(allocator: Allocator, diffs: DiffList, params: struct {
    before: []const u8,
    after: []const u8,
}) !void {
    const rebuilt_before, const rebuilt_after = try rebuildtexts(allocator, diffs);
    // Deferred statements run in reverse order, so `rebuilt_before` is freed first.
    defer allocator.free(rebuilt_after);
    defer allocator.free(rebuilt_before);

    try testing.expectEqualStrings(params.before, rebuilt_before);
    try testing.expectEqualStrings(params.after, rebuilt_after);
}

test rebuildtexts {
    const gpa = testing.allocator;

    // Insert, equal and delete together.
    {
        var mixed = try sliceToDiffList(gpa, &.{
            .{ .operation = .insert, .text = "abcabc" },
            .{ .operation = .equal, .text = "defdef" },
            .{ .operation = .delete, .text = "ghighi" },
        });
        defer deinitDiffList(gpa, &mixed);
        try checkAllAllocationFailures(gpa, testRebuildTexts, .{
            mixed,
            .{
                .before = "defdefghighi",
                .after = "abcabcdefdef",
            },
        });
    }

    // Edits only, no shared text.
    {
        var edits_only = try sliceToDiffList(gpa, &.{
            .{ .operation = .insert, .text = "xxx" },
            .{ .operation = .delete, .text = "yyy" },
        });
        defer deinitDiffList(gpa, &edits_only);
        try checkAllAllocationFailures(gpa, testRebuildTexts, .{
            edits_only,
            .{
                .before = "yyy",
                .after = "xxx",
            },
        });
    }

    // Equalities only: both texts come out identical.
    {
        var equal_only = try sliceToDiffList(gpa, &.{
            .{ .operation = .equal, .text = "xyz" },
            .{ .operation = .equal, .text = "pdq" },
        });
        defer deinitDiffList(gpa, &equal_only);
        try checkAllAllocationFailures(gpa, testRebuildTexts, .{
            equal_only,
            .{
                .before = "xyzpdq",
                .after = "xyzpdq",
            },
        });
    }
}

/// Test body for `diffBisect`, shaped so it can be driven by
/// `checkAllAllocationFailures`.
///
/// Calls `diffBisect` on `params.dmp` with the given texts and deadline and
/// compares the returned list with `params.expected`.
fn testDiffBisect(
    allocator: std.mem.Allocator,
    params: struct {
        dmp: DiffMatchPatch,
        before: []const u8,
        after: []const u8,
        deadline: u64,
        expected: []const Diff,
    },
) !void {
    const dmp = params.dmp;

    var bisected = try dmp.diffBisect(allocator, params.before, params.after, params.deadline);
    defer deinitDiffList(allocator, &bisected);

    try testing.expectEqualDeep(params.expected, bisected.items);
}

test diffBisect {
    const dmp: DiffMatchPatch = .{ .diff_timeout = 0 };

    const before = "cat";
    const after = "map";

    // Normal
    try checkAllAllocationFailures(testing.allocator, testDiffBisect, .{.{
        .dmp = dmp,
        .before = before,
        .after = after,
        // TODO: confirm maxInt(u64) is the right stand-in for DateTime.MaxValue.
        .deadline = std.math.maxInt(u64),
        .expected = &.{
            .{ .operation = .delete, .text = "c" },
            .{ .operation = .insert, .text = "m" },
            .{ .operation = .equal, .text = "a" },
            .{ .operation = .delete, .text = "t" },
            .{ .operation = .insert, .text = "p" },
        },
    }});

    // Timeout
    try checkAllAllocationFailures(testing.allocator, testDiffBisect, .{.{
        .dmp = dmp,
        .before = before,
        .after = after,
        // TODO: confirm 0 is the right stand-in for DateTime.MinValue.
        .deadline = 0,
        .expected = &.{
            .{ .operation = .delete, .text = "cat" },
            .{ .operation = .insert, .text = "map" },
        },
    }});
}

/// Runs one line-mode diff with the default configuration and releases the
/// result. Used by the leak regression test below, which drives it through
/// `checkAllAllocationFailures`.
fn diffHalfMatchLeak(allocator: Allocator) !void {
    const before = "That quick brown fox jumped over a lazy dog.";
    const after = "The quick brown fox jumps over the lazy dog.";

    var diffs = try DiffMatchPatch.default.diff(allocator, before, after, true);
    defer deinitDiffList(allocator, &diffs);
}

test "diffHalfMatch leak regression test" {
    try checkAllAllocationFailures(testing.allocator, diffHalfMatchLeak, .{});
}

/// Test body for `diff`, shaped so it can be driven by
/// `checkAllAllocationFailures`.
///
/// Calls `diff` on `params.dmp` with the given texts and `check_lines` flag
/// and compares the returned list with `params.expected`.
fn testDiff(
    allocator: std.mem.Allocator,
    params: struct {
        dmp: DiffMatchPatch,
        before: []const u8,
        after: []const u8,
        check_lines: bool,
        expected: []const Diff,
    },
) !void {
    const dmp = params.dmp;

    var computed = try dmp.diff(allocator, params.before, params.after, params.check_lines);
    defer deinitDiffList(allocator, &computed);

    try testing.expectEqualDeep(params.expected, computed.items);
}

2304 | test diff { 2305 | const this: DiffMatchPatch = .{ .diff_timeout = 0 }; 2306 | 2307 | // Null case. 2308 | try checkAllAllocationFailures(testing.allocator, testDiff, .{.{ 2309 | .dmp = this, 2310 | .before = "", 2311 | .after = "", 2312 | .check_lines = false, 2313 | .expected = &[_]Diff{}, 2314 | }}); 2315 | 2316 | // Equality. 2317 | try checkAllAllocationFailures(testing.allocator, testDiff, .{.{ 2318 | .dmp = this, 2319 | .before = "abc", 2320 | .after = "abc", 2321 | .check_lines = false, 2322 | .expected = &.{ 2323 | .{ .operation = .equal, .text = "abc" }, 2324 | }, 2325 | }}); 2326 | 2327 | // Simple insertion. 
2328 | try checkAllAllocationFailures(testing.allocator, testDiff, .{.{ 2329 | .dmp = this, 2330 | .before = "abc", 2331 | .after = "ab123c", 2332 | .check_lines = false, 2333 | .expected = &.{ 2334 | .{ .operation = .equal, .text = "ab" }, 2335 | .{ .operation = .insert, .text = "123" }, 2336 | .{ .operation = .equal, .text = "c" }, 2337 | }, 2338 | }}); 2339 | 2340 | // Simple deletion. 2341 | try checkAllAllocationFailures(testing.allocator, testDiff, .{.{ 2342 | .dmp = this, 2343 | .before = "a123bc", 2344 | .after = "abc", 2345 | .check_lines = false, 2346 | .expected = &.{ 2347 | .{ .operation = .equal, .text = "a" }, 2348 | .{ .operation = .delete, .text = "123" }, 2349 | .{ .operation = .equal, .text = "bc" }, 2350 | }, 2351 | }}); 2352 | 2353 | // Two insertions. 2354 | try checkAllAllocationFailures(testing.allocator, testDiff, .{.{ 2355 | .dmp = this, 2356 | .before = "abc", 2357 | .after = "a123b456c", 2358 | .check_lines = false, 2359 | .expected = &.{ 2360 | .{ .operation = .equal, .text = "a" }, 2361 | .{ .operation = .insert, .text = "123" }, 2362 | .{ .operation = .equal, .text = "b" }, 2363 | .{ .operation = .insert, .text = "456" }, 2364 | .{ .operation = .equal, .text = "c" }, 2365 | }, 2366 | }}); 2367 | 2368 | // Two deletions. 
2369 | try checkAllAllocationFailures(testing.allocator, testDiff, .{.{ 2370 | .dmp = this, 2371 | .before = "a123b456c", 2372 | .after = "abc", 2373 | .check_lines = false, 2374 | .expected = &.{ 2375 | .{ .operation = .equal, .text = "a" }, 2376 | .{ .operation = .delete, .text = "123" }, 2377 | .{ .operation = .equal, .text = "b" }, 2378 | .{ .operation = .delete, .text = "456" }, 2379 | .{ .operation = .equal, .text = "c" }, 2380 | }, 2381 | }}); 2382 | 2383 | // Simple case #1 2384 | try checkAllAllocationFailures(testing.allocator, testDiff, .{.{ 2385 | .dmp = this, 2386 | .before = "a", 2387 | .after = "b", 2388 | .check_lines = false, 2389 | .expected = &.{ 2390 | .{ .operation = .delete, .text = "a" }, 2391 | .{ .operation = .insert, .text = "b" }, 2392 | }, 2393 | }}); 2394 | 2395 | // Simple case #2 2396 | try checkAllAllocationFailures(testing.allocator, testDiff, .{.{ 2397 | .dmp = this, 2398 | .before = "Apples are a fruit.", 2399 | .after = "Bananas are also fruit.", 2400 | .check_lines = false, 2401 | .expected = &.{ 2402 | .{ .operation = .delete, .text = "Apple" }, 2403 | .{ .operation = .insert, .text = "Banana" }, 2404 | .{ .operation = .equal, .text = "s are a" }, 2405 | .{ .operation = .insert, .text = "lso" }, 2406 | .{ .operation = .equal, .text = " fruit." 
}, 2407 | }, 2408 | }}); 2409 | 2410 | // Simple case #3 2411 | try checkAllAllocationFailures(testing.allocator, testDiff, .{.{ 2412 | .dmp = this, 2413 | .before = "ax\t", 2414 | .after = "\u{0680}x\x00", 2415 | .check_lines = false, 2416 | .expected = &.{ 2417 | .{ .operation = .delete, .text = "a" }, 2418 | .{ .operation = .insert, .text = "\u{0680}" }, 2419 | .{ .operation = .equal, .text = "x" }, 2420 | .{ .operation = .delete, .text = "\t" }, 2421 | .{ .operation = .insert, .text = "\x00" }, 2422 | }, 2423 | }}); 2424 | 2425 | // Overlap #1 2426 | try checkAllAllocationFailures(testing.allocator, testDiff, .{.{ 2427 | .dmp = this, 2428 | .before = "1ayb2", 2429 | .after = "abxab", 2430 | .check_lines = false, 2431 | .expected = &.{ 2432 | .{ .operation = .delete, .text = "1" }, 2433 | .{ .operation = .equal, .text = "a" }, 2434 | .{ .operation = .delete, .text = "y" }, 2435 | .{ .operation = .equal, .text = "b" }, 2436 | .{ .operation = .delete, .text = "2" }, 2437 | .{ .operation = .insert, .text = "xab" }, 2438 | }, 2439 | }}); 2440 | 2441 | // Overlap #2 2442 | try checkAllAllocationFailures(testing.allocator, testDiff, .{.{ 2443 | .dmp = this, 2444 | .before = "abcy", 2445 | .after = "xaxcxabc", 2446 | .check_lines = false, 2447 | .expected = &.{ 2448 | .{ .operation = .insert, .text = "xaxcx" }, 2449 | .{ .operation = .equal, .text = "abc" }, 2450 | .{ .operation = .delete, .text = "y" }, 2451 | }, 2452 | }}); 2453 | 2454 | // Overlap #3 2455 | try checkAllAllocationFailures(testing.allocator, testDiff, .{.{ 2456 | .dmp = this, 2457 | .before = "ABCDa=bcd=efghijklmnopqrsEFGHIJKLMNOefg", 2458 | .after = "a-bcd-efghijklmnopqrs", 2459 | .check_lines = false, 2460 | .expected = &.{ 2461 | .{ .operation = .delete, .text = "ABCD" }, 2462 | .{ .operation = .equal, .text = "a" }, 2463 | .{ .operation = .delete, .text = "=" }, 2464 | .{ .operation = .insert, .text = "-" }, 2465 | .{ .operation = .equal, .text = "bcd" }, 2466 | .{ .operation = .delete, .text = 
"=" }, 2467 | .{ .operation = .insert, .text = "-" }, 2468 | .{ .operation = .equal, .text = "efghijklmnopqrs" }, 2469 | .{ .operation = .delete, .text = "EFGHIJKLMNOefg" }, 2470 | }, 2471 | }}); 2472 | 2473 | // Large equality 2474 | try checkAllAllocationFailures(testing.allocator, testDiff, .{.{ 2475 | .dmp = this, 2476 | .before = "a [[Pennsylvania]] and [[New", 2477 | .after = " and [[Pennsylvania]]", 2478 | .check_lines = false, 2479 | .expected = &.{ 2480 | .{ .operation = .insert, .text = " " }, 2481 | .{ .operation = .equal, .text = "a" }, 2482 | .{ .operation = .insert, .text = "nd" }, 2483 | .{ .operation = .equal, .text = " [[Pennsylvania]]" }, 2484 | .{ .operation = .delete, .text = " and [[New" }, 2485 | }, 2486 | }}); 2487 | 2488 | const allocator = testing.allocator; 2489 | // TODO these tests should be checked for allocation failure 2490 | 2491 | // Increase the text lengths by 1024 times to ensure a timeout. 2492 | { 2493 | const a = "`Twas brillig, and the slithy toves\nDid gyre and gimble in the wabe:\nAll mimsy were the borogoves,\nAnd the mome raths outgrabe.\n" ** 1024; 2494 | const b = "I am the very model of a modern major general,\nI've information vegetable, animal, and mineral,\nI know the kings of England, and I quote the fights historical,\nFrom Marathon to Waterloo, in order categorical.\n" ** 1024; 2495 | 2496 | const with_timout: DiffMatchPatch = .{ 2497 | .diff_timeout = 100, // 100ms 2498 | }; 2499 | 2500 | const start_time = std.time.milliTimestamp(); 2501 | { 2502 | var time_diff = try with_timout.diff(allocator, a, b, false); 2503 | defer deinitDiffList(allocator, &time_diff); 2504 | } 2505 | const end_time = std.time.milliTimestamp(); 2506 | 2507 | // Test that we took at least the timeout period. 2508 | try testing.expect(with_timout.diff_timeout <= end_time - start_time); // diff: Timeout min. 2509 | // Test that we didn't take forever (be forgiving). 
2510 | // Theoretically this test could fail very occasionally if the 2511 | // OS task swaps or locks up for a second at the wrong moment. 2512 | try testing.expect((with_timout.diff_timeout) * 10000 * 2 > end_time - start_time); // diff: Timeout max. 2513 | } 2514 | 2515 | { 2516 | // Test the linemode speedup. 2517 | // Must be long to pass the 100 char cutoff. 2518 | const a = "1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n"; 2519 | const b = "abcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\n"; 2520 | 2521 | var diff_checked = try this.diff(allocator, a, b, true); 2522 | defer deinitDiffList(allocator, &diff_checked); 2523 | 2524 | var diff_unchecked = try this.diff(allocator, a, b, false); 2525 | defer deinitDiffList(allocator, &diff_unchecked); 2526 | 2527 | try testing.expectEqualDeep(diff_checked.items, diff_unchecked.items); // diff: Simple line-mode. 2528 | } 2529 | 2530 | { 2531 | const a = "1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890"; 2532 | const b = "abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij"; 2533 | 2534 | var diff_checked = try this.diff(allocator, a, b, true); 2535 | defer deinitDiffList(allocator, &diff_checked); 2536 | 2537 | var diff_unchecked = try this.diff(allocator, a, b, false); 2538 | defer deinitDiffList(allocator, &diff_unchecked); 2539 | 2540 | try testing.expectEqualDeep(diff_checked.items, diff_unchecked.items); // diff: Single line-mode. 2541 | } 2542 | 2543 | { 2544 | // diff: Overlap line-mode. 
2545 | const a = "1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n"; 2546 | const b = "abcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n"; 2547 | 2548 | var diffs_linemode = try this.diff(allocator, a, b, true); 2549 | defer deinitDiffList(allocator, &diffs_linemode); 2550 | 2551 | const texts_linemode = try rebuildtexts(allocator, diffs_linemode); 2552 | defer { 2553 | allocator.free(texts_linemode[0]); 2554 | allocator.free(texts_linemode[1]); 2555 | } 2556 | 2557 | var diffs_textmode = try this.diff(allocator, a, b, false); 2558 | defer deinitDiffList(allocator, &diffs_textmode); 2559 | 2560 | const texts_textmode = try rebuildtexts(allocator, diffs_textmode); 2561 | defer { 2562 | allocator.free(texts_textmode[0]); 2563 | allocator.free(texts_textmode[1]); 2564 | } 2565 | 2566 | try testing.expectEqualStrings(texts_textmode[0], texts_linemode[0]); 2567 | try testing.expectEqualStrings(texts_textmode[1], texts_linemode[1]); 2568 | } 2569 | }

/// Test body comparing line-mode and character-mode diffs of the same texts,
/// shaped so it can be driven by `checkAllAllocationFailures`.
///
/// Sets `dmp.diff_check_lines_over` to 20 for the duration of the call, diffs
/// `before` against `after` once with `check_lines` enabled and once without,
/// and expects both results to be deeply equal. The field is set back to 100
/// on every exit path (presumably the field's default; confirm against its
/// declaration).
fn testDiffLineMode(
    allocator: Allocator,
    dmp: *DiffMatchPatch,
    before: []const u8,
    after: []const u8,
) !void {
    dmp.diff_check_lines_over = 20;
    // Reset through `defer` so an early return from any `try` below (for
    // example an injected allocation failure) cannot leave the caller's
    // configuration at the lowered value.
    defer dmp.diff_check_lines_over = 100;

    var diff_checked = try dmp.diff(allocator, before, after, true);
    defer deinitDiffList(allocator, &diff_checked);

    var diff_unchecked = try dmp.diff(allocator, before, after, false);
    defer deinitDiffList(allocator, &diff_unchecked);

    try testing.expectEqualDeep(diff_checked.items, diff_unchecked.items); // diff: Simple line-mode.
}

test "diffLineMode" {
    var dmp: DiffMatchPatch = .{ .diff_timeout = 0 };
    try checkAllAllocationFailures(
        testing.allocator,
        testDiffLineMode,
        .{
            &dmp,
            "1234567890\n1234567890\n1234567890\n",
            "abcdefghij\nabcdefghij\nabcdefghij\n",
        },
    );
}

/// Test body for `diffCleanupSemantic`, shaped so it can be driven by
/// `checkAllAllocationFailures`.
///
/// Copies `params.input` into a list owned by `allocator`, runs
/// `diffCleanupSemantic` on that list in place, and compares the list
/// contents with `params.expected`.
fn testDiffCleanupSemantic(
    allocator: std.mem.Allocator,
    params: struct {
        input: []const Diff,
        expected: []const Diff,
    },
) !void {
    // Use the shared helper instead of repeating its copy loop here.
    var diffs = try sliceToDiffList(allocator, params.input);
    defer deinitDiffList(allocator, &diffs);

    try diffCleanupSemantic(allocator, &diffs);

    try testing.expectEqualDeep(params.expected, diffs.items);
}

2621 | test diffCleanupSemantic { 2622 | // Null case. 
2623 | try checkAllAllocationFailures(testing.allocator, testDiffCleanupSemantic, .{.{ 2624 | .input = &[_]Diff{}, 2625 | .expected = &[_]Diff{}, 2626 | }}); 2627 | 2628 | // No elimination #1 2629 | try checkAllAllocationFailures(testing.allocator, testDiffCleanupSemantic, .{.{ 2630 | .input = &.{ 2631 | .{ .operation = .delete, .text = "ab" }, 2632 | .{ .operation = .insert, .text = "cd" }, 2633 | .{ .operation = .equal, .text = "12" }, 2634 | .{ .operation = .delete, .text = "e" }, 2635 | }, 2636 | .expected = &.{ 2637 | .{ .operation = .delete, .text = "ab" }, 2638 | .{ .operation = .insert, .text = "cd" }, 2639 | .{ .operation = .equal, .text = "12" }, 2640 | .{ .operation = .delete, .text = "e" }, 2641 | }, 2642 | }}); 2643 | 2644 | // No elimination #2 2645 | try checkAllAllocationFailures(testing.allocator, testDiffCleanupSemantic, .{.{ 2646 | .input = &.{ 2647 | .{ .operation = .delete, .text = "abc" }, 2648 | .{ .operation = .insert, .text = "ABC" }, 2649 | .{ .operation = .equal, .text = "1234" }, 2650 | .{ .operation = .delete, .text = "wxyz" }, 2651 | }, 2652 | .expected = &.{ 2653 | .{ .operation = .delete, .text = "abc" }, 2654 | .{ .operation = .insert, .text = "ABC" }, 2655 | .{ .operation = .equal, .text = "1234" }, 2656 | .{ .operation = .delete, .text = "wxyz" }, 2657 | }, 2658 | }}); 2659 | 2660 | // Simple elimination 2661 | try checkAllAllocationFailures(testing.allocator, testDiffCleanupSemantic, .{.{ 2662 | .input = &.{ 2663 | .{ .operation = .delete, .text = "a" }, 2664 | .{ .operation = .equal, .text = "b" }, 2665 | .{ .operation = .delete, .text = "c" }, 2666 | }, 2667 | .expected = &.{ 2668 | .{ .operation = .delete, .text = "abc" }, 2669 | .{ .operation = .insert, .text = "b" }, 2670 | }, 2671 | }}); 2672 | 2673 | // Backpass elimination 2674 | try checkAllAllocationFailures(testing.allocator, testDiffCleanupSemantic, .{.{ 2675 | .input = &.{ 2676 | .{ .operation = .delete, .text = "ab" }, 2677 | .{ .operation = .equal, .text = "cd" }, 
2678 | .{ .operation = .delete, .text = "e" }, 2679 | .{ .operation = .equal, .text = "f" }, 2680 | .{ .operation = .insert, .text = "g" }, 2681 | }, 2682 | .expected = &.{ 2683 | .{ .operation = .delete, .text = "abcdef" }, 2684 | .{ .operation = .insert, .text = "cdfg" }, 2685 | }, 2686 | }}); 2687 | 2688 | // Multiple elimination 2689 | try checkAllAllocationFailures(testing.allocator, testDiffCleanupSemantic, .{.{ 2690 | .input = &.{ 2691 | .{ .operation = .insert, .text = "1" }, 2692 | .{ .operation = .equal, .text = "A" }, 2693 | .{ .operation = .delete, .text = "B" }, 2694 | .{ .operation = .insert, .text = "2" }, 2695 | .{ .operation = .equal, .text = "_" }, 2696 | .{ .operation = .insert, .text = "1" }, 2697 | .{ .operation = .equal, .text = "A" }, 2698 | .{ .operation = .delete, .text = "B" }, 2699 | .{ .operation = .insert, .text = "2" }, 2700 | }, 2701 | .expected = &.{ 2702 | .{ .operation = .delete, .text = "AB_AB" }, 2703 | .{ .operation = .insert, .text = "1A2_1A2" }, 2704 | }, 2705 | }}); 2706 | 2707 | // Word boundaries 2708 | try checkAllAllocationFailures(testing.allocator, testDiffCleanupSemantic, .{.{ 2709 | .input = &.{ 2710 | .{ .operation = .equal, .text = "The c" }, 2711 | .{ .operation = .delete, .text = "ow and the c" }, 2712 | .{ .operation = .equal, .text = "at." }, 2713 | }, 2714 | .expected = &.{ 2715 | .{ .operation = .equal, .text = "The " }, 2716 | .{ .operation = .delete, .text = "cow and the " }, 2717 | .{ .operation = .equal, .text = "cat." 
}, 2718 | }, 2719 | }}); 2720 | 2721 | // No overlap elimination 2722 | try checkAllAllocationFailures(testing.allocator, testDiffCleanupSemantic, .{.{ 2723 | .input = &.{ 2724 | .{ .operation = .delete, .text = "abcxx" }, 2725 | .{ .operation = .insert, .text = "xxdef" }, 2726 | }, 2727 | .expected = &.{ 2728 | .{ .operation = .delete, .text = "abcxx" }, 2729 | .{ .operation = .insert, .text = "xxdef" }, 2730 | }, 2731 | }}); 2732 | 2733 | // Overlap elimination 2734 | try checkAllAllocationFailures(testing.allocator, testDiffCleanupSemantic, .{.{ 2735 | .input = &.{ 2736 | .{ .operation = .delete, .text = "abcxxx" }, 2737 | .{ .operation = .insert, .text = "xxxdef" }, 2738 | }, 2739 | .expected = &.{ 2740 | .{ .operation = .delete, .text = "abc" }, 2741 | .{ .operation = .equal, .text = "xxx" }, 2742 | .{ .operation = .insert, .text = "def" }, 2743 | }, 2744 | }}); 2745 | 2746 | // Reverse overlap elimination 2747 | try checkAllAllocationFailures(testing.allocator, testDiffCleanupSemantic, .{.{ 2748 | .input = &.{ 2749 | .{ .operation = .delete, .text = "xxxabc" }, 2750 | .{ .operation = .insert, .text = "defxxx" }, 2751 | }, 2752 | .expected = &.{ 2753 | .{ .operation = .insert, .text = "def" }, 2754 | .{ .operation = .equal, .text = "xxx" }, 2755 | .{ .operation = .delete, .text = "abc" }, 2756 | }, 2757 | }}); 2758 | 2759 | // Two overlap eliminations 2760 | try checkAllAllocationFailures(testing.allocator, testDiffCleanupSemantic, .{.{ 2761 | .input = &.{ 2762 | .{ .operation = .delete, .text = "abcd1212" }, 2763 | .{ .operation = .insert, .text = "1212efghi" }, 2764 | .{ .operation = .equal, .text = "----" }, 2765 | .{ .operation = .delete, .text = "A3" }, 2766 | .{ .operation = .insert, .text = "3BC" }, 2767 | }, 2768 | .expected = &.{ 2769 | .{ .operation = .delete, .text = "abcd" }, 2770 | .{ .operation = .equal, .text = "1212" }, 2771 | .{ .operation = .insert, .text = "efghi" }, 2772 | .{ .operation = .equal, .text = "----" }, 2773 | .{ .operation = 
.delete, .text = "A" },
            .{ .operation = .equal, .text = "3" },
            .{ .operation = .insert, .text = "BC" },
        },
    }});
}

/// Clones `params.input` into an allocator-owned list, applies
/// `dmp.diffCleanupEfficiency` to it, and expects the outcome to deeply equal
/// `params.expected`. The list is released with `deinitDiffList` on every
/// exit path.
fn testDiffCleanupEfficiency(
    allocator: Allocator,
    dmp: DiffMatchPatch,
    params: struct {
        input: []const Diff,
        expected: []const Diff,
    },
) !void {
    var working: DiffList = try .initCapacity(allocator, params.input.len);
    defer deinitDiffList(allocator, &working);

    for (params.input) |source| {
        // Capacity is reserved up front; duplicating the text is the only
        // fallible step in this loop.
        const owned_text = try allocator.dupe(u8, source.text);
        working.appendAssumeCapacity(.{ .operation = source.operation, .text = owned_text });
    }

    try dmp.diffCleanupEfficiency(allocator, &working);
    try testing.expectEqualDeep(params.expected, working.items);
}

test "diffCleanupEfficiency" {
    const allocator = testing.allocator;
    var dmp: DiffMatchPatch = .default;
    dmp.diff_edit_cost = 4;
    { // Null case.
        var empty_list: DiffList = .empty;
        try dmp.diffCleanupEfficiency(allocator, &empty_list);
        try testing.expectEqualDeep(DiffList.empty, empty_list);
    }
    { // No elimination.
        const unchanged: []const Diff = &.{
            .{ .operation = .delete, .text = "ab" },
            .{ .operation = .insert, .text = "12" },
            .{ .operation = .equal, .text = "wxyz" },
            .{ .operation = .delete, .text = "cd" },
            .{ .operation = .insert, .text = "34" },
        };
        try checkAllAllocationFailures(allocator, testDiffCleanupEfficiency, .{
            dmp,
            .{ .input = unchanged, .expected = unchanged },
        });
    }
    { // Four-edit elimination.
        const input: []const Diff = &.{
            .{ .operation = .delete, .text = "ab" },
            .{ .operation = .insert, .text = "12" },
            .{ .operation = .equal, .text = "xyz" },
            .{ .operation = .delete, .text = "cd" },
            .{ .operation = .insert, .text = "34" },
        };
        const expected: []const Diff = &.{
            .{ .operation = .delete, .text = "abxyzcd" },
            .{ .operation = .insert, .text = "12xyz34" },
        };
        try checkAllAllocationFailures(allocator, testDiffCleanupEfficiency, .{
            dmp,
            .{ .input = input, .expected = expected },
        });
    }
    { // Three-edit elimination.
        const input: []const Diff = &.{
            .{ .operation = .insert, .text = "12" },
            .{ .operation = .equal, .text = "x" },
            .{ .operation = .delete, .text = "cd" },
            .{ .operation = .insert, .text = "34" },
        };
        const expected: []const Diff = &.{
            .{ .operation = .delete, .text = "xcd" },
            .{ .operation = .insert, .text = "12x34" },
        };
        try checkAllAllocationFailures(allocator, testDiffCleanupEfficiency, .{
            dmp,
            .{ .input = input, .expected = expected },
        });
    }
    { // Backpass elimination.
        const input: []const Diff = &.{
            .{ .operation = .delete, .text = "ab" },
            .{ .operation = .insert, .text = "12" },
            .{ .operation = .equal, .text = "xy" },
            .{ .operation = .insert, .text = "34" },
            .{ .operation = .equal, .text = "z" },
            .{ .operation = .delete, .text = "cd" },
            .{ .operation = .insert, .text = "56" },
        };
        const expected: []const Diff = &.{
            .{ .operation = .delete, .text = "abxyzcd" },
            .{ .operation = .insert, .text = "12xy34z56" },
        };
        try checkAllAllocationFailures(allocator, testDiffCleanupEfficiency, .{
            dmp,
            .{ .input = input, .expected = expected },
        });
    }
    { // High cost elimination.
        dmp.diff_edit_cost = 5;
        const input: []const Diff = &.{
            .{ .operation = .delete, .text = "ab" },
            .{ .operation = .insert, .text = "12" },
            .{ .operation = .equal, .text = "wxyz" },
            .{ .operation = .delete, .text = "cd" },
            .{ .operation = .insert, .text = "34" },
        };
        const expected: []const Diff = &.{
            .{ .operation = .delete, .text = "abwxyzcd" },
            .{ .operation = .insert, .text = "12wxyz34" },
        };
        try checkAllAllocationFailures(allocator, testDiffCleanupEfficiency, .{
            dmp,
            .{ .input = input, .expected = expected },
        });
        dmp.diff_edit_cost = 4;
    }
}

/// Forwards to `std.testing.checkAllAllocationFailures`, typing `extra_args`
/// through `CheckAllAllocationFailuresTuples` so call sites can pass anonymous
/// tuple literals.
///
/// https://github.com/ziglang/zig/pull/23042/files
fn checkAllAllocationFailures(
    backing_allocator: std.mem.Allocator,
    comptime test_fn: anytype,
    extra_args: CheckAllAllocationFailuresTuples(@TypeOf(test_fn)).ExtraArgsTuple,
) !void {
    try testing.checkAllAllocationFailures(backing_allocator, test_fn, extra_args);
}

fn CheckAllAllocationFailuresTuples(comptime TestFn: type) struct {
    ///
`std.meta.ArgsTuple(TestFn)` 2924 | ArgsTuple: type, 2925 | /// `std.meta.ArgsTuple(TestFn)` without the first argument 2926 | ExtraArgsTuple: type, 2927 | } { 2928 | switch (@typeInfo(@typeInfo(TestFn).@"fn".return_type.?)) { 2929 | .error_union => |info| { 2930 | if (info.payload != void) { 2931 | @compileError("Return type must be !void"); 2932 | } 2933 | }, 2934 | else => @compileError("Return type must be !void"), 2935 | } 2936 | 2937 | const ArgsTuple = std.meta.ArgsTuple(TestFn); 2938 | 2939 | const fn_args_fields = std.meta.fields(ArgsTuple); 2940 | if (fn_args_fields.len == 0 or fn_args_fields[0].type != std.mem.Allocator) { 2941 | @compileError("The provided function must have an " ++ @typeName(std.mem.Allocator) ++ " as its first argument"); 2942 | } 2943 | 2944 | // remove the first tuple field (`std.mem.Allocator`) 2945 | var extra_args_tuple_info = @typeInfo(ArgsTuple); 2946 | var extra_args_fields = extra_args_tuple_info.@"struct".fields[1..].*; 2947 | for (&extra_args_fields, 0..) 
|*extra_field, i| { 2948 | extra_field.name = fn_args_fields[i].name; 2949 | } 2950 | extra_args_tuple_info.@"struct".fields = &extra_args_fields; 2951 | const ExtraArgsTuple = @Type(extra_args_tuple_info); 2952 | 2953 | return .{ 2954 | .ArgsTuple = ArgsTuple, 2955 | .ExtraArgsTuple = ExtraArgsTuple, 2956 | }; 2957 | } 2958 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 diffz authors 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # diffz 2 | 3 | An implementation of Google's diff-match-patch. 
4 | 5 | Currently implemented: 6 | 7 | - [x] Diff 8 | - [ ] Match 9 | - [ ] Patch 10 | 11 | ## Installation 12 | 13 | > [!NOTE] 14 | > The minimum supported Zig version is `0.14.0`. 15 | 16 | ```bash 17 | # Initialize a `zig build` project if you haven't already 18 | zig init 19 | # Add the `diffz` package to your `build.zig.zon` 20 | zig fetch --save git+https://github.com/ziglibs/diffz.git 21 | ``` 22 | 23 | You can then import `diffz` in your `build.zig` with: 24 | 25 | ```zig 26 | const diffz = b.dependency("diffz", .{}); 27 | const exe = b.addExecutable(...); 28 | exe.root_module.addImport("diffz", diffz.module("diffz")); 29 | ``` 30 | 31 | ## License 32 | 33 | This library is based off of https://github.com/google/diff-match-patch, which is licensed under the [Apache License 2.0](http://www.apache.org/licenses/LICENSE-2.0). This library itself is licensed under the MIT License, see `LICENSE`. 34 | -------------------------------------------------------------------------------- /build.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | const builtin = @import("builtin"); 3 | 4 | const minimum_zig_version = std.SemanticVersion.parse("0.14.0") catch unreachable; 5 | 6 | pub fn build(b: *std.Build) void { 7 | if (comptime (builtin.zig_version.order(minimum_zig_version) == .lt)) { 8 | @compileError(std.fmt.comptimePrint( 9 | \\Your Zig version does not meet the minimum build requirement: 10 | \\ required Zig version: {[minimum_zig_version]} 11 | \\ actual Zig version: {[current_version]} 12 | \\ 13 | , .{ 14 | .current_version = builtin.zig_version, 15 | .minimum_zig_version = minimum_zig_version, 16 | })); 17 | } 18 | 19 | const target = b.standardTargetOptions(.{}); 20 | const optimize = b.standardOptimizeOption(.{}); 21 | 22 | const diffz_module = b.addModule("diffz", .{ 23 | .root_source_file = b.path("DiffMatchPatch.zig"), 24 | .target = target, 25 | .optimize = optimize, 26 | }); 27 | 28 | 
const tests = b.addTest(.{ .root_module = diffz_module }); 29 | const run_tests = b.addRunArtifact(tests); 30 | 31 | const test_step = b.step("test", "Run all the tests"); 32 | test_step.dependOn(&run_tests.step); 33 | 34 | const kcov_bin = b.findProgram(&.{"kcov"}, &.{}) catch "kcov"; 35 | 36 | const run_kcov = b.addSystemCommand(&.{ 37 | kcov_bin, 38 | "--clean", 39 | "--exclude-line=unreachable,expect(false)", 40 | }); 41 | run_kcov.addPrefixedDirectoryArg("--include-pattern=", b.path(".")); 42 | const coverage_output = run_kcov.addOutputDirectoryArg("."); 43 | run_kcov.addArtifactArg(tests); 44 | run_kcov.enableTestRunnerMode(); 45 | 46 | const install_coverage = b.addInstallDirectory(.{ 47 | .source_dir = coverage_output, 48 | .install_dir = .{ .custom = "coverage" }, 49 | .install_subdir = "", 50 | }); 51 | 52 | const coverage_step = b.step("coverage", "Generate coverage (kcov must be installed)"); 53 | coverage_step.dependOn(&install_coverage.step); 54 | } 55 | -------------------------------------------------------------------------------- /build.zig.zon: -------------------------------------------------------------------------------- 1 | .{ 2 | .name = .diffz, 3 | .version = "0.0.1", 4 | .minimum_zig_version = "0.14.0", 5 | .paths = .{ 6 | "DiffMatchPatch.zig", 7 | "LICENSE", 8 | "README.md", 9 | "build.zig.zon", 10 | "build.zig", 11 | }, 12 | .fingerprint = 0x23937d8821656b1b, // Changing this has security and trust implications. 13 | } 14 | --------------------------------------------------------------------------------