├── .gitattributes
├── .github
└── workflows
│ └── main.yml
├── .gitignore
├── DiffMatchPatch.zig
├── LICENSE
├── README.md
├── build.zig
└── build.zig.zon
/.gitattributes:
--------------------------------------------------------------------------------
1 | *.zig text=auto eol=lf
2 | *.zon text=auto eol=lf
3 |
--------------------------------------------------------------------------------
/.github/workflows/main.yml:
--------------------------------------------------------------------------------
1 | name: CI
2 |
3 | on:
4 | push:
5 | branches:
6 | - main
7 | pull_request:
8 | branches:
9 | - main
10 | workflow_dispatch:
11 |
12 | jobs:
13 | build:
14 | strategy:
15 | fail-fast: false
16 | matrix:
17 | zig-version: [master]
18 | os: [ubuntu-latest, macos-latest, windows-latest]
19 | include:
20 | - zig-version: "0.14.0"
21 | os: ubuntu-latest
22 | runs-on: ${{ matrix.os }}
23 | steps:
24 | - name: Checkout
25 | uses: actions/checkout@v4
26 |
27 | - name: Setup Zig
28 | uses: mlugg/setup-zig@v1
29 | with:
30 | version: ${{ matrix.zig-version }}
31 |
32 | - name: Check Formatting
33 | run: zig fmt --ast-check --check .
34 |
35 | - name: Run Tests
36 | run: zig build test --summary all
37 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .zig-cache
2 | zig-cache
3 | zig-out
4 |
--------------------------------------------------------------------------------
/DiffMatchPatch.zig:
--------------------------------------------------------------------------------
1 | const DiffMatchPatch = @This();
2 |
3 | const std = @import("std");
4 | const testing = std.testing;
5 | const Allocator = std.mem.Allocator;
6 |
7 | /// DMP with default configuration options
8 | pub const default: DiffMatchPatch = .{};
9 |
/// One edit step: an operation plus the text it applies to.
pub const Diff = struct {
    /// The kind of edit a `Diff` describes.
    pub const Operation = enum {
        insert,
        delete,
        equal,
    };

    operation: Operation,
    text: []const u8,

    /// Write the diff as `(<sigil>, "<text>")`, where the sigil is `=` for
    /// equal, `+` for insert and `-` for delete.
    pub fn format(value: Diff, _: anytype, _: anytype, writer: anytype) !void {
        const sigil: []const u8 = switch (value.operation) {
            .equal => "=",
            .insert => "+",
            .delete => "-",
        };
        try writer.print("({s}, \"{s}\")", .{ sigil, value.text });
    }

    /// Two diffs are equal when both the operation and the text bytes match.
    pub fn eql(a: Diff, b: Diff) bool {
        if (a.operation != b.operation) return false;
        return std.mem.eql(u8, a.text, b.text);
    }

    test eql {
        const equal_a: Diff = .{ .operation = .equal, .text = "a" };
        const insert_a: Diff = .{ .operation = .insert, .text = "a" };
        const equal_b: Diff = .{ .operation = .equal, .text = "b" };
        const delete_b: Diff = .{ .operation = .delete, .text = "b" };

        // Same operation, same text.
        try testing.expect(equal_a.eql(equal_a));
        // Same text, different operation.
        try testing.expect(!insert_a.eql(equal_a));
        // Same operation, different text.
        try testing.expect(!equal_a.eql(equal_b));
        // Both differ.
        try testing.expect(!equal_a.eql(delete_b));
    }
};
47 |
/// Number of milliseconds to spend computing a diff before giving up.
/// 0 means no time limit; with 0 the half-match speedup is also skipped so
/// that the result is not made less optimal for speed.
diff_timeout: u64 = 1000,
/// Cost of an empty edit operation in terms of edit characters.
diff_edit_cost: u16 = 4,
/// Number of bytes that BOTH texts must exceed before a line-based diff is
/// attempted (only when the caller asks for line checking).
diff_check_lines_over: u64 = 100,

/// At what point is no match declared (0.0 = perfection, 1.0 = very loose).
match_threshold: f32 = 0.5,
/// How far to search for a match (0 = exact location, 1000+ = broad match).
/// A match this many characters away from the expected location will add
/// 1.0 to the score (0.0 is a perfect match).
match_distance: u32 = 1000,
/// The number of bits in an int.
match_max_bits: u16 = 32,

/// When deleting a large block of text (over ~64 characters), how close
/// do the contents have to be to match the expected contents (0.0 =
/// perfection, 1.0 = very loose). Note that `match_threshold` controls
/// how closely the end points of a delete need to match.
patch_delete_threshold: f32 = 0.5,
/// Chunk size for context length.
patch_margin: u16 = 4,

/// Errors the diff routines can return; allocation failure is the only one.
pub const DiffError = error{OutOfMemory};
73 |
/// Find the differences between two texts.
///
/// The returned list and the text of every `Diff` in it are allocated with
/// `allocator`; release them with `deinitDiffList(allocator, &diffs)`.
///
/// `dmp.diff_timeout` is the time budget in milliseconds, measured from the
/// moment of this call; 0 means no limit. The deadline is computed with a
/// saturating add, so an extremely large timeout behaves like "no limit"
/// instead of overflowing.
pub fn diff(
    dmp: DiffMatchPatch,
    allocator: std.mem.Allocator,
    /// Old string to be diffed.
    before: []const u8,
    /// New string to be diffed.
    after: []const u8,
    /// If false, then don't run a line-level diff first
    /// to identify the changed areas. If true, then run
    /// a faster slightly less optimal diff.
    check_lines: bool,
) DiffError!DiffList {
    const deadline: u64 = if (dmp.diff_timeout == 0)
        std.math.maxInt(u64)
    else
        // `+|` saturates at maxInt(u64): a plain `+` would trap (or be
        // undefined in ReleaseFast) when `diff_timeout` is close to the
        // maximum of its type.
        @as(u64, @intCast(std.time.milliTimestamp())) +| dmp.diff_timeout;
    return dmp.diffInternal(allocator, before, after, check_lines, deadline);
}
98 |
99 | const DiffList = std.ArrayListUnmanaged(Diff);
100 |
/// Release a diff list: free the text owned by every `Diff`, then the
/// list's own storage. Both must have been allocated with `allocator`.
pub fn deinitDiffList(allocator: Allocator, diffs: *DiffList) void {
    for (diffs.items) |entry| allocator.free(entry.text);
    diffs.deinit(allocator);
}
109 |
/// Free the text of the `len` diffs starting at index `start`.
/// The entries themselves stay in the list; the caller is expected to
/// remove or overwrite them afterwards.
fn freeRangeDiffList(
    allocator: Allocator,
    diffs: *DiffList,
    start: usize,
    len: usize,
) void {
    for (diffs.items[start..][0..len]) |entry| allocator.free(entry.text);
}
121 |
/// Diff two texts against an already-computed `deadline`.
///
/// Strips the common prefix and suffix, diffs what is left with
/// `diffCompute`, re-attaches the stripped parts as equalities and finally
/// runs `diffCleanupMerge` over the result. Every `Diff.text` in the
/// returned list is a fresh allocation owned by the caller.
fn diffInternal(
    dmp: DiffMatchPatch,
    allocator: std.mem.Allocator,
    before: []const u8,
    after: []const u8,
    check_lines: bool,
    deadline: u64,
) DiffError!DiffList {
    // Length of the shared prefix; `null` means the texts are identical.
    const prefix_len = std.mem.indexOfDiff(u8, before, after) orelse {
        var identical: DiffList = .empty;
        errdefer deinitDiffList(allocator, &identical);
        // Two empty texts produce an empty diff list.
        if (before.len == 0) return identical;
        try identical.ensureUnusedCapacity(allocator, 1);
        const copy = try allocator.dupe(u8, before);
        identical.appendAssumeCapacity(.{ .operation = .equal, .text = copy });
        return identical;
    };

    // Split off the shared prefix (speedup).
    const head = before[0..prefix_len];
    const before_rest = before[prefix_len..];
    const after_rest = after[prefix_len..];

    // Split off the shared suffix (speedup).
    const suffix_len = diffCommonSuffix(before_rest, after_rest);
    const tail = before_rest[before_rest.len - suffix_len ..];
    const before_middle = before_rest[0 .. before_rest.len - suffix_len];
    const after_middle = after_rest[0 .. after_rest.len - suffix_len];

    // Diff only the part that actually differs.
    var result = try dmp.diffCompute(allocator, before_middle, after_middle, check_lines, deadline);
    errdefer deinitDiffList(allocator, &result);

    // Put the prefix back at the front ...
    if (head.len != 0) {
        try result.ensureUnusedCapacity(allocator, 1);
        const copy = try allocator.dupe(u8, head);
        result.insertAssumeCapacity(0, .{ .operation = .equal, .text = copy });
    }
    // ... and the suffix back at the end.
    if (tail.len != 0) {
        try result.ensureUnusedCapacity(allocator, 1);
        const copy = try allocator.dupe(u8, tail);
        result.appendAssumeCapacity(.{ .operation = .equal, .text = copy });
    }

    try diffCleanupMerge(allocator, &result);
    return result;
}
179 |
/// Index of the first position at which `a` and `b` differ.
/// If one slice is a strict prefix of the other, that is the length of the
/// shorter slice. Returns `null` when the slices are equal.
fn indexOfDiff(comptime T: type, a: []const T, b: []const T) ?usize {
    const overlap = @min(a.len, b.len);
    var index: usize = 0;
    while (index < overlap) : (index += 1) {
        if (a[index] != b[index]) return index;
    }
    if (a.len != b.len) return overlap;
    return null;
}
187 |
/// Number of leading bytes that `before` and `after` have in common.
fn diffCommonPrefix(before: []const u8, after: []const u8) usize {
    if (indexOfDiff(u8, before, after)) |first_mismatch| return first_mismatch;
    // No mismatch at all: the texts are equal, so the whole length is shared.
    return @min(before.len, after.len);
}
191 |
/// Number of trailing bytes that `before` and `after` have in common.
fn diffCommonSuffix(before: []const u8, after: []const u8) usize {
    const limit = @min(before.len, after.len);
    var matched: usize = 0;
    // Walk backwards from the ends for as long as the bytes agree.
    while (matched < limit and
        before[before.len - 1 - matched] == after[after.len - 1 - matched])
    {
        matched += 1;
    }
    return matched;
}
204 |
/// Find the differences between two texts that are assumed to share no
/// common prefix or suffix.
///
/// Tries a series of cheap special cases first (one side empty, one text
/// contained in the other, a single-byte text, a half-match split) and
/// otherwise falls back to a line-mode diff or a bisecting diff.
///
/// `check_lines` enables the line-level pre-pass when both texts are longer
/// than `dmp.diff_check_lines_over`; it is faster but slightly less optimal.
/// `deadline` is the time by which the diff should be complete.
/// The returned list owns all of its texts.
fn diffCompute(
    dmp: DiffMatchPatch,
    allocator: std.mem.Allocator,
    before: []const u8,
    after: []const u8,
    check_lines: bool,
    deadline: u64,
) DiffError!DiffList {
    // One side is empty: the whole other side is a single insert or delete.
    if (before.len == 0 or after.len == 0) {
        const inserting = before.len == 0;
        const op: Diff.Operation = if (inserting) .insert else .delete;
        var single: DiffList = .empty;
        errdefer deinitDiffList(allocator, &single);
        try single.ensureUnusedCapacity(allocator, 1);
        const copy = try allocator.dupe(u8, if (inserting) after else before);
        single.appendAssumeCapacity(.{ .operation = op, .text = copy });
        return single;
    }

    const before_is_longer = before.len > after.len;
    const long_text = if (before_is_longer) before else after;
    const short_text = if (before_is_longer) after else before;

    // The shorter text sits inside the longer one: everything around it is
    // one kind of edit (speedup).
    if (std.mem.indexOf(u8, long_text, short_text)) |at| {
        const surround: Diff.Operation = if (before_is_longer) .delete else .insert;
        const ops = [3]Diff.Operation{ surround, .equal, surround };
        const texts = [3][]const u8{
            long_text[0..at],
            short_text,
            long_text[at + short_text.len ..],
        };

        var parts: DiffList = .empty;
        errdefer deinitDiffList(allocator, &parts);
        try parts.ensureUnusedCapacity(allocator, 3);
        for (ops, texts) |op, text| {
            const copy = try allocator.dupe(u8, text);
            parts.appendAssumeCapacity(.{ .operation = op, .text = copy });
        }
        return parts;
    }

    // A single byte that was not found above cannot be part of an
    // equality, so the texts are a plain replacement.
    if (short_text.len == 1) {
        var replacement: DiffList = .empty;
        errdefer deinitDiffList(allocator, &replacement);
        try replacement.ensureUnusedCapacity(allocator, 2);
        const removed = try allocator.dupe(u8, before);
        replacement.appendAssumeCapacity(.{ .operation = .delete, .text = removed });
        const added = try allocator.dupe(u8, after);
        replacement.appendAssumeCapacity(.{ .operation = .insert, .text = added });
        return replacement;
    }

    // See whether the problem can be cut in two around a long shared middle.
    if (try dmp.diffHalfMatch(allocator, before, after)) |split| {
        defer split.deinit(allocator);

        // Diff the parts left and right of the shared middle separately.
        var merged = try dmp.diffInternal(
            allocator,
            split.prefix_before,
            split.prefix_after,
            check_lines,
            deadline,
        );
        errdefer deinitDiffList(allocator, &merged);
        var right = try dmp.diffInternal(
            allocator,
            split.suffix_before,
            split.suffix_after,
            check_lines,
            deadline,
        );
        // On success the texts in `right` move into `merged`, so only the
        // list storage is released here; the texts are freed on error only.
        defer right.deinit(allocator);
        errdefer for (right.items) |entry| allocator.free(entry.text);

        // left diffs + shared middle + right diffs
        try merged.ensureUnusedCapacity(allocator, 1);
        const middle = try allocator.dupe(u8, split.common_middle);
        merged.appendAssumeCapacity(.{ .operation = .equal, .text = middle });
        try merged.appendSlice(allocator, right.items);
        return merged;
    }

    const threshold = dmp.diff_check_lines_over;
    const both_long = before.len > threshold and after.len > threshold;
    if (check_lines and both_long) {
        return dmp.diffLineMode(allocator, before, after, deadline);
    }

    return dmp.diffBisect(allocator, before, after, deadline);
}
334 |
/// The pieces of a half-match: what precedes and follows the shared middle
/// in each text, plus the shared middle itself. `deinit` frees all five
/// slices.
const HalfMatchResult = struct {
    prefix_before: []const u8,
    suffix_before: []const u8,
    prefix_after: []const u8,
    suffix_after: []const u8,
    common_middle: []const u8,

    /// Free all five slices with `alloc`.
    pub fn deinit(hmr: HalfMatchResult, alloc: Allocator) void {
        const owned = [_][]const u8{
            hmr.prefix_before,
            hmr.suffix_before,
            hmr.prefix_after,
            hmr.suffix_after,
            hmr.common_middle,
        };
        for (owned) |slice| alloc.free(slice);
    }
};
350 |
/// Do the two texts share a substring that is at least half the length of
/// the longer text?
///
/// This speedup can produce non-minimal diffs, so it is skipped (returns
/// `null`) when `dmp.diff_timeout` is 0, i.e. when time is unlimited.
///
/// Returns `null` when there is no such substring. Otherwise the result
/// holds the prefix and suffix of `before`, the prefix and suffix of
/// `after`, and the common middle; the caller owns it and must call
/// `deinit` on it.
fn diffHalfMatch(
    dmp: DiffMatchPatch,
    allocator: std.mem.Allocator,
    before: []const u8,
    after: []const u8,
) DiffError!?HalfMatchResult {
    // Don't risk returning a non-optimal diff if we have unlimited time.
    if (dmp.diff_timeout == 0) return null;

    const before_is_longer = before.len > after.len;
    const long_text = if (before_is_longer) before else after;
    const short_text = if (before_is_longer) after else before;

    // Too short, or the short text cannot cover half of the long one.
    if (long_text.len < 4 or short_text.len * 2 < long_text.len) return null;

    // Seed the search from the second quarter of the long text ...
    const from_second_quarter = try dmp.diffHalfMatchInternal(
        allocator,
        long_text,
        short_text,
        (long_text.len + 3) / 4,
    );
    errdefer if (from_second_quarter) |found| found.deinit(allocator);
    // ... and again from the third quarter.
    const from_third_quarter = try dmp.diffHalfMatchInternal(
        allocator,
        long_text,
        short_text,
        (long_text.len + 1) / 2,
    );

    // Keep whichever candidate exists; if both do, keep the one with the
    // strictly longer middle (ties go to the third-quarter one) and free
    // the other.
    const chosen: HalfMatchResult = pick: {
        if (from_second_quarter) |first| {
            if (from_third_quarter) |second| {
                if (first.common_middle.len > second.common_middle.len) {
                    second.deinit(allocator);
                    break :pick first;
                }
                first.deinit(allocator);
                break :pick second;
            }
            break :pick first;
        }
        break :pick from_third_quarter orelse return null;
    };

    // The internal search reports its pieces as (long, short). When `before`
    // is the long text that already is (before, after).
    if (before_is_longer) return chosen;

    // Otherwise swap the roles; ownership of every slice moves to the
    // permuted result.
    return .{
        .prefix_before = chosen.prefix_after,
        .suffix_before = chosen.suffix_after,
        .prefix_after = chosen.prefix_before,
        .suffix_after = chosen.suffix_before,
        .common_middle = chosen.common_middle,
    };
}
422 |
/// Does a substring of `short_text` exist within `long_text` such that the
/// substring is at least half the length of `long_text`?
///
/// `i` is the start index, within `long_text`, of the quarter-length seed
/// used to look for candidate alignments.
///
/// Returns `null` if there is no such substring. Otherwise returns newly
/// allocated copies of: the prefix of `long_text`, the suffix of
/// `long_text`, the prefix of `short_text`, the suffix of `short_text`, and
/// the common middle. Note that the `*_before` fields of the result hold
/// the `long_text` pieces and the `*_after` fields the `short_text` pieces.
fn diffHalfMatchInternal(
    _: DiffMatchPatch,
    allocator: std.mem.Allocator,
    long_text: []const u8,
    short_text: []const u8,
    i: usize,
) DiffError!?HalfMatchResult {
    // Start with a 1/4 length Substring at position i as a seed.
    const seed = long_text[i .. i + long_text.len / 4];
    // Index in short_text of the current seed occurrence; -1 before the
    // first search so that the search starts at index 0.
    var j: isize = -1;

    // Best candidate so far: the shared text and what surrounds it on
    // either side in each input. The four slices are views into the inputs.
    var best_common: std.ArrayListUnmanaged(u8) = .empty;
    defer best_common.deinit(allocator);
    var best_long_text_a: []const u8 = "";
    var best_long_text_b: []const u8 = "";
    var best_short_text_a: []const u8 = "";
    var best_short_text_b: []const u8 = "";

    // Visit every occurrence of the seed in short_text. The labeled block in
    // the condition advances j to the next occurrence, or ends the loop
    // when there is none.
    while (j < short_text.len and b: {
        j = @as(isize, @intCast(std.mem.indexOf(u8, short_text[@intCast(j + 1)..], seed) orelse break :b false)) + j + 1;
        break :b true;
    }) {
        // Grow the match forwards from (i, j) and backwards from just
        // before it.
        const prefix_length = diffCommonPrefix(long_text[i..], short_text[@intCast(j)..]);
        const suffix_length = diffCommonSuffix(long_text[0..i], short_text[0..@intCast(j)]);
        if (best_common.items.len < suffix_length + prefix_length) {
            // Strictly longer than the best so far: record it.
            best_common.clearRetainingCapacity();
            // The part of the match before j ...
            const a = short_text[@intCast(j - @as(isize, @intCast(suffix_length))) .. @as(usize, @intCast(j - @as(isize, @intCast(suffix_length)))) + suffix_length];
            try best_common.appendSlice(allocator, a);
            // ... followed by the part from j onwards.
            const b = short_text[@intCast(j) .. @as(usize, @intCast(j)) + prefix_length];
            try best_common.appendSlice(allocator, b);

            best_long_text_a = long_text[0 .. i - suffix_length];
            best_long_text_b = long_text[i + prefix_length ..];
            best_short_text_a = short_text[0..@intCast(j - @as(isize, @intCast(suffix_length)))];
            best_short_text_b = short_text[@intCast(j + @as(isize, @intCast(prefix_length)))..];
        }
    }
    // Only a shared middle covering at least half of long_text counts.
    if (best_common.items.len * 2 >= long_text.len) {
        // Hand back owned copies; each errdefer undoes the copies made so
        // far if a later allocation fails.
        const prefix_before = try allocator.dupe(u8, best_long_text_a);
        errdefer allocator.free(prefix_before);
        const suffix_before = try allocator.dupe(u8, best_long_text_b);
        errdefer allocator.free(suffix_before);
        const prefix_after = try allocator.dupe(u8, best_short_text_a);
        errdefer allocator.free(prefix_after);
        const suffix_after = try allocator.dupe(u8, best_short_text_b);
        errdefer allocator.free(suffix_after);
        const best_common_text = try best_common.toOwnedSlice(allocator);
        errdefer allocator.free(best_common_text);
        return .{
            .prefix_before = prefix_before,
            .suffix_before = suffix_before,
            .prefix_after = prefix_after,
            .suffix_after = suffix_after,
            .common_middle = best_common_text,
        };
    } else {
        return null;
    }
}
490 |
/// Find the 'middle snake' of a diff, split the problem in two
/// and return the recursively constructed diff.
/// See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations.
///
/// `before` is the old string and `after` the new string; `deadline` is the
/// millisecond timestamp at which to bail out if not yet complete.
///
/// If the deadline passes, or the paths never overlap, the result is a
/// single delete of `before` followed by a single insert of `after`.
/// The returned list owns all of its texts.
fn diffBisect(
    dmp: DiffMatchPatch,
    allocator: std.mem.Allocator,
    before: []const u8,
    after: []const u8,
    deadline: u64,
) DiffError!DiffList {
    const before_length: isize = @intCast(before.len);
    const after_length: isize = @intCast(after.len);
    const max_d: isize = @intCast((before.len + after.len + 1) / 2);
    // Diagonals k may be negative; adding v_offset turns them into array
    // indices.
    const v_offset = max_d;
    const v_length = 2 * max_d;

    // v1 holds the furthest x reached per diagonal by the front path,
    // v2 the same for the reverse path.
    var v1: std.ArrayListUnmanaged(isize) = try .initCapacity(allocator, @intCast(v_length));
    defer v1.deinit(allocator);
    v1.items.len = @intCast(v_length);
    var v2: std.ArrayListUnmanaged(isize) = try .initCapacity(allocator, @intCast(v_length));
    defer v2.deinit(allocator);
    v2.items.len = @intCast(v_length);

    // -1 marks a diagonal that has not been reached yet.
    var x: usize = 0;
    while (x < v_length) : (x += 1) {
        v1.items[x] = -1;
        v2.items[x] = -1;
    }
    v1.items[@intCast(v_offset + 1)] = 0;
    v2.items[@intCast(v_offset + 1)] = 0;
    const delta = before_length - after_length;
    // If the total number of characters is odd, then the front path will
    // collide with the reverse path.
    const front = (@mod(delta, 2) != 0);
    // Offsets for start and end of k loop.
    // Prevents mapping of space beyond the grid.
    var k1start: isize = 0;
    var k1end: isize = 0;
    var k2start: isize = 0;
    var k2end: isize = 0;

    var d: isize = 0;
    while (d < max_d) : (d += 1) {
        // Bail out if deadline is reached.
        if (@as(u64, @intCast(std.time.milliTimestamp())) > deadline) {
            break;
        }

        // Walk the front path one step.
        var k1 = -d + k1start;
        while (k1 <= d - k1end) : (k1 += 2) {
            const k1_offset = v_offset + k1;
            var x1: isize = 0;
            // Take x from the diagonal above (k1 + 1), or step right from
            // the diagonal below (k1 - 1), whichever is further along.
            if (k1 == -d or (k1 != d and
                v1.items[@intCast(k1_offset - 1)] < v1.items[@intCast(k1_offset + 1)]))
            {
                x1 = v1.items[@intCast(k1_offset + 1)];
            } else {
                x1 = v1.items[@intCast(k1_offset - 1)] + 1;
            }
            var y1 = x1 - k1;
            // Follow the diagonal for as long as the texts agree.
            while (x1 < before_length and
                y1 < after_length and before[@intCast(x1)] == after[@intCast(y1)])
            {
                x1 += 1;
                y1 += 1;
            }
            v1.items[@intCast(k1_offset)] = x1;
            if (x1 > before_length) {
                // Ran off the right of the graph.
                k1end += 2;
            } else if (y1 > after_length) {
                // Ran off the bottom of the graph.
                k1start += 2;
            } else if (front) {
                // Diagonal of the reverse path that corresponds to k1.
                const k2_offset = v_offset + delta - k1;
                if (k2_offset >= 0 and k2_offset < v_length and v2.items[@intCast(k2_offset)] != -1) {
                    // Mirror x2 onto top-left coordinate system.
                    const x2 = before_length - v2.items[@intCast(k2_offset)];
                    if (x1 >= x2) {
                        // Overlap detected.
                        return dmp.diffBisectSplit(allocator, before, after, x1, y1, deadline);
                    }
                }
            }
        }

        // Walk the reverse path one step.
        var k2: isize = -d + k2start;
        while (k2 <= d - k2end) : (k2 += 2) {
            const k2_offset = v_offset + k2;
            var x2: isize = 0;
            if (k2 == -d or (k2 != d and
                v2.items[@intCast(k2_offset - 1)] < v2.items[@intCast(k2_offset + 1)]))
            {
                x2 = v2.items[@intCast(k2_offset + 1)];
            } else {
                x2 = v2.items[@intCast(k2_offset - 1)] + 1;
            }
            var y2: isize = x2 - k2;
            // Same as the front walk, but comparing from the ends of the
            // texts backwards.
            while (x2 < before_length and y2 < after_length and
                before[@intCast(before_length - x2 - 1)] ==
                    after[@intCast(after_length - y2 - 1)])
            {
                x2 += 1;
                y2 += 1;
            }
            v2.items[@intCast(k2_offset)] = x2;
            if (x2 > before_length) {
                // Ran off the left of the graph.
                k2end += 2;
            } else if (y2 > after_length) {
                // Ran off the top of the graph.
                k2start += 2;
            } else if (!front) {
                // Diagonal of the front path that corresponds to k2.
                const k1_offset = v_offset + delta - k2;
                if (k1_offset >= 0 and k1_offset < v_length and v1.items[@intCast(k1_offset)] != -1) {
                    const x1 = v1.items[@intCast(k1_offset)];
                    const y1 = v_offset + x1 - k1_offset;
                    // Mirror x2 onto top-left coordinate system.
                    x2 = before_length - v2.items[@intCast(k2_offset)];
                    if (x1 >= x2) {
                        // Overlap detected.
                        return dmp.diffBisectSplit(allocator, before, after, x1, y1, deadline);
                    }
                }
            }
        }
    }
    // Diff took too long and hit the deadline or
    // number of diffs equals number of characters, no commonality at all.
    var diffs: DiffList = .empty;
    errdefer deinitDiffList(allocator, &diffs);
    try diffs.ensureUnusedCapacity(allocator, 2);
    diffs.appendAssumeCapacity(.{
        .operation = .delete,
        .text = try allocator.dupe(u8, before),
    });
    diffs.appendAssumeCapacity(.{
        .operation = .insert,
        .text = try allocator.dupe(u8, after),
    });
    return diffs;
}
639 |
/// Given the location of the 'middle snake', split the diff in two parts
/// and recurse.
///
/// `x` is the split index in `text1` (the old string) and `y` the split
/// index in `text2` (the new string); `deadline` is the time at which to
/// bail if not yet complete. Both halves are diffed without the line-mode
/// speedup and the results are concatenated into one list that owns all of
/// its texts.
fn diffBisectSplit(
    dmp: DiffMatchPatch,
    allocator: std.mem.Allocator,
    text1: []const u8,
    text2: []const u8,
    x: isize,
    y: isize,
    deadline: u64,
) DiffError!DiffList {
    const split_1: usize = @intCast(x);
    const split_2: usize = @intCast(y);
    const left_1 = text1[0..split_1];
    const left_2 = text2[0..split_2];
    const right_1 = text1[split_1..];
    const right_2 = text2[split_2..];

    // Diff the two halves one after the other.
    var merged = try dmp.diffInternal(allocator, left_1, left_2, false, deadline);
    errdefer deinitDiffList(allocator, &merged);
    var right = try dmp.diffInternal(allocator, right_1, right_2, false, deadline);
    // On success the texts in `right` move into `merged`, so only the list
    // storage is released here; the texts are freed on the error path only.
    defer right.deinit(allocator);
    errdefer for (right.items) |entry| allocator.free(entry.text);

    try merged.appendSlice(allocator, right.items);
    return merged;
}
676 |
/// Do a quick line-level diff on both strings, then rediff the parts for
/// greater accuracy.
/// This speedup can produce non-minimal diffs.
///
/// `text1_in` is the old string and `text2_in` the new string; `deadline`
/// is the time by which the diff should be complete.
/// The returned list owns all of its texts.
fn diffLineMode(
    dmp: DiffMatchPatch,
    allocator: std.mem.Allocator,
    text1_in: []const u8,
    text2_in: []const u8,
    deadline: u64,
) DiffError!DiffList {
    // Scan the text on a line-by-line basis first.
    var a = try diffLinesToChars(allocator, text1_in, text2_in);
    defer a.deinit(allocator);
    // Each byte of text1/text2 stands for one whole line of the inputs.
    const text1 = a.chars_1;
    const text2 = a.chars_2;
    const line_array = a.line_array;
    // Assigned inside the block below before any use.
    var diffs: DiffList = undefined;
    {
        // Diff the encoded texts; this intermediate list is freed as soon
        // as it has been converted.
        var char_diffs: DiffList = try dmp.diffInternal(allocator, text1, text2, false, deadline);
        defer deinitDiffList(allocator, &char_diffs);
        // Convert the diff back to original text.
        diffs = try diffCharsToLines(allocator, &char_diffs, line_array.items);
    }
    errdefer deinitDiffList(allocator, &diffs);
    // Eliminate freak matches (e.g. blank lines)
    try diffCleanupSemantic(allocator, &diffs);

    // Rediff any replacement blocks, this time character-by-character.
    // Add a dummy entry at the end so that a trailing run of edits is also
    // followed by an equality. Its text is a literal, not an allocation.
    try diffs.append(allocator, .{ .operation = .equal, .text = "" });

    var pointer: usize = 0;
    // Number of deletes/inserts seen since the last equality, and their
    // concatenated texts.
    var count_delete: usize = 0;
    var count_insert: usize = 0;
    var text_delete: std.ArrayListUnmanaged(u8) = .empty;
    var text_insert: std.ArrayListUnmanaged(u8) = .empty;
    defer {
        text_delete.deinit(allocator);
        text_insert.deinit(allocator);
    }

    while (pointer < diffs.items.len) {
        switch (diffs.items[pointer].operation) {
            .insert => {
                count_insert += 1;
                try text_insert.appendSlice(allocator, diffs.items[pointer].text);
            },
            .delete => {
                count_delete += 1;
                try text_delete.appendSlice(allocator, diffs.items[pointer].text);
            },
            .equal => {
                // Upon reaching an equality, check for prior redundancies.
                if (count_delete >= 1 and count_insert >= 1) {
                    // Delete the offending records and add the merged ones.
                    // Their texts are freed first, then the entries are
                    // removed from the list.
                    freeRangeDiffList(
                        allocator,
                        &diffs,
                        pointer - count_delete - count_insert,
                        count_delete + count_insert,
                    );
                    diffs.replaceRangeAssumeCapacity(
                        pointer - count_delete - count_insert,
                        count_delete + count_insert,
                        &.{},
                    );
                    pointer = pointer - count_delete - count_insert;
                    // Rediff the collected delete/insert text.
                    var sub_diff = try dmp.diffInternal(allocator, text_delete.items, text_insert.items, false, deadline);
                    {
                        // Until room for them is reserved, the new diffs
                        // are still owned by sub_diff.
                        errdefer deinitDiffList(allocator, &sub_diff);
                        try diffs.ensureUnusedCapacity(allocator, sub_diff.items.len);
                    }
                    // From here on only the list storage of sub_diff is
                    // released; its entries are copied into diffs below.
                    defer sub_diff.deinit(allocator);
                    const new_diff = diffs.addManyAtAssumeCapacity(pointer, sub_diff.items.len);
                    @memcpy(new_diff, sub_diff.items);
                    // Continue after the entries that were just inserted.
                    pointer = pointer + sub_diff.items.len;
                }
                count_insert = 0;
                count_delete = 0;
                text_delete.clearRetainingCapacity();
                text_insert.clearRetainingCapacity();
            },
        }
        pointer += 1;
    }
    diffs.items.len -= 1; // Remove the dummy entry at the end.

    return diffs;
}
770 |
/// Result of `diffLinesToChars`: the two encoded texts and the list of
/// unique lines they index into. `deinit` frees both encoded texts and the
/// list storage, but not the line slices stored in the list.
const LinesToCharsResult = struct {
    chars_1: []const u8,
    chars_2: []const u8,
    line_array: std.ArrayListUnmanaged([]const u8),

    /// Free the two encoded texts and the line list with `allocator`.
    pub fn deinit(self: *LinesToCharsResult, allocator: Allocator) void {
        for ([_][]const u8{ self.chars_1, self.chars_2 }) |encoded| {
            allocator.free(encoded);
        }
        self.line_array.deinit(allocator);
    }
};
782 |
/// Split two texts into a list of unique lines and reduce each text to a
/// byte string in which every byte is the index of one line in that list.
///
/// Entry 0 of the list is intentionally blank so that no encoded byte is 0.
/// The caller owns the result and must call `deinit` on it. The lines in
/// the list are slices of `text1` / `text2`, so the inputs must outlive
/// the result.
fn diffLinesToChars(
    allocator: std.mem.Allocator,
    text1: []const u8,
    text2: []const u8,
) DiffError!LinesToCharsResult {
    // e.g. unique_lines[4] == "Hello\n"
    var unique_lines: std.ArrayListUnmanaged([]const u8) = .empty;
    errdefer unique_lines.deinit(allocator);
    // e.g. index_of_line.get("Hello\n") == 4
    var index_of_line: std.StringHashMapUnmanaged(usize) = .empty;
    defer index_of_line.deinit(allocator);

    // "\x00" is a valid character, but various debuggers don't like it,
    // so a junk entry occupies index 0.
    try unique_lines.append(allocator, "");

    // text1 may use about two thirds of the index space, text2 the rest.
    const encoded_1 = try diffLinesToCharsMunge(allocator, text1, &unique_lines, &index_of_line, 170);
    errdefer allocator.free(encoded_1);
    const encoded_2 = try diffLinesToCharsMunge(allocator, text2, &unique_lines, &index_of_line, 255);

    return .{
        .chars_1 = encoded_1,
        .chars_2 = encoded_2,
        .line_array = unique_lines,
    };
}
812 |
/// Encode `text` as a byte string in which every byte is the index of one
/// of its lines in `line_array`.
///
/// Lines not seen before are appended to `line_array` and recorded in
/// `line_hash` (line -> index). A line includes its trailing "\n" when it
/// has one. Once `line_array` already holds `max_lines` entries, the whole
/// remainder of the text is stored as a single final "line".
///
/// The caller owns the returned slice. The slices stored in `line_array`
/// and `line_hash` point into `text`.
fn diffLinesToCharsMunge(
    allocator: std.mem.Allocator,
    text: []const u8,
    line_array: *std.ArrayListUnmanaged([]const u8),
    line_hash: *std.StringHashMapUnmanaged(usize),
    max_lines: usize,
) DiffError![]const u8 {
    var encoded: std.ArrayListUnmanaged(u8) = .empty;
    defer encoded.deinit(allocator);

    // Start of the line currently being encoded.
    var cursor: usize = 0;
    while (cursor < text.len) {
        // One past the newline that ends this line, or the end of the text
        // for an unterminated last line.
        var next: usize = if (std.mem.indexOfScalarPos(u8, text, cursor, '\n')) |newline|
            newline + 1
        else
            text.len;
        var line = text[cursor..next];

        if (line_hash.get(line)) |known| {
            try encoded.append(allocator, @intCast(known));
        } else {
            if (line_array.items.len == max_lines) {
                // Bail out at 255 because char 256 == char 0: treat
                // everything that is left as one line.
                line = text[cursor..];
                next = text.len;
            }
            try line_array.append(allocator, line);
            const fresh = line_array.items.len - 1;
            try line_hash.put(allocator, line, fresh);
            try encoded.append(allocator, @intCast(fresh));
        }
        cursor = next;
    }
    return try encoded.toOwnedSlice(allocator);
}
856 |
/// Rehydrate a line-encoded diff back into real text.
///
/// Each byte of each diff text in `char_diffs` is used as an index into
/// `line_array`, and the referenced lines are concatenated to form the text
/// of the corresponding output diff. `char_diffs` itself is left untouched.
///
/// The caller owns the returned list and the texts stored in it.
fn diffCharsToLines(
    allocator: std.mem.Allocator,
    char_diffs: *DiffList,
    line_array: []const []const u8,
) DiffError!DiffList {
    var rehydrated: DiffList = .empty;
    errdefer deinitDiffList(allocator, &rehydrated);
    // One output diff per input diff, so reserve everything up front.
    try rehydrated.ensureTotalCapacity(allocator, char_diffs.items.len);

    // Scratch buffer; `toOwnedSlice` hands its contents off and resets it.
    var buffer: std.ArrayListUnmanaged(u8) = .empty;
    defer buffer.deinit(allocator);

    for (char_diffs.items) |encoded| {
        for (encoded.text) |line_index| {
            try buffer.appendSlice(allocator, line_array[line_index]);
        }
        rehydrated.appendAssumeCapacity(.{
            .operation = encoded.operation,
            .text = try buffer.toOwnedSlice(allocator),
        });
    }
    return rehydrated;
}
884 |
/// Reorder and merge like edit sections. Merge equalities.
/// Any edit section can move as long as it doesn't cross an equality.
///
/// `diffs` is rewritten in place. Texts that get replaced or removed are
/// released with `allocator`, and every text that is created here is
/// allocated with it. An empty list is accepted and stays empty.
fn diffCleanupMerge(allocator: std.mem.Allocator, diffs: *DiffList) DiffError!void {
    // Add a dummy entry at the end so that a trailing run of edits is
    // flushed by the `.equal` branch below.
    try diffs.append(allocator, .{ .operation = .equal, .text = "" });
    var pointer: usize = 0;
    var count_delete: usize = 0;
    var count_insert: usize = 0;

    // Concatenated text of the deletions / insertions seen since the last
    // equality.
    var text_delete: std.ArrayListUnmanaged(u8) = .empty;
    defer text_delete.deinit(allocator);

    var text_insert: std.ArrayListUnmanaged(u8) = .empty;
    defer text_insert.deinit(allocator);

    while (pointer < diffs.items.len) {
        switch (diffs.items[pointer].operation) {
            .insert => {
                count_insert += 1;
                try text_insert.appendSlice(allocator, diffs.items[pointer].text);
                pointer += 1;
            },
            .delete => {
                count_delete += 1;
                try text_delete.appendSlice(allocator, diffs.items[pointer].text);
                pointer += 1;
            },
            .equal => {
                // Upon reaching an equality, check for prior redundancies.
                if (count_delete + count_insert > 1) {
                    if (count_delete != 0 and count_insert != 0) {
                        // Factor out any common prefixes.
                        const prefix_length = diffCommonPrefix(text_insert.items, text_delete.items);
                        if (prefix_length != 0) {
                            if ((pointer - count_delete - count_insert) > 0 and
                                diffs.items[pointer - count_delete - count_insert - 1].operation == .equal)
                            {
                                // Append the prefix to the equality that
                                // precedes this run of edits.
                                const ii = pointer - count_delete - count_insert - 1;
                                const ot = diffs.items[ii].text;
                                const nt = try allocator.alloc(u8, ot.len + prefix_length);
                                @memcpy(nt[0..ot.len], ot);
                                @memcpy(nt[ot.len..], text_insert.items[0..prefix_length]);
                                diffs.items[ii].text = nt;
                                allocator.free(ot);
                            } else {
                                // No preceding equality: create one at the
                                // front of the list.
                                try diffs.ensureUnusedCapacity(allocator, 1);
                                const text = try allocator.dupe(u8, text_insert.items[0..prefix_length]);
                                diffs.insertAssumeCapacity(0, .{ .operation = .equal, .text = text });
                                pointer += 1;
                            }
                            text_insert.replaceRangeAssumeCapacity(0, prefix_length, &.{});
                            text_delete.replaceRangeAssumeCapacity(0, prefix_length, &.{});
                        }
                        // Factor out any common suffixes.
                        // @ZigPort this seems very wrong
                        const suffix_length = diffCommonSuffix(text_insert.items, text_delete.items);
                        if (suffix_length != 0) {
                            // Prepend the suffix to the current equality.
                            const old_text = diffs.items[pointer].text;
                            diffs.items[pointer].text = try std.mem.concat(allocator, u8, &.{
                                text_insert.items[text_insert.items.len - suffix_length ..],
                                old_text,
                            });
                            allocator.free(old_text);
                            text_insert.items.len -= suffix_length;
                            text_delete.items.len -= suffix_length;
                        }
                    }
                    // Delete the offending records and add the merged ones.
                    pointer -= count_delete + count_insert;
                    if (count_delete + count_insert > 0) {
                        freeRangeDiffList(allocator, diffs, pointer, count_delete + count_insert);
                        diffs.replaceRangeAssumeCapacity(pointer, count_delete + count_insert, &.{});
                    }

                    if (text_delete.items.len != 0) {
                        try diffs.ensureUnusedCapacity(allocator, 1);
                        diffs.insertAssumeCapacity(pointer, .{
                            .operation = .delete,
                            .text = try allocator.dupe(u8, text_delete.items),
                        });
                        pointer += 1;
                    }
                    if (text_insert.items.len != 0) {
                        try diffs.ensureUnusedCapacity(allocator, 1);
                        diffs.insertAssumeCapacity(pointer, .{
                            .operation = .insert,
                            .text = try allocator.dupe(u8, text_insert.items),
                        });
                        pointer += 1;
                    }
                    pointer += 1;
                } else if (pointer != 0 and diffs.items[pointer - 1].operation == .equal) {
                    // Merge this equality with the previous one.
                    // Note: can't use realloc because the text is const.
                    const ot = diffs.items[pointer - 1].text;
                    const nt = try allocator.alloc(u8, ot.len + diffs.items[pointer].text.len);
                    @memcpy(nt[0..ot.len], ot);
                    @memcpy(nt[ot.len..], diffs.items[pointer].text);
                    diffs.items[pointer - 1].text = nt;
                    allocator.free(ot);
                    const dead_diff = diffs.orderedRemove(pointer);
                    allocator.free(dead_diff.text);
                } else {
                    pointer += 1;
                }
                count_insert = 0;
                count_delete = 0;
                text_delete.clearRetainingCapacity();
                text_insert.clearRetainingCapacity();
            },
        }
    }
    // Drop the trailing entry again if it is (still) empty.
    if (diffs.items[diffs.items.len - 1].text.len == 0) {
        diffs.items.len -= 1;
    }

    // Second pass: look for single edits surrounded on both sides by
    // equalities which can be shifted sideways to eliminate an equality.
    // e.g: A<ins>BA</ins>C -> <ins>AB</ins>AC
    var changes = false;
    pointer = 1;
    // Intentionally ignore the first and last element (don't need checking).
    // The list can be empty at this point, so the bound uses a saturating
    // subtraction; a plain `len - 1` would overflow `usize`.
    while (pointer < (diffs.items.len -| 1)) {
        if (diffs.items[pointer - 1].operation == .equal and
            diffs.items[pointer + 1].operation == .equal)
        {
            // This is a single edit surrounded by equalities.
            if (std.mem.endsWith(u8, diffs.items[pointer].text, diffs.items[pointer - 1].text)) {
                // Shift the edit over the previous equality.
                const old_pt = diffs.items[pointer].text;
                const pt = try std.mem.concat(allocator, u8, &.{
                    diffs.items[pointer - 1].text,
                    diffs.items[pointer].text[0 .. diffs.items[pointer].text.len -
                        diffs.items[pointer - 1].text.len],
                });
                allocator.free(old_pt);
                diffs.items[pointer].text = pt;
                const old_pt1t = diffs.items[pointer + 1].text;
                const p1t = try std.mem.concat(allocator, u8, &.{
                    diffs.items[pointer - 1].text,
                    diffs.items[pointer + 1].text,
                });
                allocator.free(old_pt1t);
                diffs.items[pointer + 1].text = p1t;
                freeRangeDiffList(allocator, diffs, pointer - 1, 1);
                diffs.replaceRangeAssumeCapacity(pointer - 1, 1, &.{});
                changes = true;
            } else if (std.mem.startsWith(u8, diffs.items[pointer].text, diffs.items[pointer + 1].text)) {
                // Shift the edit over the next equality.
                const old_ptm1 = diffs.items[pointer - 1].text;
                const pm1t = try std.mem.concat(allocator, u8, &.{
                    diffs.items[pointer - 1].text,
                    diffs.items[pointer + 1].text,
                });
                allocator.free(old_ptm1);
                diffs.items[pointer - 1].text = pm1t;
                const old_pt = diffs.items[pointer].text;
                const pt = try std.mem.concat(allocator, u8, &.{
                    diffs.items[pointer].text[diffs.items[pointer + 1].text.len..],
                    diffs.items[pointer + 1].text,
                });
                allocator.free(old_pt);
                diffs.items[pointer].text = pt;
                freeRangeDiffList(allocator, diffs, pointer + 1, 1);
                diffs.replaceRangeAssumeCapacity(pointer + 1, 1, &.{});
                changes = true;
            }
        }
        pointer += 1;
    }
    // If shifts were made, the diff needs reordering and another shift sweep.
    if (changes) {
        try diffCleanupMerge(allocator, diffs);
    }
}
1061 |
/// Reduce the number of edits by eliminating semantically trivial
/// equalities.
///
/// Works in three steps, all in place on `diffs`:
///   1. Equalities that are no longer than the edits on both of their sides
///      are split into a delete + insert pair.
///   2. The list is normalized (`diffCleanupMerge`, only when step 1 changed
///      something) and edits are aligned (`diffCleanupSemanticLossless`).
///   3. Overlaps between an adjacent deletion and insertion are pulled out
///      into an equality.
/// Texts that are replaced are freed with `allocator`; new texts are
/// allocated with it.
pub fn diffCleanupSemantic(allocator: std.mem.Allocator, diffs: *DiffList) DiffError!void {
    var changes = false;
    // Stack of indices where equalities are found.
    var equalities: std.ArrayListUnmanaged(isize) = .empty;
    defer equalities.deinit(allocator);
    // Text of the equality on top of the stack; null when there is none.
    var last_equality: ?[]const u8 = null;
    // Index of current position. Signed because it is reset to -1 (and then
    // incremented to 0) when the scan has to restart from the beginning.
    var pointer: isize = 0;
    // Number of characters that changed prior to the equality.
    var length_insertions1: usize = 0;
    var length_deletions1: usize = 0;
    // Number of characters that changed after the equality.
    var length_insertions2: usize = 0;
    var length_deletions2: usize = 0;
    while (pointer < diffs.items.len) {
        if (diffs.items[@intCast(pointer)].operation == .equal) { // Equality found.
            try equalities.append(allocator, pointer);
            length_insertions1 = length_insertions2;
            length_deletions1 = length_deletions2;
            length_insertions2 = 0;
            length_deletions2 = 0;
            last_equality = diffs.items[@intCast(pointer)].text;
        } else { // an insertion or deletion
            if (diffs.items[@intCast(pointer)].operation == .insert) {
                length_insertions2 += diffs.items[@intCast(pointer)].text.len;
            } else {
                length_deletions2 += diffs.items[@intCast(pointer)].text.len;
            }
            // Eliminate an equality that is smaller or equal to the edits on both
            // sides of it.
            if (last_equality != null and
                (last_equality.?.len <= @max(length_insertions1, length_deletions1)) and
                (last_equality.?.len <= @max(length_insertions2, length_deletions2)))
            {
                // Duplicate record.
                try diffs.ensureUnusedCapacity(allocator, 1);
                diffs.insertAssumeCapacity(
                    @intCast(equalities.items[equalities.items.len - 1]),
                    .{
                        .operation = .delete,
                        .text = try allocator.dupe(u8, last_equality.?),
                    },
                );
                // Change second copy to insert.
                diffs.items[@intCast(equalities.items[equalities.items.len - 1] + 1)].operation = .insert;
                // Throw away the equality we just deleted.
                _ = equalities.pop();
                // Throw away the previous equality too; it needs to be
                // reevaluated.
                if (equalities.items.len > 0) {
                    _ = equalities.pop();
                }
                pointer = if (equalities.items.len > 0) equalities.items[equalities.items.len - 1] else -1;
                length_insertions1 = 0; // Reset the counters.
                length_deletions1 = 0;
                length_insertions2 = 0;
                length_deletions2 = 0;
                last_equality = null;
                changes = true;
            }
        }
        pointer += 1;
    }

    // Normalize the diff.
    if (changes) {
        try diffCleanupMerge(allocator, diffs);
    }
    try diffCleanupSemanticLossless(allocator, diffs);

    // Find any overlaps between deletions and insertions.
    // e.g: <del>abcxxx</del><ins>xxxdef</ins>
    //   -> <del>abc</del>xxx<ins>def</ins>
    // e.g: <del>xxxabc</del><ins>defxxx</ins>
    //   -> <ins>def</ins>xxx<del>abc</del>
    // Only extract an overlap if it is as big as the edit ahead or behind it.
    pointer = 1;
    while (pointer < diffs.items.len) {
        if (diffs.items[@intCast(pointer - 1)].operation == .delete and
            diffs.items[@intCast(pointer)].operation == .insert)
        {
            const deletion = diffs.items[@intCast(pointer - 1)].text;
            const insertion = diffs.items[@intCast(pointer)].text;
            const overlap_length1: usize = diffCommonOverlap(deletion, insertion);
            const overlap_length2: usize = diffCommonOverlap(insertion, deletion);
            // "overlap >= len / 2" evaluated exactly in integers:
            // `len - len / 2` is ceil(len / 2) and cannot overflow.
            const half_deletion = deletion.len - deletion.len / 2;
            const half_insertion = insertion.len - insertion.len / 2;
            if (overlap_length1 >= overlap_length2) {
                if (overlap_length1 >= half_deletion or overlap_length1 >= half_insertion) {
                    // Overlap found.
                    // Insert an equality and trim the surrounding edits.
                    try diffs.ensureUnusedCapacity(allocator, 1);
                    diffs.insertAssumeCapacity(@intCast(pointer), .{
                        .operation = .equal,
                        .text = try allocator.dupe(u8, insertion[0..overlap_length1]),
                    });
                    diffs.items[@intCast(pointer - 1)].text =
                        try allocator.dupe(u8, deletion[0 .. deletion.len - overlap_length1]);
                    allocator.free(deletion);
                    diffs.items[@intCast(pointer + 1)].text =
                        try allocator.dupe(u8, insertion[overlap_length1..]);
                    allocator.free(insertion);
                    pointer += 1;
                }
            } else {
                if (overlap_length2 >= half_deletion or overlap_length2 >= half_insertion) {
                    // Reverse overlap found.
                    // Insert an equality and swap and trim the surrounding edits.
                    try diffs.ensureUnusedCapacity(allocator, 1);
                    diffs.insertAssumeCapacity(@intCast(pointer), .{
                        .operation = .equal,
                        .text = try allocator.dupe(u8, deletion[0..overlap_length2]),
                    });
                    const new_minus = try allocator.dupe(u8, insertion[0 .. insertion.len - overlap_length2]);
                    errdefer allocator.free(new_minus); // necessary due to swap
                    const new_plus = try allocator.dupe(u8, deletion[overlap_length2..]);
                    // Both replacements exist now; only then release the
                    // old texts so a failed allocation leaves the list intact.
                    allocator.free(deletion);
                    allocator.free(insertion);
                    diffs.items[@intCast(pointer - 1)].operation = .insert;
                    diffs.items[@intCast(pointer - 1)].text = new_minus;
                    diffs.items[@intCast(pointer + 1)].operation = .delete;
                    diffs.items[@intCast(pointer + 1)].text = new_plus;
                    pointer += 1;
                }
            }
            pointer += 1;
        }
        pointer += 1;
    }
}
1195 |
/// Look for single edits surrounded on both sides by equalities
/// which can be shifted sideways to align the edit to a word boundary.
/// e.g: The c<ins>at c</ins>ame. -> The <ins>cat </ins>came.
///
/// `diffs` is rewritten in place. Texts that are replaced or removed are
/// freed with `allocator`; new texts are allocated with it.
pub fn diffCleanupSemanticLossless(
    allocator: std.mem.Allocator,
    diffs: *DiffList,
) DiffError!void {
    var pointer: usize = 1;
    // Intentionally ignore the first and last element (don't need checking).
    while (pointer + 1 < diffs.items.len) {
        if (diffs.items[pointer - 1].operation == .equal and
            diffs.items[pointer + 1].operation == .equal)
        {
            // This is a single edit surrounded by equalities.
            // Work on private copies; the list is only touched at the end.
            var equality_1: std.ArrayListUnmanaged(u8) = .empty;
            defer equality_1.deinit(allocator);
            try equality_1.appendSlice(allocator, diffs.items[pointer - 1].text);

            var edit: std.ArrayListUnmanaged(u8) = .empty;
            defer edit.deinit(allocator);
            try edit.appendSlice(allocator, diffs.items[pointer].text);

            var equality_2: std.ArrayListUnmanaged(u8) = .empty;
            defer equality_2.deinit(allocator);
            try equality_2.appendSlice(allocator, diffs.items[pointer + 1].text);

            // First, shift the edit as far left as possible.
            const common_offset = diffCommonSuffix(equality_1.items, edit.items);
            if (common_offset > 0) {
                const split = edit.items.len - common_offset;
                // The shared suffix moves to the front of the next equality...
                try equality_2.insertSlice(allocator, 0, edit.items[split..]);
                // ...is dropped from the previous equality...
                equality_1.items.len -= common_offset;
                // ...and is rotated to the front of the edit, in place.
                std.mem.rotate(u8, edit.items, split);
            }

            // Second, step character by character right,
            // looking for the best fit.
            var best_equality_1: std.ArrayListUnmanaged(u8) = .empty;
            defer best_equality_1.deinit(allocator);
            try best_equality_1.appendSlice(allocator, equality_1.items);

            var best_edit: std.ArrayListUnmanaged(u8) = .empty;
            defer best_edit.deinit(allocator);
            try best_edit.appendSlice(allocator, edit.items);

            var best_equality_2: std.ArrayListUnmanaged(u8) = .empty;
            defer best_equality_2.deinit(allocator);
            try best_equality_2.appendSlice(allocator, equality_2.items);

            var best_score = diffCleanupSemanticScore(equality_1.items, edit.items) +
                diffCleanupSemanticScore(edit.items, equality_2.items);

            while (edit.items.len != 0 and equality_2.items.len != 0 and edit.items[0] == equality_2.items[0]) {
                // Slide the edit one byte to the right.
                try equality_1.append(allocator, edit.items[0]);

                _ = edit.orderedRemove(0);
                try edit.append(allocator, equality_2.items[0]);

                _ = equality_2.orderedRemove(0);

                const score = diffCleanupSemanticScore(equality_1.items, edit.items) +
                    diffCleanupSemanticScore(edit.items, equality_2.items);
                // The >= encourages trailing rather than leading whitespace on
                // edits.
                if (score >= best_score) {
                    best_score = score;

                    best_equality_1.clearRetainingCapacity();
                    try best_equality_1.appendSlice(allocator, equality_1.items);

                    best_edit.clearRetainingCapacity();
                    try best_edit.appendSlice(allocator, edit.items);

                    best_equality_2.clearRetainingCapacity();
                    try best_equality_2.appendSlice(allocator, equality_2.items);
                }
            }

            if (!std.mem.eql(u8, diffs.items[pointer - 1].text, best_equality_1.items)) {
                // We have an improvement, save it back to the diff.
                if (best_equality_1.items.len != 0) {
                    const old_text = diffs.items[pointer - 1].text;
                    diffs.items[pointer - 1].text = try allocator.dupe(u8, best_equality_1.items);
                    allocator.free(old_text);
                } else {
                    // The previous equality vanished; the edit moves down
                    // one slot.
                    const old_diff = diffs.orderedRemove(pointer - 1);
                    allocator.free(old_diff.text);
                    pointer -= 1;
                }
                const old_edit_text = diffs.items[pointer].text;
                diffs.items[pointer].text = try allocator.dupe(u8, best_edit.items);
                allocator.free(old_edit_text);
                if (best_equality_2.items.len != 0) {
                    const old_text2 = diffs.items[pointer + 1].text;
                    diffs.items[pointer + 1].text = try allocator.dupe(u8, best_equality_2.items);
                    allocator.free(old_text2);
                } else {
                    // The next equality vanished; step back so the increment
                    // below revisits this edit with its new neighbour.
                    const old_diff = diffs.orderedRemove(pointer + 1);
                    allocator.free(old_diff.text);
                    pointer -= 1;
                }
            }
        }
        pointer += 1;
    }
}
1313 |
/// Rate how "natural" the boundary between `one` and `two` is, looking at
/// the last byte of `one` and the first byte of `two` (ASCII classes only).
///
/// Returns, from best to worst:
///   6 - either side is empty (the boundary is an edge of the text)
///   5 - the boundary touches a blank line
///   4 - the boundary touches a whitespace control character (e.g. "\n")
///   3 - non-whitespace punctuation followed by whitespace (end of sentence)
///   2 - the boundary touches whitespace
///   1 - the boundary touches a non-alphanumeric byte
///   0 - anything else
fn diffCleanupSemanticScore(one: []const u8, two: []const u8) usize {
    // Edges are the best.
    if (one.len == 0 or two.len == 0) return 6;

    // Each port of this function behaves slightly differently due to
    // subtle differences in each language's definition of things like
    // 'whitespace'. Since this function's purpose is largely cosmetic,
    // the choice has been made to use each language's native features
    // rather than force total conformity.
    const last = one[one.len - 1];
    const first = two[0];

    const non_alnum_1 = !std.ascii.isAlphanumeric(last);
    const non_alnum_2 = !std.ascii.isAlphanumeric(first);
    const space_1 = non_alnum_1 and std.ascii.isWhitespace(last);
    const space_2 = non_alnum_2 and std.ascii.isWhitespace(first);
    const break_1 = space_1 and std.ascii.isControl(last);
    const break_2 = space_2 and std.ascii.isControl(first);

    if (break_1 or break_2) {
        // A blank line is a line break that is doubled up, with an optional
        // "\r" in front of each "\n".
        const blank_end = break_1 and
            (std.mem.endsWith(u8, one, "\n\n") or std.mem.endsWith(u8, one, "\n\r\n"));
        const blank_start = break_2 and
            (std.mem.startsWith(u8, two, "\n\n") or
                std.mem.startsWith(u8, two, "\r\n\n") or
                std.mem.startsWith(u8, two, "\n\r\n") or
                std.mem.startsWith(u8, two, "\r\n\r\n"));
        // Five points for blank lines, four for plain line breaks.
        return if (blank_end or blank_start) 5 else 4;
    }
    // Three points for end of sentences.
    if (non_alnum_1 and !space_1 and space_2) return 3;
    // Two points for whitespace.
    if (space_1 or space_2) return 2;
    // One point for non-alphanumeric.
    if (non_alnum_1 or non_alnum_2) return 1;
    return 0;
}
1367 |
/// Reduce the number of edits by eliminating operationally trivial
/// equalities.
///
/// An equality shorter than `dmp.diff_edit_cost` that has edits on both
/// sides is a candidate. It is split into a delete + insert pair when
/// either all four of {insertion before, deletion before, insertion after,
/// deletion after} are present, or exactly three of them are present and the
/// equality is shorter than half of `dmp.diff_edit_cost`. If anything was
/// split, the list is normalized with `diffCleanupMerge`.
pub fn diffCleanupEfficiency(
    dmp: DiffMatchPatch,
    allocator: std.mem.Allocator,
    diffs: *DiffList,
) DiffError!void {
    var changed = false;
    // Stack of indices of candidate equalities.
    var candidates: std.ArrayListUnmanaged(usize) = .empty;
    defer candidates.deinit(allocator);
    // Text of the candidate on top of the stack, or "" when there is none.
    var last_equality: []const u8 = "";
    // Edits seen before the last candidate equality.
    var before_ins = false;
    var before_del = false;
    // Edits seen after the last candidate equality.
    var after_ins = false;
    var after_del = false;

    // Signed so that it can be reset to -1 and stepped back to 0.
    var cursor: isize = 0;
    while (cursor < diffs.items.len) : (cursor += 1) {
        const index: usize = @intCast(cursor);
        switch (diffs.items[index].operation) {
            .equal => {
                const text = diffs.items[index].text;
                if (text.len < dmp.diff_edit_cost and (after_ins or after_del)) {
                    // Candidate found.
                    try candidates.append(allocator, index);
                    before_ins = after_ins;
                    before_del = after_del;
                    last_equality = text;
                } else {
                    // Not a candidate, and can never become one.
                    candidates.clearRetainingCapacity();
                    last_equality = "";
                }
                after_ins = false;
                after_del = false;
                continue;
            },
            .delete => after_del = true,
            .insert => after_ins = true,
        }

        // An edit was just recorded: decide whether the last candidate
        // equality should be split.
        if (last_equality.len == 0) continue;
        const edit_kinds = @as(u8, @intFromBool(before_ins)) + @as(u8, @intFromBool(before_del)) +
            @as(u8, @intFromBool(after_ins)) + @as(u8, @intFromBool(after_del));
        const should_split = edit_kinds == 4 or
            (last_equality.len < dmp.diff_edit_cost / 2 and edit_kinds == 3);
        if (!should_split) continue;

        // Duplicate record.
        const split_at = candidates.items[candidates.items.len - 1];
        try diffs.ensureUnusedCapacity(allocator, 1);
        diffs.insertAssumeCapacity(split_at, .{
            .operation = .delete,
            .text = try allocator.dupe(u8, last_equality),
        });
        // Change second copy to insert.
        diffs.items[split_at + 1].operation = .insert;
        _ = candidates.pop(); // Throw away the equality we just deleted.
        last_equality = "";

        if (before_ins and before_del) {
            // No changes made which could affect previous entry, keep going.
            after_ins = true;
            after_del = true;
            candidates.clearRetainingCapacity();
        } else {
            // Rewind to the candidate before the previous one (or to the
            // very start) and rescan from there.
            if (candidates.items.len > 0) {
                _ = candidates.pop();
            }
            cursor = if (candidates.items.len > 0)
                @intCast(candidates.items[candidates.items.len - 1])
            else
                -1;
            after_ins = false;
            after_del = false;
        }
        changed = true;
    }

    if (changed) {
        try diffCleanupMerge(allocator, diffs);
    }
}
1460 |
/// Determine if the suffix of one string is the prefix of another.
///
/// Returns the number of bytes common to the end of `text1_in` and the
/// start of `text2_in` (0 when either is empty or nothing overlaps).
fn diffCommonOverlap(text1_in: []const u8, text2_in: []const u8) usize {
    // Eliminate the null case.
    if (text1_in.len == 0 or text2_in.len == 0) return 0;

    // Only the last `window` bytes of text1 and the first `window` bytes of
    // text2 can take part in an overlap.
    const window = @min(text1_in.len, text2_in.len);
    const tail = text1_in[text1_in.len - window ..];
    const head = text2_in[0..window];

    // Quick check for the worst case.
    if (std.mem.eql(u8, tail, head)) return window;

    // Start by looking for a single character match
    // and increase length until no match is found.
    // Performance analysis: https://neil.fraser.name/news/2010/11/04/
    var best: usize = 0;
    var candidate: usize = 1;
    while (std.mem.indexOf(u8, head, tail[window - candidate ..])) |offset| {
        candidate += offset;
        if (offset == 0 or std.mem.eql(u8, tail[window - candidate ..], head[0..candidate])) {
            best = candidate;
            candidate += 1;
        }
    }
    return best;
}
1507 |
1508 | // DONE [✅]: Allocate all text in diffs to
1509 | // not cause segfault while freeing
1510 |
test diffCommonPrefix {
    // Detect any common prefix.
    const cases = [_]struct { a: []const u8, b: []const u8, expected: usize }{
        .{ .a = "abc", .b = "xyz", .expected = 0 }, // Null case
        .{ .a = "1234abcdef", .b = "1234xyz", .expected = 4 }, // Non-null case
        .{ .a = "1234", .b = "1234xyz", .expected = 4 }, // Whole case
    };
    for (cases) |case| {
        try testing.expectEqual(case.expected, diffCommonPrefix(case.a, case.b));
    }
}
1517 |
test diffCommonSuffix {
    // Detect any common suffix.
    const cases = [_]struct { a: []const u8, b: []const u8, expected: usize }{
        .{ .a = "abc", .b = "xyz", .expected = 0 }, // Null case
        .{ .a = "abcdef1234", .b = "xyz1234", .expected = 4 }, // Non-null case
        .{ .a = "1234", .b = "xyz1234", .expected = 4 }, // Whole case
    };
    for (cases) |case| {
        try testing.expectEqual(case.expected, diffCommonSuffix(case.a, case.b));
    }
}
1524 |
test diffCommonOverlap {
    // Detect any suffix/prefix overlap.
    const cases = [_]struct { a: []const u8, b: []const u8, expected: usize }{
        .{ .a = "", .b = "abcd", .expected = 0 }, // Null case
        .{ .a = "abc", .b = "abcd", .expected = 3 }, // Whole case
        .{ .a = "123456", .b = "abcd", .expected = 0 }, // No overlap
        .{ .a = "123456xxx", .b = "xxxabcd", .expected = 3 }, // Overlap

        // Some overly clever languages (C#) may treat ligatures as equal to
        // their component letters. E.g. U+FB01 == 'fi'
        .{ .a = "fi", .b = "\u{fb01}", .expected = 0 }, // Unicode
    };
    for (cases) |case| {
        try testing.expectEqual(case.expected, diffCommonOverlap(case.a, case.b));
    }
}
1536 |
/// Test helper: run `diffHalfMatch` on `params.before` / `params.after`
/// with the configuration in `params.dmp` and compare the outcome against
/// `params.expected`. A non-null result is released again before returning.
fn testDiffHalfMatch(
    allocator: std.mem.Allocator,
    params: struct {
        dmp: DiffMatchPatch,
        before: []const u8,
        after: []const u8,
        expected: ?HalfMatchResult,
    },
) !void {
    const actual = try params.dmp.diffHalfMatch(allocator, params.before, params.after);
    defer {
        if (actual) |half_match| half_match.deinit(allocator);
    }
    try testing.expectEqualDeep(params.expected, actual);
}
1550 |
test diffHalfMatch {
    const one_timeout: DiffMatchPatch = .{ .diff_timeout = 1 };

    // The case table reuses the parameter struct of `testDiffHalfMatch`, so
    // every entry can be handed to it unchanged.
    const Case = @typeInfo(@TypeOf(testDiffHalfMatch)).@"fn".params[1].type.?;
    const cases = [_]Case{
        // No match #1
        .{
            .dmp = one_timeout,
            .before = "1234567890",
            .after = "abcdef",
            .expected = null,
        },
        // No match #2
        .{
            .dmp = one_timeout,
            .before = "12345",
            .after = "23",
            .expected = null,
        },
        // Single Match #1
        .{
            .dmp = one_timeout,
            .before = "1234567890",
            .after = "a345678z",
            .expected = .{
                .prefix_before = "12",
                .suffix_before = "90",
                .prefix_after = "a",
                .suffix_after = "z",
                .common_middle = "345678",
            },
        },
        // Single Match #2
        .{
            .dmp = one_timeout,
            .before = "a345678z",
            .after = "1234567890",
            .expected = .{
                .prefix_before = "a",
                .suffix_before = "z",
                .prefix_after = "12",
                .suffix_after = "90",
                .common_middle = "345678",
            },
        },
        // Single Match #3
        .{
            .dmp = one_timeout,
            .before = "abc56789z",
            .after = "1234567890",
            .expected = .{
                .prefix_before = "abc",
                .suffix_before = "z",
                .prefix_after = "1234",
                .suffix_after = "0",
                .common_middle = "56789",
            },
        },
        // Single Match #4
        .{
            .dmp = one_timeout,
            .before = "a23456xyz",
            .after = "1234567890",
            .expected = .{
                .prefix_before = "a",
                .suffix_before = "xyz",
                .prefix_after = "1",
                .suffix_after = "7890",
                .common_middle = "23456",
            },
        },
        // Multiple Matches #1
        .{
            .dmp = one_timeout,
            .before = "121231234123451234123121",
            .after = "a1234123451234z",
            .expected = .{
                .prefix_before = "12123",
                .suffix_before = "123121",
                .prefix_after = "a",
                .suffix_after = "z",
                .common_middle = "1234123451234",
            },
        },
        // Multiple Matches #2
        .{
            .dmp = one_timeout,
            .before = "x-=-=-=-=-=-=-=-=-=-=-=-=",
            .after = "xx-=-=-=-=-=-=-=",
            .expected = .{
                .prefix_before = "",
                .suffix_before = "-=-=-=-=-=",
                .prefix_after = "x",
                .suffix_after = "",
                .common_middle = "x-=-=-=-=-=-=-=",
            },
        },
        // Multiple Matches #3
        .{
            .dmp = one_timeout,
            .before = "-=-=-=-=-=-=-=-=-=-=-=-=y",
            .after = "-=-=-=-=-=-=-=yy",
            .expected = .{
                .prefix_before = "-=-=-=-=-=",
                .suffix_before = "",
                .prefix_after = "",
                .suffix_after = "y",
                .common_middle = "-=-=-=-=-=-=-=y",
            },
        },

        // Other cases

        // Non-optimal halfmatch.
        // Optimal diff would be -q+x=H-i+e=lloHe+Hu=llo-Hew+y not -qHillo+x=HelloHe-w+Hulloy
        .{
            .dmp = one_timeout,
            .before = "qHilloHelloHew",
            .after = "xHelloHeHulloy",
            .expected = .{
                .prefix_before = "qHillo",
                .suffix_before = "w",
                .prefix_after = "x",
                .suffix_after = "Hulloy",
                .common_middle = "HelloHe",
            },
        },
        // Same input with `diff_timeout = 0`: no half match is expected.
        .{
            .dmp = .{ .diff_timeout = 0 },
            .before = "qHilloHelloHew",
            .after = "xHelloHeHulloy",
            .expected = null,
        },
    };

    for (cases) |case| {
        try checkAllAllocationFailures(testing.allocator, testDiffHalfMatch, .{case});
    }
}
1693 |
1694 | test diffLinesToChars {
1695 |     const allocator = testing.allocator;
1696 |     // Convert lines down to characters; `tmp_array_list` is the expected line table.
1697 |     var tmp_array_list: std.ArrayListUnmanaged([]const u8) = .empty;
1698 |     defer tmp_array_list.deinit(allocator);
1699 |     try tmp_array_list.append(allocator, "");
1700 |     try tmp_array_list.append(allocator, "alpha\n");
1701 |     try tmp_array_list.append(allocator, "beta\n");
1702 |
1703 |     var result = try diffLinesToChars(allocator, "alpha\nbeta\nalpha\n", "beta\nalpha\nbeta\n");
1704 |     try testing.expectEqualStrings("\u{0001}\u{0002}\u{0001}", result.chars_1); // Shared lines #1
1705 |     try testing.expectEqualStrings("\u{0002}\u{0001}\u{0002}", result.chars_2); // Shared lines #2
1706 |     try testing.expectEqualDeep(tmp_array_list.items, result.line_array.items); // Shared lines #3
1707 |
1708 |     tmp_array_list.clearRetainingCapacity();
1709 |     try tmp_array_list.append(allocator, "");
1710 |     try tmp_array_list.append(allocator, "alpha\r\n");
1711 |     try tmp_array_list.append(allocator, "beta\r\n");
1712 |     try tmp_array_list.append(allocator, "\r\n");
1713 |     result.deinit(allocator);
1714 |
1715 |     result = try diffLinesToChars(allocator, "", "alpha\r\nbeta\r\n\r\n\r\n");
1716 |     try testing.expectEqualStrings("", result.chars_1); // Empty string and blank lines #1
1717 |     try testing.expectEqualStrings("\u{0001}\u{0002}\u{0003}\u{0003}", result.chars_2); // Empty string and blank lines #2
1718 |     try testing.expectEqualDeep(tmp_array_list.items, result.line_array.items); // Empty string and blank lines #3
1719 |
1720 |     tmp_array_list.clearRetainingCapacity();
1721 |     try tmp_array_list.append(allocator, "");
1722 |     try tmp_array_list.append(allocator, "a");
1723 |     try tmp_array_list.append(allocator, "b");
1724 |     result.deinit(allocator);
1725 |
1726 |     result = try diffLinesToChars(allocator, "a", "b");
1727 |     try testing.expectEqualStrings("\u{0001}", result.chars_1); // No linebreaks #1.
1728 |     try testing.expectEqualStrings("\u{0002}", result.chars_2); // No linebreaks #2.
1729 |     try testing.expectEqualDeep(tmp_array_list.items, result.line_array.items); // No linebreaks #3.
1730 |     result.deinit(allocator);
1731 |
1732 |     // TODO: Use more than 256 lines to reveal any 8-bit limitations; this needs
1733 |     // some Unicode logic that has not been written yet. Casting to Unicode is
1734 |     // straightforward and should sort correctly; the bigger concern is the odd
1735 |     // behavior when the 'char' is itself a newline. Uncomment the
1736 |     // expectEqualSlices below to see it. The cleanup logic in the actual line
1737 |     // diff may take care of the problem, but that is not a satisfying answer.
1738 |     const n: u8 = 255;
1739 |     tmp_array_list.clearRetainingCapacity();
1740 |
1741 |     var line_list: std.ArrayListUnmanaged(u8) = .empty;
1742 |     defer line_list.deinit(allocator);
1743 |     var char_list: std.ArrayListUnmanaged(u8) = .empty;
1744 |     defer char_list.deinit(allocator);
1745 |     // Backing storage for the expected lines. A pointer to a literal built from
1746 |     // the runtime value `i` would only be valid inside the loop body.
1747 |     var line_storage: [n - 1][2]u8 = undefined;
1748 |     var i: u8 = 1;
1749 |     while (i < n) : (i += 1) {
1750 |         line_storage[i - 1] = .{ i, '\n' };
1751 |         try tmp_array_list.append(allocator, &line_storage[i - 1]);
1752 |         try line_list.appendSlice(allocator, &line_storage[i - 1]);
1753 |         try char_list.append(allocator, i);
1754 |     }
1755 |     try testing.expectEqual(@as(usize, n - 1), tmp_array_list.items.len); // Test initialization fail #1
1756 |     try testing.expectEqual(@as(usize, n - 1), char_list.items.len); // Test initialization fail #2
1757 |     try tmp_array_list.insert(allocator, 0, "");
1758 |     result = try diffLinesToChars(allocator, line_list.items, "");
1759 |     defer result.deinit(allocator);
1760 |     // TODO: This isn't equal, should it be?
1761 |     // try testing.expectEqualSlices(u8, char_list.items, result.chars_1);
1762 |     try testing.expectEqualStrings("", result.chars_2);
1763 |     // TODO this is wrong because of the max_value I think?
1764 |     // try testing.expectEqualDeep(tmp_array_list.items, result.line_array.items);
1765 | }
1766 |
1767 | /// Decodes heap-owned copies of `params.diffs` with `diffCharsToLines` and
1768 | /// checks the decoded list against `params.expected`.
1769 | fn testDiffCharsToLines(
1770 |     allocator: std.mem.Allocator,
1771 |     params: struct {
1772 |         diffs: []const Diff,
1773 |         line_array: []const []const u8,
1774 |         expected: []const Diff,
1775 |     },
1776 | ) !void {
1777 |     var encoded: DiffList = try .initCapacity(allocator, params.diffs.len);
1778 |     defer deinitDiffList(allocator, &encoded);
1779 |     for (params.diffs) |source| {
1780 |         const text_copy = try allocator.dupe(u8, source.text);
1781 |         encoded.appendAssumeCapacity(.{ .operation = source.operation, .text = text_copy });
1782 |     }
1783 |     var decoded = try diffCharsToLines(allocator, &encoded, params.line_array);
1784 |     defer deinitDiffList(allocator, &decoded);
1785 |     try testing.expectEqualDeep(params.expected, decoded.items);
1786 | }
1787 |
1788 | test diffCharsToLines {
1789 |     // Convert chars up to lines.
1790 |     //
1791 |     // The char-encoded input is passed as string literals: testDiffCharsToLines
1792 |     // duplicates every diff text itself, so no heap-allocated input (and no
1793 |     // matching cleanup) is needed in the test body.
1794 |     try checkAllAllocationFailures(testing.allocator, testDiffCharsToLines, .{.{
1795 |         .diffs = &.{
1796 |             .{ .operation = .equal, .text = "\u{0001}\u{0002}\u{0001}" },
1797 |             .{ .operation = .insert, .text = "\u{0002}\u{0001}\u{0002}" },
1798 |         },
1799 |         .line_array = &[_][]const u8{
1800 |             "",
1801 |             "alpha\n",
1802 |             "beta\n",
1803 |         },
1804 |         .expected = &.{
1805 |             .{ .operation = .equal, .text = "alpha\nbeta\nalpha\n" },
1806 |             .{ .operation = .insert, .text = "beta\nalpha\nbeta\n" },
1807 |         },
1808 |     }});
1809 |
1810 |     // TODO: Implement exhaustive tests
1811 | }
1812 |
1813 | /// Runs `diffCleanupMerge` over heap-owned copies of `params.input` and checks
1814 | /// that the list, cleaned up in place, equals `params.expected`.
1815 | fn testDiffCleanupMerge(allocator: std.mem.Allocator, params: struct {
1816 |     input: []const Diff,
1817 |     expected: []const Diff,
1818 | }) !void {
1819 |     var working: DiffList = try .initCapacity(allocator, params.input.len);
1820 |     defer deinitDiffList(allocator, &working);
1821 |     for (params.input) |source| {
1822 |         const text_copy = try allocator.dupe(u8, source.text);
1823 |         working.appendAssumeCapacity(.{ .operation = source.operation, .text = text_copy });
1824 |     }
1825 |     try diffCleanupMerge(allocator, &working);
1826 |     try testing.expectEqualDeep(params.expected, working.items);
1827 | }
1828 |
1829 | test diffCleanupMerge {
1830 | // Cleanup a messy diff: each case runs diffCleanupMerge on `input` and compares the result with `expected`.
1831 |
1832 | // No change case: the list comes back unchanged.
1833 | try checkAllAllocationFailures(testing.allocator, testDiffCleanupMerge, .{.{
1834 | .input = &.{
1835 | .{ .operation = .equal, .text = "a" },
1836 | .{ .operation = .delete, .text = "b" },
1837 | .{ .operation = .insert, .text = "c" },
1838 | },
1839 | .expected = &.{
1840 | .{ .operation = .equal, .text = "a" },
1841 | .{ .operation = .delete, .text = "b" },
1842 | .{ .operation = .insert, .text = "c" },
1843 | },
1844 | }});
1845 |
1846 | // Merge equalities: adjacent equal entries collapse into one.
1847 | try checkAllAllocationFailures(testing.allocator, testDiffCleanupMerge, .{.{
1848 | .input = &.{
1849 | .{ .operation = .equal, .text = "a" },
1850 | .{ .operation = .equal, .text = "b" },
1851 | .{ .operation = .equal, .text = "c" },
1852 | },
1853 | .expected = &.{
1854 | .{ .operation = .equal, .text = "abc" },
1855 | },
1856 | }});
1857 |
1858 | // Merge deletions: adjacent delete entries collapse into one.
1859 | try checkAllAllocationFailures(testing.allocator, testDiffCleanupMerge, .{.{
1860 | .input = &.{
1861 | .{ .operation = .delete, .text = "a" },
1862 | .{ .operation = .delete, .text = "b" },
1863 | .{ .operation = .delete, .text = "c" },
1864 | },
1865 | .expected = &.{
1866 | .{ .operation = .delete, .text = "abc" },
1867 | },
1868 | }});
1869 |
1870 | // Merge insertions: adjacent insert entries collapse into one.
1871 | try checkAllAllocationFailures(testing.allocator, testDiffCleanupMerge, .{.{
1872 | .input = &.{
1873 | .{ .operation = .insert, .text = "a" },
1874 | .{ .operation = .insert, .text = "b" },
1875 | .{ .operation = .insert, .text = "c" },
1876 | },
1877 | .expected = &.{
1878 | .{ .operation = .insert, .text = "abc" },
1879 | },
1880 | }});
1881 |
1882 | // Merge interweave: alternating deletes and inserts are grouped by operation.
1883 | try checkAllAllocationFailures(testing.allocator, testDiffCleanupMerge, .{.{
1884 | .input = &.{
1885 | .{ .operation = .delete, .text = "a" },
1886 | .{ .operation = .insert, .text = "b" },
1887 | .{ .operation = .delete, .text = "c" },
1888 | .{ .operation = .insert, .text = "d" },
1889 | .{ .operation = .equal, .text = "e" },
1890 | .{ .operation = .equal, .text = "f" },
1891 | },
1892 | .expected = &.{
1893 | .{ .operation = .delete, .text = "ac" },
1894 | .{ .operation = .insert, .text = "bd" },
1895 | .{ .operation = .equal, .text = "ef" },
1896 | },
1897 | }});
1898 |
1899 | // Prefix and suffix detection: text shared by the merged delete and insert becomes equal entries.
1900 | try checkAllAllocationFailures(testing.allocator, testDiffCleanupMerge, .{.{
1901 | .input = &.{
1902 | .{ .operation = .delete, .text = "a" },
1903 | .{ .operation = .insert, .text = "abc" },
1904 | .{ .operation = .delete, .text = "dc" },
1905 | },
1906 | .expected = &.{
1907 | .{ .operation = .equal, .text = "a" },
1908 | .{ .operation = .delete, .text = "d" },
1909 | .{ .operation = .insert, .text = "b" },
1910 | .{ .operation = .equal, .text = "c" },
1911 | },
1912 | }});
1913 |
1914 | // Prefix and suffix detection with equalities: the shared text joins the neighboring equal entries.
1915 | try checkAllAllocationFailures(testing.allocator, testDiffCleanupMerge, .{.{
1916 | .input = &.{
1917 | .{ .operation = .equal, .text = "x" },
1918 | .{ .operation = .delete, .text = "a" },
1919 | .{ .operation = .insert, .text = "abc" },
1920 | .{ .operation = .delete, .text = "dc" },
1921 | .{ .operation = .equal, .text = "y" },
1922 | },
1923 | .expected = &.{
1924 | .{ .operation = .equal, .text = "xa" },
1925 | .{ .operation = .delete, .text = "d" },
1926 | .{ .operation = .insert, .text = "b" },
1927 | .{ .operation = .equal, .text = "cy" },
1928 | },
1929 | }});
1930 |
1931 | // Slide edit left: the insert moves ahead of the leading equality, which merges into the following one.
1932 | try checkAllAllocationFailures(testing.allocator, testDiffCleanupMerge, .{.{
1933 | .input = &.{
1934 | .{ .operation = .equal, .text = "a" },
1935 | .{ .operation = .insert, .text = "ba" },
1936 | .{ .operation = .equal, .text = "c" },
1937 | },
1938 | .expected = &.{
1939 | .{ .operation = .insert, .text = "ab" },
1940 | .{ .operation = .equal, .text = "ac" },
1941 | },
1942 | }});
1943 |
1944 | // Slide edit right: the insert moves behind the trailing equality, which merges into the preceding one.
1945 | try checkAllAllocationFailures(testing.allocator, testDiffCleanupMerge, .{.{
1946 | .input = &.{
1947 | .{ .operation = .equal, .text = "c" },
1948 | .{ .operation = .insert, .text = "ab" },
1949 | .{ .operation = .equal, .text = "a" },
1950 | },
1951 | .expected = &.{
1952 | .{ .operation = .equal, .text = "ca" },
1953 | .{ .operation = .insert, .text = "ba" },
1954 | },
1955 | }});
1956 |
1957 | // Slide edit left recursive: both deletes end up in a single leading delete.
1958 | try checkAllAllocationFailures(testing.allocator, testDiffCleanupMerge, .{.{
1959 | .input = &.{
1960 | .{ .operation = .equal, .text = "a" },
1961 | .{ .operation = .delete, .text = "b" },
1962 | .{ .operation = .equal, .text = "c" },
1963 | .{ .operation = .delete, .text = "ac" },
1964 | .{ .operation = .equal, .text = "x" },
1965 | },
1966 | .expected = &.{
1967 | .{ .operation = .delete, .text = "abc" },
1968 | .{ .operation = .equal, .text = "acx" },
1969 | },
1970 | }});
1971 |
1972 | // Slide edit right recursive: both deletes end up in a single trailing delete.
1973 | try checkAllAllocationFailures(testing.allocator, testDiffCleanupMerge, .{.{
1974 | .input = &.{
1975 | .{ .operation = .equal, .text = "x" },
1976 | .{ .operation = .delete, .text = "ca" },
1977 | .{ .operation = .equal, .text = "c" },
1978 | .{ .operation = .delete, .text = "b" },
1979 | .{ .operation = .equal, .text = "a" },
1980 | },
1981 | .expected = &.{
1982 | .{ .operation = .equal, .text = "xca" },
1983 | .{ .operation = .delete, .text = "cba" },
1984 | },
1985 | }});
1986 |
1987 | // Empty merge: the delete text is entirely a shared suffix, so no delete entry remains.
1988 | try checkAllAllocationFailures(testing.allocator, testDiffCleanupMerge, .{.{
1989 | .input = &.{
1990 | .{ .operation = .delete, .text = "b" },
1991 | .{ .operation = .insert, .text = "ab" },
1992 | .{ .operation = .equal, .text = "c" },
1993 | },
1994 | .expected = &.{
1995 | .{ .operation = .insert, .text = "a" },
1996 | .{ .operation = .equal, .text = "bc" },
1997 | },
1998 | }});
1999 |
2000 | // Empty equality: the empty equal entry is dropped.
2001 | try checkAllAllocationFailures(testing.allocator, testDiffCleanupMerge, .{.{
2002 | .input = &.{
2003 | .{ .operation = .equal, .text = "" },
2004 | .{ .operation = .insert, .text = "a" },
2005 | .{ .operation = .equal, .text = "b" },
2006 | },
2007 | .expected = &.{
2008 | .{ .operation = .insert, .text = "a" },
2009 | .{ .operation = .equal, .text = "b" },
2010 | },
2011 | }});
2012 | }
2013 |
2014 | /// Applies `diffCleanupSemanticLossless` to heap-owned copies of `params.input`
2015 | /// and checks the list, modified in place, against `params.expected`.
2016 | fn testDiffCleanupSemanticLossless(allocator: std.mem.Allocator, params: struct {
2017 |     input: []const Diff,
2018 |     expected: []const Diff,
2019 | }) !void {
2020 |     var working: DiffList = try .initCapacity(allocator, params.input.len);
2021 |     defer deinitDiffList(allocator, &working);
2022 |
2023 |     for (params.input) |source| {
2024 |         const text_copy = try allocator.dupe(u8, source.text);
2025 |         working.appendAssumeCapacity(.{ .operation = source.operation, .text = text_copy });
2026 |     }
2027 |
2028 |     try diffCleanupSemanticLossless(allocator, &working);
2029 |
2030 |     try testing.expectEqualDeep(params.expected, working.items);
2031 | }
2032 |
2033 | /// Builds a `DiffList` whose entries hold heap-allocated copies of the texts in
2034 | /// `diff_slice`. On error the partial list is handed to `deinitDiffList`.
2035 | fn sliceToDiffList(allocator: Allocator, diff_slice: []const Diff) !DiffList {
2036 |     var copies: DiffList = .empty;
2037 |     errdefer deinitDiffList(allocator, &copies);
2038 |     try copies.ensureTotalCapacity(allocator, diff_slice.len);
2039 |     for (diff_slice) |source| {
2040 |         const text_copy = try allocator.dupe(u8, source.text);
2041 |         copies.appendAssumeCapacity(.{ .operation = source.operation, .text = text_copy });
2042 |     }
2043 |     return copies;
2044 | }
2045 |
2046 | test diffCleanupSemanticLossless {
2047 | // Null case: an empty list stays empty.
2048 | try checkAllAllocationFailures(testing.allocator, testDiffCleanupSemanticLossless, .{.{
2049 | .input = &[_]Diff{},
2050 | .expected = &[_]Diff{},
2051 | }});
2052 |
2053 | // Blank lines: the insert is shifted so that it begins and ends at a blank-line boundary.
2054 | try checkAllAllocationFailures(testing.allocator, testDiffCleanupSemanticLossless, .{.{
2055 | .input = &.{
2056 | .{ .operation = .equal, .text = "AAA\r\n\r\nBBB" },
2057 | .{ .operation = .insert, .text = "\r\nDDD\r\n\r\nBBB" },
2058 | .{ .operation = .equal, .text = "\r\nEEE" },
2059 | },
2060 | .expected = &.{
2061 | .{ .operation = .equal, .text = "AAA\r\n\r\n" },
2062 | .{ .operation = .insert, .text = "BBB\r\nDDD\r\n\r\n" },
2063 | .{ .operation = .equal, .text = "BBB\r\nEEE" },
2064 | },
2065 | }});
2066 |
2067 | try checkAllAllocationFailures(testing.allocator, testDiffCleanupSemanticLossless, .{.{ // Line boundaries
2068 | .input = &.{
2069 | .{ .operation = .equal, .text = "AAA\r\nBBB" },
2070 | .{ .operation = .insert, .text = " DDD\r\nBBB" },
2071 | .{ .operation = .equal, .text = " EEE" },
2072 | },
2073 | .expected = &.{
2074 | .{ .operation = .equal, .text = "AAA\r\n" },
2075 | .{ .operation = .insert, .text = "BBB DDD\r\n" },
2076 | .{ .operation = .equal, .text = "BBB EEE" },
2077 | },
2078 | }});
2079 |
2080 | try checkAllAllocationFailures(testing.allocator, testDiffCleanupSemanticLossless, .{.{ // Word boundaries
2081 | .input = &.{
2082 | .{ .operation = .equal, .text = "The c" },
2083 | .{ .operation = .insert, .text = "ow and the c" },
2084 | .{ .operation = .equal, .text = "at." },
2085 | },
2086 | .expected = &.{
2087 | .{ .operation = .equal, .text = "The " },
2088 | .{ .operation = .insert, .text = "cow and the " },
2089 | .{ .operation = .equal, .text = "cat." },
2090 | },
2091 | }});
2092 |
2093 | try checkAllAllocationFailures(testing.allocator, testDiffCleanupSemanticLossless, .{.{ // Alphanumeric boundaries
2094 | .input = &.{
2095 | .{ .operation = .equal, .text = "The-c" },
2096 | .{ .operation = .insert, .text = "ow-and-the-c" },
2097 | .{ .operation = .equal, .text = "at." },
2098 | },
2099 | .expected = &.{
2100 | .{ .operation = .equal, .text = "The-" },
2101 | .{ .operation = .insert, .text = "cow-and-the-" },
2102 | .{ .operation = .equal, .text = "cat." },
2103 | },
2104 | }});
2105 |
2106 | try checkAllAllocationFailures(testing.allocator, testDiffCleanupSemanticLossless, .{.{ // Hitting the start
2107 | .input = &.{
2108 | .{ .operation = .equal, .text = "a" },
2109 | .{ .operation = .delete, .text = "a" },
2110 | .{ .operation = .equal, .text = "ax" },
2111 | },
2112 | .expected = &.{
2113 | .{ .operation = .delete, .text = "a" },
2114 | .{ .operation = .equal, .text = "aax" },
2115 | },
2116 | }});
2117 |
2118 | try checkAllAllocationFailures(testing.allocator, testDiffCleanupSemanticLossless, .{.{ // Hitting the end
2119 | .input = &.{
2120 | .{ .operation = .equal, .text = "xa" },
2121 | .{ .operation = .delete, .text = "a" },
2122 | .{ .operation = .equal, .text = "a" },
2123 | },
2124 | .expected = &.{
2125 | .{ .operation = .equal, .text = "xaa" },
2126 | .{ .operation = .delete, .text = "a" },
2127 | },
2128 | }});
2129 |
2130 | try checkAllAllocationFailures(testing.allocator, testDiffCleanupSemanticLossless, .{.{ // Sentence boundaries
2131 | .input = &.{
2132 | .{ .operation = .equal, .text = "The xxx. The " },
2133 | .{ .operation = .insert, .text = "zzz. The " },
2134 | .{ .operation = .equal, .text = "yyy." },
2135 | },
2136 | .expected = &.{
2137 | .{ .operation = .equal, .text = "The xxx." },
2138 | .{ .operation = .insert, .text = " The zzz." },
2139 | .{ .operation = .equal, .text = " The yyy." },
2140 | },
2141 | }});
2142 | }
2143 |
2144 | /// Reconstructs the two texts a diff list describes: index 0 concatenates
2145 | /// every non-insert entry, index 1 every non-delete entry. Both returned
2146 | /// slices are allocated with `allocator` and must be freed by the caller.
2147 | fn rebuildtexts(allocator: std.mem.Allocator, diffs: DiffList) ![2][]const u8 {
2148 |     var before: std.ArrayListUnmanaged(u8) = .empty;
2149 |     errdefer before.deinit(allocator);
2150 |     var after: std.ArrayListUnmanaged(u8) = .empty;
2151 |     errdefer after.deinit(allocator);
2152 |
2153 |     for (diffs.items) |entry| {
2154 |         switch (entry.operation) {
2155 |             .delete => try before.appendSlice(allocator, entry.text),
2156 |             .insert => try after.appendSlice(allocator, entry.text),
2157 |             .equal => {
2158 |                 try before.appendSlice(allocator, entry.text);
2159 |                 try after.appendSlice(allocator, entry.text);
2160 |             },
2161 |         }
2162 |     }
2163 |
2164 |     const before_text = try before.toOwnedSlice(allocator);
2165 |     errdefer allocator.free(before_text);
2166 |     return .{ before_text, try after.toOwnedSlice(allocator) };
2167 | }
2168 |
2169 | /// Rebuilds both texts from `diffs` and checks them against `params.before`
2170 | /// and `params.after`; the rebuilt slices are freed on every exit path.
2171 | fn testRebuildTexts(allocator: Allocator, diffs: DiffList, params: struct {
2172 |     before: []const u8,
2173 |     after: []const u8,
2174 | }) !void {
2175 |     const rebuilt_before, const rebuilt_after = try rebuildtexts(allocator, diffs);
2176 |     defer allocator.free(rebuilt_after);
2177 |     defer allocator.free(rebuilt_before);
2178 |     try testing.expectEqualStrings(params.before, rebuilt_before);
2179 |     try testing.expectEqualStrings(params.after, rebuilt_after);
2180 | }
2181 |
2182 | test rebuildtexts {
2183 | { // Insert, equal and delete entries: "before" omits the insert, "after" omits the delete.
2184 | var diffs = try sliceToDiffList(testing.allocator, &.{
2185 | .{ .operation = .insert, .text = "abcabc" },
2186 | .{ .operation = .equal, .text = "defdef" },
2187 | .{ .operation = .delete, .text = "ghighi" },
2188 | });
2189 | defer deinitDiffList(testing.allocator, &diffs);
2190 | try checkAllAllocationFailures(testing.allocator, testRebuildTexts, .{
2191 | diffs,
2192 | .{
2193 | .before = "defdefghighi",
2194 | .after = "abcabcdefdef",
2195 | },
2196 | });
2197 | }
2198 | { // No equal entries: "before" is the delete text, "after" is the insert text.
2199 | var diffs = try sliceToDiffList(testing.allocator, &.{
2200 | .{ .operation = .insert, .text = "xxx" },
2201 | .{ .operation = .delete, .text = "yyy" },
2202 | });
2203 | defer deinitDiffList(testing.allocator, &diffs);
2204 | try checkAllAllocationFailures(testing.allocator, testRebuildTexts, .{
2205 | diffs,
2206 | .{
2207 | .before = "yyy",
2208 | .after = "xxx",
2209 | },
2210 | });
2211 | }
2212 | { // Only equal entries: both rebuilt texts are identical.
2213 | var diffs = try sliceToDiffList(testing.allocator, &.{
2214 | .{ .operation = .equal, .text = "xyz" },
2215 | .{ .operation = .equal, .text = "pdq" },
2216 | });
2217 | defer deinitDiffList(testing.allocator, &diffs);
2218 | try checkAllAllocationFailures(testing.allocator, testRebuildTexts, .{
2219 | diffs,
2220 | .{
2221 | .before = "xyzpdq",
2222 | .after = "xyzpdq",
2223 | },
2224 | });
2225 | }
2226 | }
2227 |
2228 | /// Bisects `params.before` against `params.after` under `params.deadline`
2229 | /// and checks the resulting diff list against `params.expected`.
2230 | fn testDiffBisect(allocator: std.mem.Allocator, params: struct {
2231 |     dmp: DiffMatchPatch,
2232 |     before: []const u8,
2233 |     after: []const u8,
2234 |     deadline: u64,
2235 |     expected: []const Diff,
2236 | }) !void {
2237 |     const dmp = params.dmp;
2238 |     var actual = try dmp.diffBisect(allocator, params.before, params.after, params.deadline);
2239 |     defer deinitDiffList(allocator, &actual);
2240 |     try testing.expectEqualDeep(params.expected, actual.items);
2241 | }
2242 |
2243 | test diffBisect {
2244 | const this: DiffMatchPatch = .{ .diff_timeout = 0 };
2245 |
2246 | const a = "cat";
2247 | const b = "map";
2248 |
2249 | // Normal: with the largest possible deadline the expected result is a character-level diff.
2250 | try checkAllAllocationFailures(testing.allocator, testDiffBisect, .{.{
2251 | .dmp = this,
2252 | .before = a,
2253 | .after = b,
2254 | .deadline = std.math.maxInt(u64), // TODO(Travis): confirm maxInt(u64) is the right stand-in for DateTime.MaxValue
2255 | .expected = &.{
2256 | .{ .operation = .delete, .text = "c" },
2257 | .{ .operation = .insert, .text = "m" },
2258 | .{ .operation = .equal, .text = "a" },
2259 | .{ .operation = .delete, .text = "t" },
2260 | .{ .operation = .insert, .text = "p" },
2261 | },
2262 | }});
2263 |
2264 | // Timeout: with a deadline of 0 the expected result is one delete of `a` followed by one insert of `b`.
2265 | try checkAllAllocationFailures(testing.allocator, testDiffBisect, .{.{
2266 | .dmp = this,
2267 | .before = a,
2268 | .after = b,
2269 | .deadline = 0, // TODO(Travis): confirm 0 is the right stand-in for DateTime.MinValue
2270 | .expected = &.{
2271 | .{ .operation = .delete, .text = "cat" },
2272 | .{ .operation = .insert, .text = "map" },
2273 | },
2274 | }});
2275 | }
2276 |
2277 | /// Runs a default-configured `diff` of two similar sentences and frees the result.
2278 | fn diffHalfMatchLeak(allocator: Allocator) !void {
2279 |     const original = "That quick brown fox jumped over a lazy dog.";
2280 |     const revised = "The quick brown fox jumps over the lazy dog.";
2281 |     var result = try DiffMatchPatch.default.diff(allocator, original, revised, true);
2282 |     defer deinitDiffList(allocator, &result);
2283 | }
2284 |
2285 | test "diffHalfMatch leak regression test" {
2286 | try checkAllAllocationFailures(testing.allocator, diffHalfMatchLeak, .{}); // diffHalfMatchLeak takes only the allocator, hence the empty argument tuple
2287 | }
2288 |
2289 | /// Diffs `params.before` against `params.after` using `params.dmp` and
2290 | /// checks the resulting list against `params.expected`.
2291 | fn testDiff(allocator: std.mem.Allocator, params: struct {
2292 |     dmp: DiffMatchPatch,
2293 |     before: []const u8,
2294 |     after: []const u8,
2295 |     check_lines: bool,
2296 |     expected: []const Diff,
2297 | }) !void {
2298 |     const dmp = params.dmp;
2299 |     var actual = try dmp.diff(allocator, params.before, params.after, params.check_lines);
2300 |     defer deinitDiffList(allocator, &actual);
2301 |     try testing.expectEqualDeep(params.expected, actual.items);
2302 | }
2303 |
2304 | test diff {
2305 | const this: DiffMatchPatch = .{ .diff_timeout = 0 };
2306 |
2307 | // Null case.
2308 | try checkAllAllocationFailures(testing.allocator, testDiff, .{.{
2309 | .dmp = this,
2310 | .before = "",
2311 | .after = "",
2312 | .check_lines = false,
2313 | .expected = &[_]Diff{},
2314 | }});
2315 |
2316 | // Equality.
2317 | try checkAllAllocationFailures(testing.allocator, testDiff, .{.{
2318 | .dmp = this,
2319 | .before = "abc",
2320 | .after = "abc",
2321 | .check_lines = false,
2322 | .expected = &.{
2323 | .{ .operation = .equal, .text = "abc" },
2324 | },
2325 | }});
2326 |
2327 | // Simple insertion.
2328 | try checkAllAllocationFailures(testing.allocator, testDiff, .{.{
2329 | .dmp = this,
2330 | .before = "abc",
2331 | .after = "ab123c",
2332 | .check_lines = false,
2333 | .expected = &.{
2334 | .{ .operation = .equal, .text = "ab" },
2335 | .{ .operation = .insert, .text = "123" },
2336 | .{ .operation = .equal, .text = "c" },
2337 | },
2338 | }});
2339 |
2340 | // Simple deletion.
2341 | try checkAllAllocationFailures(testing.allocator, testDiff, .{.{
2342 | .dmp = this,
2343 | .before = "a123bc",
2344 | .after = "abc",
2345 | .check_lines = false,
2346 | .expected = &.{
2347 | .{ .operation = .equal, .text = "a" },
2348 | .{ .operation = .delete, .text = "123" },
2349 | .{ .operation = .equal, .text = "bc" },
2350 | },
2351 | }});
2352 |
2353 | // Two insertions.
2354 | try checkAllAllocationFailures(testing.allocator, testDiff, .{.{
2355 | .dmp = this,
2356 | .before = "abc",
2357 | .after = "a123b456c",
2358 | .check_lines = false,
2359 | .expected = &.{
2360 | .{ .operation = .equal, .text = "a" },
2361 | .{ .operation = .insert, .text = "123" },
2362 | .{ .operation = .equal, .text = "b" },
2363 | .{ .operation = .insert, .text = "456" },
2364 | .{ .operation = .equal, .text = "c" },
2365 | },
2366 | }});
2367 |
2368 | // Two deletions.
2369 | try checkAllAllocationFailures(testing.allocator, testDiff, .{.{
2370 | .dmp = this,
2371 | .before = "a123b456c",
2372 | .after = "abc",
2373 | .check_lines = false,
2374 | .expected = &.{
2375 | .{ .operation = .equal, .text = "a" },
2376 | .{ .operation = .delete, .text = "123" },
2377 | .{ .operation = .equal, .text = "b" },
2378 | .{ .operation = .delete, .text = "456" },
2379 | .{ .operation = .equal, .text = "c" },
2380 | },
2381 | }});
2382 |
2383 | // Simple case #1
2384 | try checkAllAllocationFailures(testing.allocator, testDiff, .{.{
2385 | .dmp = this,
2386 | .before = "a",
2387 | .after = "b",
2388 | .check_lines = false,
2389 | .expected = &.{
2390 | .{ .operation = .delete, .text = "a" },
2391 | .{ .operation = .insert, .text = "b" },
2392 | },
2393 | }});
2394 |
2395 | // Simple case #2
2396 | try checkAllAllocationFailures(testing.allocator, testDiff, .{.{
2397 | .dmp = this,
2398 | .before = "Apples are a fruit.",
2399 | .after = "Bananas are also fruit.",
2400 | .check_lines = false,
2401 | .expected = &.{
2402 | .{ .operation = .delete, .text = "Apple" },
2403 | .{ .operation = .insert, .text = "Banana" },
2404 | .{ .operation = .equal, .text = "s are a" },
2405 | .{ .operation = .insert, .text = "lso" },
2406 | .{ .operation = .equal, .text = " fruit." },
2407 | },
2408 | }});
2409 |
2410 | // Simple case #3
2411 | try checkAllAllocationFailures(testing.allocator, testDiff, .{.{
2412 | .dmp = this,
2413 | .before = "ax\t",
2414 | .after = "\u{0680}x\x00",
2415 | .check_lines = false,
2416 | .expected = &.{
2417 | .{ .operation = .delete, .text = "a" },
2418 | .{ .operation = .insert, .text = "\u{0680}" },
2419 | .{ .operation = .equal, .text = "x" },
2420 | .{ .operation = .delete, .text = "\t" },
2421 | .{ .operation = .insert, .text = "\x00" },
2422 | },
2423 | }});
2424 |
2425 | // Overlap #1
2426 | try checkAllAllocationFailures(testing.allocator, testDiff, .{.{
2427 | .dmp = this,
2428 | .before = "1ayb2",
2429 | .after = "abxab",
2430 | .check_lines = false,
2431 | .expected = &.{
2432 | .{ .operation = .delete, .text = "1" },
2433 | .{ .operation = .equal, .text = "a" },
2434 | .{ .operation = .delete, .text = "y" },
2435 | .{ .operation = .equal, .text = "b" },
2436 | .{ .operation = .delete, .text = "2" },
2437 | .{ .operation = .insert, .text = "xab" },
2438 | },
2439 | }});
2440 |
2441 | // Overlap #2
2442 | try checkAllAllocationFailures(testing.allocator, testDiff, .{.{
2443 | .dmp = this,
2444 | .before = "abcy",
2445 | .after = "xaxcxabc",
2446 | .check_lines = false,
2447 | .expected = &.{
2448 | .{ .operation = .insert, .text = "xaxcx" },
2449 | .{ .operation = .equal, .text = "abc" },
2450 | .{ .operation = .delete, .text = "y" },
2451 | },
2452 | }});
2453 |
2454 | // Overlap #3
2455 | try checkAllAllocationFailures(testing.allocator, testDiff, .{.{
2456 | .dmp = this,
2457 | .before = "ABCDa=bcd=efghijklmnopqrsEFGHIJKLMNOefg",
2458 | .after = "a-bcd-efghijklmnopqrs",
2459 | .check_lines = false,
2460 | .expected = &.{
2461 | .{ .operation = .delete, .text = "ABCD" },
2462 | .{ .operation = .equal, .text = "a" },
2463 | .{ .operation = .delete, .text = "=" },
2464 | .{ .operation = .insert, .text = "-" },
2465 | .{ .operation = .equal, .text = "bcd" },
2466 | .{ .operation = .delete, .text = "=" },
2467 | .{ .operation = .insert, .text = "-" },
2468 | .{ .operation = .equal, .text = "efghijklmnopqrs" },
2469 | .{ .operation = .delete, .text = "EFGHIJKLMNOefg" },
2470 | },
2471 | }});
2472 |
2473 | // Large equality
2474 | try checkAllAllocationFailures(testing.allocator, testDiff, .{.{
2475 | .dmp = this,
2476 | .before = "a [[Pennsylvania]] and [[New",
2477 | .after = " and [[Pennsylvania]]",
2478 | .check_lines = false,
2479 | .expected = &.{
2480 | .{ .operation = .insert, .text = " " },
2481 | .{ .operation = .equal, .text = "a" },
2482 | .{ .operation = .insert, .text = "nd" },
2483 | .{ .operation = .equal, .text = " [[Pennsylvania]]" },
2484 | .{ .operation = .delete, .text = " and [[New" },
2485 | },
2486 | }});
2487 |
2488 | const allocator = testing.allocator;
2489 | // TODO these tests should be checked for allocation failure
2490 |
2491 | // Increase the text lengths by 1024 times to ensure a timeout.
2492 | {
2493 | const a = "`Twas brillig, and the slithy toves\nDid gyre and gimble in the wabe:\nAll mimsy were the borogoves,\nAnd the mome raths outgrabe.\n" ** 1024;
2494 | const b = "I am the very model of a modern major general,\nI've information vegetable, animal, and mineral,\nI know the kings of England, and I quote the fights historical,\nFrom Marathon to Waterloo, in order categorical.\n" ** 1024;
2495 |
2496 | const with_timout: DiffMatchPatch = .{
2497 | .diff_timeout = 100, // 100ms
2498 | };
2499 |
2500 | const start_time = std.time.milliTimestamp();
2501 | {
2502 | var time_diff = try with_timout.diff(allocator, a, b, false);
2503 | defer deinitDiffList(allocator, &time_diff);
2504 | }
2505 | const end_time = std.time.milliTimestamp();
2506 |
2507 | // Test that we took at least the timeout period.
2508 | try testing.expect(with_timout.diff_timeout <= end_time - start_time); // diff: Timeout min.
2509 | // Test that we didn't take forever (be forgiving).
2510 | // Theoretically this test could fail very occasionally if the
2511 | // OS task swaps or locks up for a second at the wrong moment.
2512 | try testing.expect((with_timout.diff_timeout) * 10000 * 2 > end_time - start_time); // diff: Timeout max.
2513 | }
2514 |
2515 | {
2516 | // Test the linemode speedup.
2517 | // Must be long to pass the 100 char cutoff.
2518 | const a = "1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n";
2519 | const b = "abcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\n";
2520 |
2521 | var diff_checked = try this.diff(allocator, a, b, true);
2522 | defer deinitDiffList(allocator, &diff_checked);
2523 |
2524 | var diff_unchecked = try this.diff(allocator, a, b, false);
2525 | defer deinitDiffList(allocator, &diff_unchecked);
2526 |
2527 | try testing.expectEqualDeep(diff_checked.items, diff_unchecked.items); // diff: Simple line-mode.
2528 | }
2529 |
2530 | {
2531 | const a = "1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890";
2532 | const b = "abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij";
2533 |
2534 | var diff_checked = try this.diff(allocator, a, b, true);
2535 | defer deinitDiffList(allocator, &diff_checked);
2536 |
2537 | var diff_unchecked = try this.diff(allocator, a, b, false);
2538 | defer deinitDiffList(allocator, &diff_unchecked);
2539 |
2540 | try testing.expectEqualDeep(diff_checked.items, diff_unchecked.items); // diff: Single line-mode.
2541 | }
2542 |
2543 | {
2544 | // diff: Overlap line-mode.
2545 | const a = "1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n";
2546 | const b = "abcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n";
2547 |
2548 | var diffs_linemode = try this.diff(allocator, a, b, true);
2549 | defer deinitDiffList(allocator, &diffs_linemode);
2550 |
2551 | const texts_linemode = try rebuildtexts(allocator, diffs_linemode);
2552 | defer {
2553 | allocator.free(texts_linemode[0]);
2554 | allocator.free(texts_linemode[1]);
2555 | }
2556 |
2557 | var diffs_textmode = try this.diff(allocator, a, b, false);
2558 | defer deinitDiffList(allocator, &diffs_textmode);
2559 |
2560 | const texts_textmode = try rebuildtexts(allocator, diffs_textmode);
2561 | defer {
2562 | allocator.free(texts_textmode[0]);
2563 | allocator.free(texts_textmode[1]);
2564 | }
2565 |
2566 | try testing.expectEqualStrings(texts_textmode[0], texts_linemode[0]);
2567 | try testing.expectEqualStrings(texts_textmode[1], texts_linemode[1]);
2568 | }
2569 | }
2570 |
2571 | /// Asserts that diffing `before` against `after` gives the same diff list whether the final `diff` flag is `true` or `false`.
2572 | /// `dmp.diff_check_lines_over` is lowered to 20 for the call and reset to 100 on every exit path, including error returns.
2573 | fn testDiffLineMode(
2574 |     allocator: Allocator,
2575 |     dmp: *DiffMatchPatch,
2576 |     before: []const u8,
2577 |     after: []const u8,
2578 | ) !void {
2579 |     dmp.diff_check_lines_over = 20;
2580 |     defer dmp.diff_check_lines_over = 100; // deferred so a failing `try` below cannot leave the override in place
2581 |     var diff_checked = try dmp.diff(allocator, before, after, true);
2582 |     defer deinitDiffList(allocator, &diff_checked);
2583 |     var diff_unchecked = try dmp.diff(allocator, before, after, false);
2584 |     defer deinitDiffList(allocator, &diff_unchecked);
2585 |     try testing.expectEqualDeep(diff_checked.items, diff_unchecked.items); // diff: Simple line-mode.
2586 | }
2587 |
2588 | test "diffLineMode" {
2589 |     // Runs `testDiffLineMode` on two three-line inputs through
2590 |     // `checkAllAllocationFailures`.
2591 |     const before = "1234567890\n1234567890\n1234567890\n";
2592 |     const after = "abcdefghij\nabcdefghij\nabcdefghij\n";
2593 |     // Mutable because `testDiffLineMode` receives the configuration by pointer.
2594 |     var config: DiffMatchPatch = .{ .diff_timeout = 0 };
2595 |     try checkAllAllocationFailures(
2596 |         testing.allocator,
2597 |         testDiffLineMode,
2598 |         .{ &config, before, after },
2599 |     );
2600 | }
2601 |
2602 | /// Copies `params.input` into a diff list whose texts are duplicated with `allocator`,
2603 | /// runs `diffCleanupSemantic` on that list and compares the result with `params.expected`.
2604 | fn testDiffCleanupSemantic(
2605 |     allocator: std.mem.Allocator,
2606 |     params: struct {
2607 |         input: []const Diff,
2608 |         expected: []const Diff,
2609 |     },
2610 | ) !void {
2611 |     var working: DiffList = try .initCapacity(allocator, params.input.len);
2612 |     defer deinitDiffList(allocator, &working);
2613 |     for (params.input) |source| {
2614 |         const owned_text = try allocator.dupe(u8, source.text);
2615 |         working.appendAssumeCapacity(.{ .operation = source.operation, .text = owned_text });
2616 |     }
2617 |     try diffCleanupSemantic(allocator, &working);
2618 |     try testing.expectEqualDeep(params.expected, working.items);
2619 | }
2620 |
2621 | test diffCleanupSemantic {
2622 |     // Table of cleanup scenarios, listed in the order they are executed.
2623 |     // `input` is the diff list handed to the cleanup, `expected` the list it must produce.
2624 |     const Case = struct { input: []const Diff, expected: []const Diff };
2625 |     const cases = [_]Case{
2626 |         // Null case.
2627 |         .{
2628 |             .input = &.{},
2629 |             .expected = &.{},
2630 |         },
2631 |         // No elimination #1
2632 |         .{
2633 |             .input = &.{
2634 |                 .{ .operation = .delete, .text = "ab" },
2635 |                 .{ .operation = .insert, .text = "cd" },
2636 |                 .{ .operation = .equal, .text = "12" },
2637 |                 .{ .operation = .delete, .text = "e" },
2638 |             },
2639 |             .expected = &.{
2640 |                 .{ .operation = .delete, .text = "ab" },
2641 |                 .{ .operation = .insert, .text = "cd" },
2642 |                 .{ .operation = .equal, .text = "12" },
2643 |                 .{ .operation = .delete, .text = "e" },
2644 |             },
2645 |         },
2646 |         // No elimination #2
2647 |         .{
2648 |             .input = &.{
2649 |                 .{ .operation = .delete, .text = "abc" },
2650 |                 .{ .operation = .insert, .text = "ABC" },
2651 |                 .{ .operation = .equal, .text = "1234" },
2652 |                 .{ .operation = .delete, .text = "wxyz" },
2653 |             },
2654 |             .expected = &.{
2655 |                 .{ .operation = .delete, .text = "abc" },
2656 |                 .{ .operation = .insert, .text = "ABC" },
2657 |                 .{ .operation = .equal, .text = "1234" },
2658 |                 .{ .operation = .delete, .text = "wxyz" },
2659 |             },
2660 |         },
2661 |         // Simple elimination
2662 |         .{
2663 |             .input = &.{
2664 |                 .{ .operation = .delete, .text = "a" },
2665 |                 .{ .operation = .equal, .text = "b" },
2666 |                 .{ .operation = .delete, .text = "c" },
2667 |             },
2668 |             .expected = &.{
2669 |                 .{ .operation = .delete, .text = "abc" },
2670 |                 .{ .operation = .insert, .text = "b" },
2671 |             },
2672 |         },
2673 |         // Backpass elimination
2674 |         .{
2675 |             .input = &.{
2676 |                 .{ .operation = .delete, .text = "ab" },
2677 |                 .{ .operation = .equal, .text = "cd" },
2678 |                 .{ .operation = .delete, .text = "e" },
2679 |                 .{ .operation = .equal, .text = "f" },
2680 |                 .{ .operation = .insert, .text = "g" },
2681 |             },
2682 |             .expected = &.{
2683 |                 .{ .operation = .delete, .text = "abcdef" },
2684 |                 .{ .operation = .insert, .text = "cdfg" },
2685 |             },
2686 |         },
2687 |         // Multiple elimination
2688 |         .{
2689 |             .input = &.{
2690 |                 .{ .operation = .insert, .text = "1" },
2691 |                 .{ .operation = .equal, .text = "A" },
2692 |                 .{ .operation = .delete, .text = "B" },
2693 |                 .{ .operation = .insert, .text = "2" },
2694 |                 .{ .operation = .equal, .text = "_" },
2695 |                 .{ .operation = .insert, .text = "1" },
2696 |                 .{ .operation = .equal, .text = "A" },
2697 |                 .{ .operation = .delete, .text = "B" },
2698 |                 .{ .operation = .insert, .text = "2" },
2699 |             },
2700 |             .expected = &.{
2701 |                 .{ .operation = .delete, .text = "AB_AB" },
2702 |                 .{ .operation = .insert, .text = "1A2_1A2" },
2703 |             },
2704 |         },
2705 |         // Word boundaries
2706 |         .{
2707 |             .input = &.{
2708 |                 .{ .operation = .equal, .text = "The c" },
2709 |                 .{ .operation = .delete, .text = "ow and the c" },
2710 |                 .{ .operation = .equal, .text = "at." },
2711 |             },
2712 |             .expected = &.{
2713 |                 .{ .operation = .equal, .text = "The " },
2714 |                 .{ .operation = .delete, .text = "cow and the " },
2715 |                 .{ .operation = .equal, .text = "cat." },
2716 |             },
2717 |         },
2718 |         // No overlap elimination
2719 |         .{
2720 |             .input = &.{
2721 |                 .{ .operation = .delete, .text = "abcxx" },
2722 |                 .{ .operation = .insert, .text = "xxdef" },
2723 |             },
2724 |             .expected = &.{
2725 |                 .{ .operation = .delete, .text = "abcxx" },
2726 |                 .{ .operation = .insert, .text = "xxdef" },
2727 |             },
2728 |         },
2729 |         // Overlap elimination
2730 |         .{
2731 |             .input = &.{
2732 |                 .{ .operation = .delete, .text = "abcxxx" },
2733 |                 .{ .operation = .insert, .text = "xxxdef" },
2734 |             },
2735 |             .expected = &.{
2736 |                 .{ .operation = .delete, .text = "abc" },
2737 |                 .{ .operation = .equal, .text = "xxx" },
2738 |                 .{ .operation = .insert, .text = "def" },
2739 |             },
2740 |         },
2741 |         // Reverse overlap elimination
2742 |         .{
2743 |             .input = &.{
2744 |                 .{ .operation = .delete, .text = "xxxabc" },
2745 |                 .{ .operation = .insert, .text = "defxxx" },
2746 |             },
2747 |             .expected = &.{
2748 |                 .{ .operation = .insert, .text = "def" },
2749 |                 .{ .operation = .equal, .text = "xxx" },
2750 |                 .{ .operation = .delete, .text = "abc" },
2751 |             },
2752 |         },
2753 |         // Two overlap eliminations
2754 |         .{
2755 |             .input = &.{
2756 |                 .{ .operation = .delete, .text = "abcd1212" },
2757 |                 .{ .operation = .insert, .text = "1212efghi" },
2758 |                 .{ .operation = .equal, .text = "----" },
2759 |                 .{ .operation = .delete, .text = "A3" },
2760 |                 .{ .operation = .insert, .text = "3BC" },
2761 |             },
2762 |             .expected = &.{
2763 |                 .{ .operation = .delete, .text = "abcd" },
2764 |                 .{ .operation = .equal, .text = "1212" },
2765 |                 .{ .operation = .insert, .text = "efghi" },
2766 |                 .{ .operation = .equal, .text = "----" },
2767 |                 .{ .operation = .delete, .text = "A" },
2768 |                 .{ .operation = .equal, .text = "3" },
2769 |                 .{ .operation = .insert, .text = "BC" },
2770 |             },
2771 |         },
2772 |     };
2773 | 
2774 |     // Every scenario goes through `checkAllAllocationFailures`; the first failing one aborts the test.
2775 |     for (cases) |case| {
2776 |         try checkAllAllocationFailures(testing.allocator, testDiffCleanupSemantic, .{.{ .input = case.input, .expected = case.expected }});
2777 |     }
2778 | }
2779 |
2780 | /// Copies `params.input` into a diff list with `allocator`-duplicated texts, runs `dmp.diffCleanupEfficiency` on it and compares the result with `params.expected`.
2781 | fn testDiffCleanupEfficiency(
2782 |     allocator: Allocator,
2783 |     dmp: DiffMatchPatch,
2784 |     params: struct {
2785 |         input: []const Diff,
2786 |         expected: []const Diff,
2787 |     },
2788 | ) !void {
2789 |     var working: DiffList = try .initCapacity(allocator, params.input.len);
2790 |     defer deinitDiffList(allocator, &working);
2791 |     for (params.input) |source| {
2792 |         const owned_text = try allocator.dupe(u8, source.text);
2793 |         working.appendAssumeCapacity(.{ .operation = source.operation, .text = owned_text });
2794 |     }
2795 |     try dmp.diffCleanupEfficiency(allocator, &working);
2796 |     try testing.expectEqualDeep(params.expected, working.items);
2797 | }
2798 | test "diffCleanupEfficiency" {
2799 |     // Two configurations that differ only in `diff_edit_cost`: every scenario
2800 |     // uses a cost of 4 except the last one, which uses 5.
2801 |     var cost_four: DiffMatchPatch = .default;
2802 |     cost_four.diff_edit_cost = 4;
2803 |     var cost_five = cost_four;
2804 |     cost_five.diff_edit_cost = 5;
2805 | 
2806 |     // Null case: called directly on an empty list, which must still compare equal to `DiffList.empty`.
2807 |     {
2808 |         var no_diffs: DiffList = .empty;
2809 |         try cost_four.diffCleanupEfficiency(testing.allocator, &no_diffs);
2810 |         try testing.expectEqualDeep(DiffList.empty, no_diffs);
2811 |     }
2812 | 
2813 |     // The remaining scenarios run `testDiffCleanupEfficiency` through `checkAllAllocationFailures`.
2814 |     const Case = struct {
2815 |         dmp: DiffMatchPatch,
2816 |         input: []const Diff,
2817 |         expected: []const Diff,
2818 |     };
2819 |     const cases = [_]Case{
2820 |         // No elimination.
2821 |         .{
2822 |             .dmp = cost_four,
2823 |             .input = &.{
2824 |                 .{ .operation = .delete, .text = "ab" },
2825 |                 .{ .operation = .insert, .text = "12" },
2826 |                 .{ .operation = .equal, .text = "wxyz" },
2827 |                 .{ .operation = .delete, .text = "cd" },
2828 |                 .{ .operation = .insert, .text = "34" },
2829 |             },
2830 |             .expected = &.{
2831 |                 .{ .operation = .delete, .text = "ab" },
2832 |                 .{ .operation = .insert, .text = "12" },
2833 |                 .{ .operation = .equal, .text = "wxyz" },
2834 |                 .{ .operation = .delete, .text = "cd" },
2835 |                 .{ .operation = .insert, .text = "34" },
2836 |             },
2837 |         },
2838 |         // Four-edit elimination.
2839 |         .{
2840 |             .dmp = cost_four,
2841 |             .input = &.{
2842 |                 .{ .operation = .delete, .text = "ab" },
2843 |                 .{ .operation = .insert, .text = "12" },
2844 |                 .{ .operation = .equal, .text = "xyz" },
2845 |                 .{ .operation = .delete, .text = "cd" },
2846 |                 .{ .operation = .insert, .text = "34" },
2847 |             },
2848 |             .expected = &.{
2849 |                 .{ .operation = .delete, .text = "abxyzcd" },
2850 |                 .{ .operation = .insert, .text = "12xyz34" },
2851 |             },
2852 |         },
2853 |         // Three-edit elimination.
2854 |         .{
2855 |             .dmp = cost_four,
2856 |             .input = &.{
2857 |                 .{ .operation = .insert, .text = "12" },
2858 |                 .{ .operation = .equal, .text = "x" },
2859 |                 .{ .operation = .delete, .text = "cd" },
2860 |                 .{ .operation = .insert, .text = "34" },
2861 |             },
2862 |             .expected = &.{
2863 |                 .{ .operation = .delete, .text = "xcd" },
2864 |                 .{ .operation = .insert, .text = "12x34" },
2865 |             },
2866 |         },
2867 |         // Backpass elimination.
2868 |         .{
2869 |             .dmp = cost_four,
2870 |             .input = &.{
2871 |                 .{ .operation = .delete, .text = "ab" },
2872 |                 .{ .operation = .insert, .text = "12" },
2873 |                 .{ .operation = .equal, .text = "xy" },
2874 |                 .{ .operation = .insert, .text = "34" },
2875 |                 .{ .operation = .equal, .text = "z" },
2876 |                 .{ .operation = .delete, .text = "cd" },
2877 |                 .{ .operation = .insert, .text = "56" },
2878 |             },
2879 |             .expected = &.{
2880 |                 .{ .operation = .delete, .text = "abxyzcd" },
2881 |                 .{ .operation = .insert, .text = "12xy34z56" },
2882 |             },
2883 |         },
2884 |         // High cost elimination.
2885 |         .{
2886 |             .dmp = cost_five,
2887 |             .input = &.{
2888 |                 .{ .operation = .delete, .text = "ab" },
2889 |                 .{ .operation = .insert, .text = "12" },
2890 |                 .{ .operation = .equal, .text = "wxyz" },
2891 |                 .{ .operation = .delete, .text = "cd" },
2892 |                 .{ .operation = .insert, .text = "34" },
2893 |             },
2894 |             .expected = &.{
2895 |                 .{ .operation = .delete, .text = "abwxyzcd" },
2896 |                 .{ .operation = .insert, .text = "12wxyz34" },
2897 |             },
2898 |         },
2899 |     };
2900 | 
2901 |     for (cases) |case| {
2902 |         try checkAllAllocationFailures(
2903 |             testing.allocator,
2904 |             testDiffCleanupEfficiency,
2905 |             .{
2906 |                 case.dmp,
2907 |                 .{ .input = case.input, .expected = case.expected },
2908 |             },
2909 |         );
2910 |     }
2911 | }
2912 |
2913 | /// Forwards to `std.testing.checkAllAllocationFailures`, with `extra_args` declared as the tuple type computed by `CheckAllAllocationFailuresTuples`; see https://github.com/ziglang/zig/pull/23042/files
2914 | fn checkAllAllocationFailures(
2915 | backing_allocator: std.mem.Allocator,
2916 | comptime test_fn: anytype,
2917 | extra_args: CheckAllAllocationFailuresTuples(@TypeOf(test_fn)).ExtraArgsTuple, // the arguments of `test_fn` that follow its leading allocator
2918 | ) !void {
2919 | return std.testing.checkAllAllocationFailures(backing_allocator, test_fn, extra_args);
2920 | }
2921 | /// Checks at compile time that `TestFn` returns `!void` and takes a `std.mem.Allocator` as its first argument, then returns its argument tuple type with and without that first field.
2922 | fn CheckAllAllocationFailuresTuples(comptime TestFn: type) struct {
2923 | /// `std.meta.ArgsTuple(TestFn)`
2924 | ArgsTuple: type,
2925 | /// `std.meta.ArgsTuple(TestFn)` without the first argument
2926 | ExtraArgsTuple: type,
2927 | } {
2928 | switch (@typeInfo(@typeInfo(TestFn).@"fn".return_type.?)) { // only an error union with a `void` payload is accepted
2929 | .error_union => |info| {
2930 | if (info.payload != void) {
2931 | @compileError("Return type must be !void");
2932 | }
2933 | },
2934 | else => @compileError("Return type must be !void"),
2935 | }
2936 | 
2937 | const ArgsTuple = std.meta.ArgsTuple(TestFn);
2938 | 
2939 | const fn_args_fields = std.meta.fields(ArgsTuple);
2940 | if (fn_args_fields.len == 0 or fn_args_fields[0].type != std.mem.Allocator) {
2941 | @compileError("The provided function must have an " ++ @typeName(std.mem.Allocator) ++ " as its first argument");
2942 | }
2943 | 
2944 | // Remove the first tuple field (`std.mem.Allocator`) and rebuild the tuple type from the remaining fields.
2945 | var extra_args_tuple_info = @typeInfo(ArgsTuple);
2946 | var extra_args_fields = extra_args_tuple_info.@"struct".fields[1..].*; // mutable copy of every field after the first
2947 | for (&extra_args_fields, 0..) |*extra_field, i| {
2948 | extra_field.name = fn_args_fields[i].name; // take the name of the field one position earlier in `ArgsTuple` -- presumably so the shortened tuple's names start from the first index again (TODO confirm)
2949 | }
2950 | extra_args_tuple_info.@"struct".fields = &extra_args_fields;
2951 | const ExtraArgsTuple = @Type(extra_args_tuple_info);
2952 | 
2953 | return .{
2954 | .ArgsTuple = ArgsTuple,
2955 | .ExtraArgsTuple = ExtraArgsTuple,
2956 | };
2957 | }
2958 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 diffz authors
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # diffz
2 |
3 | An implementation of Google's diff-match-patch.
4 |
5 | Currently implemented:
6 |
7 | - [x] Diff
8 | - [ ] Match
9 | - [ ] Patch
10 |
11 | ## Installation
12 |
13 | > [!NOTE]
14 | > The minimum supported Zig version is `0.14.0`.
15 |
16 | ```bash
17 | # Initialize a `zig build` project if you haven't already
18 | zig init
19 | # Add the `diffz` package to your `build.zig.zon`
20 | zig fetch --save git+https://github.com/ziglibs/diffz.git
21 | ```
22 |
23 | You can then import `diffz` in your `build.zig` with:
24 |
25 | ```zig
26 | const diffz = b.dependency("diffz", .{});
27 | const exe = b.addExecutable(...);
28 | exe.root_module.addImport("diffz", diffz.module("diffz"));
29 | ```
30 |
31 | ## License
32 |
33 | This library is based on https://github.com/google/diff-match-patch, which is licensed under the [Apache License 2.0](http://www.apache.org/licenses/LICENSE-2.0). This library itself is licensed under the MIT License; see `LICENSE`.
34 |
--------------------------------------------------------------------------------
/build.zig:
--------------------------------------------------------------------------------
1 | const std = @import("std");
2 | const builtin = @import("builtin");
3 |
4 | const minimum_zig_version = std.SemanticVersion.parse("0.14.0") catch unreachable;
5 | /// Build script entry point: rejects Zig versions older than `minimum_zig_version` with a compile error, then declares the `diffz` module, a `test` step and a `coverage` step.
6 | pub fn build(b: *std.Build) void {
7 | if (comptime (builtin.zig_version.order(minimum_zig_version) == .lt)) {
8 | @compileError(std.fmt.comptimePrint(
9 | \\Your Zig version does not meet the minimum build requirement:
10 | \\ required Zig version: {[minimum_zig_version]}
11 | \\ actual Zig version: {[current_version]}
12 | \\
13 | , .{
14 | .current_version = builtin.zig_version,
15 | .minimum_zig_version = minimum_zig_version,
16 | }));
17 | }
18 | 
19 | const target = b.standardTargetOptions(.{});
20 | const optimize = b.standardOptimizeOption(.{});
21 | 
22 | const diffz_module = b.addModule("diffz", .{
23 | .root_source_file = b.path("DiffMatchPatch.zig"),
24 | .target = target,
25 | .optimize = optimize,
26 | });
27 | // The test artifact is created from the same module that is registered as `diffz`.
28 | const tests = b.addTest(.{ .root_module = diffz_module });
29 | const run_tests = b.addRunArtifact(tests);
30 | 
31 | const test_step = b.step("test", "Run all the tests");
32 | test_step.dependOn(&run_tests.step);
33 | // Coverage: the test artifact is passed to kcov; if `findProgram` fails, the bare name "kcov" is used instead.
34 | const kcov_bin = b.findProgram(&.{"kcov"}, &.{}) catch "kcov";
35 | 
36 | const run_kcov = b.addSystemCommand(&.{
37 | kcov_bin,
38 | "--clean",
39 | "--exclude-line=unreachable,expect(false)",
40 | });
41 | run_kcov.addPrefixedDirectoryArg("--include-pattern=", b.path("."));
42 | const coverage_output = run_kcov.addOutputDirectoryArg(".");
43 | run_kcov.addArtifactArg(tests);
44 | run_kcov.enableTestRunnerMode();
45 | // The kcov output directory is installed into the custom install directory "coverage".
46 | const install_coverage = b.addInstallDirectory(.{
47 | .source_dir = coverage_output,
48 | .install_dir = .{ .custom = "coverage" },
49 | .install_subdir = "",
50 | });
51 | 
52 | const coverage_step = b.step("coverage", "Generate coverage (kcov must be installed)");
53 | coverage_step.dependOn(&install_coverage.step);
54 | }
55 |
--------------------------------------------------------------------------------
/build.zig.zon:
--------------------------------------------------------------------------------
1 | .{
2 | .name = .diffz,
3 | .version = "0.0.1",
4 | .minimum_zig_version = "0.14.0",
5 | .paths = .{
6 | "DiffMatchPatch.zig",
7 | "LICENSE",
8 | "README.md",
9 | "build.zig.zon",
10 | "build.zig",
11 | },
12 | .fingerprint = 0x23937d8821656b1b, // Changing this has security and trust implications.
13 | }
14 |
--------------------------------------------------------------------------------