├── .gitignore ├── APACHE-LICENSE-2.0 ├── LICENSE ├── Makefile ├── README.md ├── include └── dmp.h ├── src ├── dmp.c ├── dmp_pool.c └── dmp_pool.h └── test ├── dmp_test.c ├── dmp_test.h └── dmp_test_internals.c /.gitignore: -------------------------------------------------------------------------------- 1 | /trash-*.exe 2 | /config.mak 3 | *.o 4 | *.a 5 | *.exe 6 | *.gcda 7 | *.gcno 8 | *.gcov 9 | .lock-wafbuild 10 | .waf* 11 | build/ 12 | *.sln 13 | *.suo 14 | *.vc*proj* 15 | *.sdf 16 | *.opensdf 17 | *.aps 18 | CMake* 19 | *.cmake 20 | .DS_Store 21 | *~ 22 | -------------------------------------------------------------------------------- /APACHE-LICENSE-2.0: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 
26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. 
For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. 
If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. 
You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 
176 | 177 | END OF TERMS AND CONDITIONS 178 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2012 Russell Belfer 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a 4 | copy of this software and associated documentation files (the "Software"), 5 | to deal in the Software without restriction, including without limitation 6 | the rights to use, copy, modify, merge, publish, distribute, sublicense, 7 | and/or sell copies of the Software, and to permit persons to whom the 8 | Software is furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included 11 | in all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 14 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 19 | DEALINGS IN THE SOFTWARE. 
20 | 21 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Silly Makefile to build libdmp.a and test_dmp executable 2 | 3 | PLATFORM=$(shell uname -s) 4 | 5 | MINGW=0 6 | ifneq (,$(findstring MINGW32,$(PLATFORM))) 7 | MINGW=1 8 | endif 9 | ifneq (,$(findstring mingw,$(CROSS_COMPILE))) 10 | MINGW=1 11 | endif 12 | 13 | rm=rm -f 14 | AR=ar cq 15 | RANLIB=ranlib 16 | 17 | LIBNAME=libdmp.a 18 | 19 | ifeq ($(MINGW),1) 20 | CC=gcc 21 | else 22 | CC=cc 23 | endif 24 | 25 | INCLUDES=-Isrc -Iinclude 26 | 27 | DEFINES= $(INCLUDES) -DSTDC -D_GNU_SOURCE $(EXTRA_DEFINES) 28 | CFLAGS= -g $(DEFINES) -Wall -Wextra -Wno-missing-field-initializers -std=c99 -O2 $(EXTRA_CFLAGS) 29 | 30 | SRCS = $(wildcard src/*.c) 31 | 32 | ifeq ($(MINGW),1) 33 | DEFINES += -DWIN32 -D_WIN32_WINNT=0x0501 -D__USE_MINGW_ANSI_STDIO=1 34 | else 35 | CFLAGS += -fPIC 36 | endif 37 | 38 | OBJS = $(patsubst %.c,%.o,$(SRCS)) 39 | 40 | %.c.o: 41 | $(CC) $(CFLAGS) -c $*.c 42 | 43 | default: $(LIBNAME) 44 | 45 | $(LIBNAME): $(OBJS) 46 | $(rm) $@ 47 | $(AR) $@ $(OBJS) 48 | $(RANLIB) $@ 49 | 50 | TESTSRCS = $(wildcard test/*.c) 51 | 52 | test: dmp_test 53 | 54 | dmp_test: $(LIBNAME) include/dmp.h $(TESTSRCS) 55 | $(CC) -o dmp_test $(CFLAGS) $(TESTSRCS) -L. -ldmp 56 | 57 | clean: 58 | $(rm) -rf $(OBJS) $(LIBNAME) dmp_test *.dSYM 59 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Port of google-diff-match-patch to C 2 | ==================================== 3 | 4 | This is a C language port of Neil Fraser's google-diff-match-patch code. 5 | 6 | Right now, this is focused on the `diff` part of `diff-match-patch`. 
It 7 | contains APIs to compare two blocks of text and return a structure 8 | containing the list of differences (as shared, inserted, and deleted 9 | sections). 10 | 11 | Getting Started 12 | --------------- 13 | 14 | This library is in an early state, so be prepared to have to bend it to 15 | your will a bit. Public APIs are declared in `include/dmp.h`. A basic 16 | Makefile is included. 17 | 18 | ```sh 19 | $ make 20 | cc -g -Isrc -Iinclude -DSTDC -D_GNU_SOURCE -Wall -Wextra -Wno-missing-field-initializers -std=c99 -O2 -fPIC -c -o src/dmp.o src/dmp.c 21 | cc -g -Isrc -Iinclude -DSTDC -D_GNU_SOURCE -Wall -Wextra -Wno-missing-field-initializers -std=c99 -O2 -fPIC -c -o src/dmp_pool.o src/dmp_pool.c 22 | rm -f libdmp.a 23 | ar cq libdmp.a src/dmp.o src/dmp_pool.o 24 | ranlib libdmp.a 25 | 26 | $ make test 27 | cc -o dmp_test -g -Isrc -Iinclude -DSTDC -D_GNU_SOURCE -Wall -Wextra -Wno-missing-field-initializers -std=c99 -O2 -fPIC test/dmp_test.c test/dmp_test_internals.c -L. -ldmp 28 | 29 | $ ./dmp_test 30 | ...done 31 | ..done 32 | ................... 33 | > "ax\x09" 34 | -"a", +"\xda\x80", ="x", -"\x09", +"\x00" 35 | < "\xda\x80x\x00" 36 | . 37 | > "1ayb2" 38 | -"1", ="a", -"y", ="b", -"2", +"xab" 39 | < "abxab" 40 | . 41 | > "abcy" 42 | +"xaxcx", ="abc", -"y" 43 | < "xaxcxabc" 44 | .done 45 | ``` 46 | 47 | Example API Usage 48 | ----------------- 49 | 50 | All functions and structures used in this library are prefixed with 51 | `dmp_`. To generate a diff, you use a function to create a `dmp_diff` 52 | object which you can then access and manipulate via other functions. 53 | 54 | Here is a silly little example that counts the total length of the "equal" 55 | runs from the diff.
56 | 57 | ```c 58 | { 59 | dmp_diff *diff; 60 | int eq = 0; 61 | 62 | if (dmp_diff_from_strs(&diff, NULL, "string 1", "string 2") != 0) 63 | handle_error(); 64 | 65 | dmp_diff_foreach(diff, how_equal, &eq); 66 | printf("Strings had %d equal bytes\n", eq); 67 | 68 | dmp_diff_free(diff); 69 | } 70 | 71 | int how_equal( 72 | void *ref, dmp_operation_t op, const void *data, uint32_t len) 73 | { 74 | int *sum = ref; 75 | if (op == DMP_DIFF_EQUAL) 76 | (*sum) += len; 77 | return 0; 78 | } 79 | ``` 80 | 81 | This shows the basic pattern of diff API usage: 82 | 83 | 1. Generate a diff 84 | 2. Process the diff in some way 85 | 3. Free the diff 86 | 87 | Diff API 88 | -------- 89 | 90 | All public functions in the library that could fail return an `int` and 91 | will return 0 for success or -1 for failure. Functions which cannot fail 92 | will either have a void return or will return a specific other data type 93 | if they are simple data lookups. 94 | 95 | Here are the main functions for generating and accessing diffs: 96 | 97 | ```c 98 | /** 99 | * Public: Calculate the diff between two texts. 100 | * 101 | * This will allocate and populate a new `dmp_diff` object with records 102 | * describing how to transform `text1` into `text2`. This returns a diff 103 | * with byte-level differences between the two texts. You can use one of 104 | * the diff transformation functions below to modify the diffs to word or 105 | * line level diffs, or to align diffs to UTF-8 boundaries or the like. 106 | * 107 | * diff - Pointer to a `dmp_diff` pointer that will be allocated. You must 108 | * call `dmp_diff_free()` on this pointer when done. 109 | * options - `dmp_options` structure to control diff, or NULL to use defaults. 110 | * text1 - The FROM text for the left side of the diff. 111 | * len1 - The number of bytes of data in `text1`. 112 | * text2 - The TO text for the right side of the diff. 113 | * len2 - The number of bytes of data in `text2`. 
114 | * 115 | * Returns 0 if the diff was successfully generated, -1 on failure. The 116 | * only current failure scenario would be a failed allocation. Otherwise, 117 | * some sort of diff should be generated. 118 | */ 119 | extern int dmp_diff_new( 120 | dmp_diff **diff, 121 | const dmp_options *options, 122 | const char *text1, 123 | uint32_t len1, 124 | const char *text2, 125 | uint32_t len2); 126 | 127 | /** 128 | * Public: Generate diff from NUL-terminated strings. 129 | * 130 | * This is a convenience function when you know that you are diffing 131 | * NUL-terminated strings. It simply calls `strlen()` and passes the 132 | * results along to `dmp_diff_new` (plus it deals correctly with NULL 133 | * strings, passing them in as zero-length texts). 134 | * 135 | * diff - Pointer to a `dmp_diff` pointer that will be allocated. You must 136 | * call `dmp_diff_free()` on this pointer when done. 137 | * options - `dmp_options` structure to control diff, or NULL to use defaults. 138 | * text1 - The FROM string for the left side of the diff. Must be a regular 139 | * NUL-terminated C string. 140 | * text2 - The TO string for the right side of the diff. Must be a regular 141 | * NUL-terminated C string. 142 | * 143 | * Returns 0 if the diff was successfully generated, -1 on failure. The 144 | * only current failure scenario would be a failed allocation. Otherwise, 145 | * some sort of diff should be generated. 146 | */ 147 | extern int dmp_diff_from_strs( 148 | dmp_diff **diff, 149 | const dmp_options *options, 150 | const char *text1, 151 | const char *text2); 152 | 153 | /** 154 | * Public: Free the diff structure. 155 | * 156 | * Call this when you are done with the diff data. 157 | * 158 | * diff - The `dmp_diff` object to be freed. 159 | */ 160 | extern void dmp_diff_free(dmp_diff *diff); 161 | 162 | /** 163 | * Public: Iterate over changes in a diff list. 164 | * 165 | * Invoke a callback on each hunk of a diff.
166 | * 167 | * diff - The `dmp_diff` object to iterate over. 168 | * cb - The callback function to invoke on each hunk. 169 | * cb_ref - A reference pointer that will be passed to callback. 170 | * 171 | * Returns 0 if iteration completed successfully, or any non-zero value 172 | * that was returned by the `cb` callback function to terminate iteration. 173 | */ 174 | extern int dmp_diff_foreach( 175 | const dmp_diff *diff, 176 | dmp_diff_callback cb, 177 | void *cb_ref); 178 | ``` 179 | 180 | Status 181 | ------ 182 | 183 | The library is currently at version **0.1.1**. There has only really been 184 | one iteration on the core functionality and then one minor update to 185 | reorganize and clean things up a bit. 186 | 187 | At this point, the basic Myers diff code works, although I haven't 188 | implemented all of the optimizations from the upstream library yet. I 189 | haven't written any of the diff formatting helpers from the original 190 | library yet, nor have I started on the match or patch related code yet. 191 | 192 | Copyright and License 193 | --------------------- 194 | 195 | Copyright 196 | --------- 197 | 198 | The original **Google Diff, Match and Patch Library** is licensed under 199 | the [Apache License 2.0](http://www.apache.org/licenses/LICENSE-2.0). 200 | The full terms of that license are included here in the 201 | `APACHE-LICENSE-2.0` file. 202 | 203 | Diff, Match and Patch Library 204 | 205 | Written by Neil Fraser 206 | Copyright (c) 2006 Google Inc. 207 | 208 | 209 | This C version of Diff, Match and Patch Library is licensed under 210 | the [MIT License](http://www.opensource.org/licenses/MIT) (a.k.a. 211 | the Expat License) which is included here in the `LICENSE` file. 
212 | 213 | C version of Diff, Match and Patch Library 214 | 215 | Copyright (c) Russell Belfer 216 | 217 | 218 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 219 | 220 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 221 | 222 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 223 | 224 | 225 | -------------------------------------------------------------------------------- /include/dmp.h: -------------------------------------------------------------------------------- 1 | /** 2 | * dmp.h 3 | * 4 | * C language implementation of Google Diff, Match, and Patch library 5 | * 6 | * Original library is Copyright (c) 2006 Google Inc. 
7 | * http://code.google.com/p/google-diff-match-patch/ 8 | * 9 | * Copyright (c) Russell Belfer 10 | * https://github.com/arrbee/google-diff-match-patch-c/ 11 | * 12 | * Permission is hereby granted, free of charge, to any person obtaining a 13 | * copy of this software and associated documentation files (the "Software"), 14 | * to deal in the Software without restriction, including without limitation 15 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 16 | * and/or sell copies of the Software, and to permit persons to whom the 17 | * Software is furnished to do so, subject to the following conditions: 18 | * 19 | * The above copyright notice and this permission notice shall be included in 20 | * all copies or substantial portions of the Software. 21 | * 22 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 23 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 24 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 25 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 26 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 27 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 28 | * DEALINGS IN THE SOFTWARE. 29 | */ 30 | #ifndef INCLUDE_H_dmp 31 | #define INCLUDE_H_dmp 32 | 33 | #define DMP_VERSION "0.1.1" 34 | 35 | #include 36 | #include 37 | #include 38 | 39 | /** 40 | * Public: Each hunk of diff describes one of these operations. 41 | */ 42 | typedef enum { 43 | DMP_DIFF_DELETE = -1, 44 | DMP_DIFF_EQUAL = 0, 45 | DMP_DIFF_INSERT = 1 46 | } dmp_operation_t; 47 | 48 | /** 49 | * Public: Options structure configures behavior of diff functions. 50 | */ 51 | typedef struct { 52 | /* Number of seconds to map a diff before giving up (0 for infinity). */ 53 | float timeout; /* = 1.0 */ 54 | 55 | /* Cost of an empty edit operation in terms of edit characters. 
*/ 56 | int edit_cost; /* = 4 */ 57 | 58 | /* At what point is no match declared (0.0 = perfection, 1.0 = very 59 | * loose). 60 | */ 61 | float match_threshold; /* = 0.5 */ 62 | 63 | /* How far to search for a match (0 = exact location, 1000+ = broad match). 64 | * A match this many characters away from the expected location will add 65 | * 1.0 to the score (0.0 is a perfect match). 66 | */ 67 | float match_distance; /* = 1000 */ 68 | 69 | /* When deleting a large block of text (over ~64 characters), how close 70 | * do the contents have to be to match the expected contents. (0.0 = 71 | * perfection, 1.0 = very loose). Note that match_threshold controls 72 | * how closely the end points of a delete need to match. 73 | */ 74 | float patch_delete_threshold; /* = 0.5 */ 75 | 76 | /* Chunk size for context length. */ 77 | int patch_margin; /* = 4 */ 78 | 79 | /* The number of bits in an int. 80 | * Python has no maximum, thus to disable patch splitting set to 0. 81 | * However to avoid long patches in certain pathological cases, use 32. 82 | * Multiple short patches (using native ints) are much faster than long 83 | * ones. 84 | */ 85 | int match_maxbits; /* = 32 */ 86 | 87 | /* Should diff run an initial line-level diff to identify changed areas? 88 | * Running initial diff will be slightly faster but slightly less optimal. 89 | */ 90 | int check_lines; /* = 1 */ 91 | 92 | /* Should the diff trim the common prefix? */ 93 | int trim_common_prefix; /* = 1 */ 94 | 95 | /* Should the diff trim the common suffix? */ 96 | int trim_common_suffix; /* = 1 */ 97 | } dmp_options; 98 | 99 | /** 100 | * Public: Main diff object. 101 | * 102 | * This is an opaque structure. It is internally a linked list of diff 103 | * records, each tracking one of the operations listed above, along with 104 | * pointers into the original text data and run lengths for the diff 105 | * records.
106 | */ 107 | typedef struct dmp_diff dmp_diff; 108 | 109 | typedef struct dmp_patch dmp_patch; 110 | 111 | /** 112 | * Public: Callback function for iterating over a diff. 113 | * 114 | * When you call `dmp_diff_foreach`, pass a function with this signature 115 | * to iterate over the diff records. If the `op` is a DELETE, the `data` 116 | * pointer will be into the `text1` original text. If the `op` is an 117 | * INSERT, the pointer will be into the `text2` new text. If the `op` is 118 | * an EQUAL, we generally attempt to keep the pointer into the `text1` 119 | * original text, but that is not guaranteed. 120 | * 121 | * cb_ref - The reference pointer you passed to the foreach fn. 122 | * op - A `dmp_operation_t` value for the chunk of data. 123 | * data - Pointer to the diff data as described above. This data will 124 | * generally not be NUL-terminated, since it is a reference into 125 | * the original data. You must use the `len` parameter correctly. 126 | * len - Bytes of data after the pointer in this chunk. 127 | * 128 | * Returns 0 to keep iterating or non-zero to stop iteration. Any value 129 | * you return will be passed back from the foreach function. 130 | */ 131 | typedef int (*dmp_diff_callback)( 132 | void *cb_ref, dmp_operation_t op, const void *data, uint32_t len); 133 | 134 | /** 135 | * Public: Initialize options structure to default values. 136 | * 137 | * This initializes a `dmp_options` structure for passing into the various 138 | * functions that take options. After initialization, you should explicitly 139 | * set the parameters that you wish to change. 140 | * 141 | * opts - Structure to be initialized, generally created on the stack. 142 | * 143 | * Returns 0 on success, -1 on failure. 144 | */ 145 | extern int dmp_options_init(dmp_options *opts); 146 | 147 | /** 148 | * Public: Calculate the diff between two texts.
149 | * 150 | * This will allocate and populate a new `dmp_diff` object with records 151 | * describing how to transform `text1` into `text2`. This returns a diff 152 | * with byte-level differences between the two texts. You can use one of 153 | * the diff transformation functions below to modify the diffs to word or 154 | * line level diffs, or to align diffs to UTF-8 boundaries or the like. 155 | * 156 | * diff - Pointer to a `dmp_diff` pointer that will be allocated. You must 157 | * call `dmp_diff_free()` on this pointer when done. 158 | * options - `dmp_options` structure to control diff, or NULL to use defaults. 159 | * text1 - The FROM text for the left side of the diff. 160 | * len1 - The number of bytes of data in `text1`. 161 | * text2 - The TO text for the right side of the diff. 162 | * len2 - The number of bytes of data in `text2`. 163 | * 164 | * Returns 0 if the diff was successfully generated, -1 on failure. The 165 | * only current failure scenario would be a failed allocation. Otherwise, 166 | * some sort of diff should be generated. 167 | */ 168 | extern int dmp_diff_new( 169 | dmp_diff **diff, 170 | const dmp_options *options, 171 | const char *text1, 172 | uint32_t len1, 173 | const char *text2, 174 | uint32_t len2); 175 | 176 | /** 177 | * Public: Generate diff from NUL-terminated strings. 178 | * 179 | * This is a convenience function when you know that you are diffing 180 | * NUL-terminated strings. It simply calls `strlen()` and passes the 181 | * results along to `dmp_diff_new` (plus it deals correctly with NULL 182 | * strings, passing them in as zero-length texts). 183 | * 184 | * diff - Pointer to a `dmp_diff` pointer that will be allocated. You must 185 | * call `dmp_diff_free()` on this pointer when done. 186 | * options - `dmp_options` structure to control diff, or NULL to use defaults. 187 | * text1 - The FROM string for the left side of the diff. Must be a regular 188 | * NUL-terminated C string.
189 | * text2 - The TO string for the right side of the diff. Must be a regular 190 | * NUL-terminated C string. 191 | * 192 | * Returns 0 if the diff was successfully generated, -1 on failure. The 193 | * only current failure scenario would be a failed allocation. Otherwise, 194 | * some sort of diff should be generated. 195 | */ 196 | extern int dmp_diff_from_strs( 197 | dmp_diff **diff, 198 | const dmp_options *options, 199 | const char *text1, 200 | const char *text2); 201 | 202 | /** 203 | * Public: Free the diff structure. 204 | * 205 | * Call this when you are done with the diff data. 206 | * 207 | * diff - The `dmp_diff` object to be freed. 208 | */ 209 | extern void dmp_diff_free(dmp_diff *diff); 210 | 211 | /** 212 | * Public: Iterate over changes in a diff list. 213 | * 214 | * Invoke a callback on each hunk of a diff. 215 | * 216 | * diff - The `dmp_diff` object to iterate over. 217 | * cb - The callback function to invoke on each hunk. 218 | * cb_ref - A reference pointer that will be passed to callback. 219 | * 220 | * Returns 0 if iteration completed successfully, or any non-zero value 221 | * that was returned by the `cb` callback function to terminate iteration. 222 | */ 223 | extern int dmp_diff_foreach( 224 | const dmp_diff *diff, 225 | dmp_diff_callback cb, 226 | void *cb_ref); 227 | 228 | /** 229 | * Public: Count the number of diff hunks. 230 | * 231 | * This computes the number of hunks in a diff object. This is the 232 | * number of times that your iterator function would be invoked. 233 | * 234 | * diff - The `dmp_diff` object. 235 | * 236 | * Returns a count of the number of hunks in the diff.
237 | */ 238 | extern uint32_t dmp_diff_hunks(const dmp_diff *diff); 239 | 240 | extern void dmp_diff_print_raw(FILE *fp, const dmp_diff *diff); 241 | 242 | extern int dmp_patch_new( 243 | dmp_patch **patch, 244 | const char *text1, 245 | uint32_t len1, 246 | const dmp_diff *diff); 247 | 248 | extern void dmp_patch_free(dmp_patch *patch); 249 | 250 | /* 251 | * Utility functions 252 | */ 253 | 254 | extern uint32_t dmp_common_prefix( 255 | const char *t1, uint32_t l1, const char *t2, uint32_t l2); 256 | 257 | extern uint32_t dmp_common_suffix( 258 | const char *t1, uint32_t l1, const char *t2, uint32_t l2); 259 | 260 | extern int dmp_has_prefix( 261 | const char *text, uint32_t tlen, const char *pfx, uint32_t plen); 262 | 263 | extern int dmp_has_suffix( 264 | const char *text, uint32_t tlen, const char *sfx, uint32_t slen); 265 | 266 | extern int dmp_strcmp( 267 | const char *t1, uint32_t l1, const char *t2, uint32_t l2); 268 | 269 | extern const char *dmp_strstr( 270 | const char *haystack, uint32_t lh, const char *needle, uint32_t ln); 271 | 272 | extern void dmp_build_texts_from_diff( 273 | char **t1, uint32_t *l1, char **t2, uint32_t *l2, const dmp_diff *diff); 274 | 275 | #endif 276 | -------------------------------------------------------------------------------- /src/dmp.c: -------------------------------------------------------------------------------- 1 | /** 2 | * dmp.c 3 | * 4 | * C language implementation of Google Diff, Match, and Patch library 5 | * 6 | * Original library is Copyright (c) 2006 Google Inc. 7 | * http://code.google.com/p/google-diff-match-patch/ 8 | * 9 | * Copyright (c) Russell Belfer 10 | * https://github.com/arrbee/google-diff-match-patch-c/ 11 | * 12 | * See included LICENSE file for license details. 13 | */ 14 | 15 | #include "dmp.h" 16 | #include "dmp_pool.h" 17 | #include 18 | #include 19 | #include 20 | #include 21 | 22 | #define dmp_min(A,B) (((A) < (B)) ? (A) : (B)) 23 | #define dmp_num_cmp(A,B) (((A) < (B)) ? 
-1 : ((A) > (B)) ? 1 : 0) 24 | 25 | #define START_POOL 8 26 | 27 | struct dmp_diff { 28 | dmp_pool pool; 29 | dmp_range list; 30 | double deadline; 31 | /* original parameters */ 32 | const char *t1, *t2; 33 | uint32_t l1, l2; 34 | /* used by bisect */ 35 | int *v1, *v2; 36 | uint32_t v_alloc; 37 | }; 38 | 39 | static double dmp_time(void); 40 | 41 | static int diff_main( 42 | dmp_range *, dmp_diff *, const dmp_options *, 43 | const char *, uint32_t, const char *, uint32_t); 44 | 45 | static int diff_bisect( 46 | dmp_range *, dmp_diff *, const dmp_options *, 47 | const char *, uint32_t, const char *, uint32_t); 48 | 49 | static int diff_cleanup_merge(dmp_diff *diff, dmp_range *list); 50 | 51 | static dmp_diff *alloc_diff(const dmp_options *opts) 52 | { 53 | dmp_diff *diff = malloc(sizeof(dmp_diff)); 54 | if (!diff) 55 | return NULL; 56 | 57 | memset(diff, 0, sizeof(*diff)); 58 | 59 | diff->deadline = (opts && opts->timeout > 0) ? 60 | dmp_time() + opts->timeout : -1.0; 61 | 62 | if (dmp_pool_alloc(&diff->pool, START_POOL) < 0) { 63 | free(diff); 64 | diff = NULL; 65 | } 66 | 67 | return diff; 68 | } 69 | 70 | int dmp_diff_new( 71 | dmp_diff **diff_ptr, 72 | const dmp_options *options, 73 | const char *text1, 74 | uint32_t len1, 75 | const char *text2, 76 | uint32_t len2) 77 | { 78 | dmp_diff *diff; 79 | 80 | assert(diff_ptr); 81 | 82 | *diff_ptr = diff = alloc_diff(options); 83 | if (!diff) 84 | return -1; 85 | 86 | diff->t1 = text1; 87 | diff->l1 = len1; 88 | diff->t2 = text2; 89 | diff->l2 = len2; 90 | 91 | return diff_main(&diff->list, diff, options, text1, len1, text2, len2); 92 | } 93 | 94 | int dmp_diff_from_strs( 95 | dmp_diff **diff, 96 | const dmp_options *options, 97 | const char *text1, 98 | const char *text2) 99 | { 100 | if (!text1) 101 | text1 = ""; 102 | if (!text2) 103 | text2 = ""; 104 | 105 | return dmp_diff_new( 106 | diff, options, text1, strlen(text1), text2, strlen(text2)); 107 | } 108 | 109 | static int diff_main( 110 | dmp_range *out, 111 | 
dmp_diff *diff, 112 | const dmp_options *opts, 113 | const char *text1, 114 | uint32_t len1, 115 | const char *text2, 116 | uint32_t len2) 117 | { 118 | const char *t_short, *t_long, *found; 119 | uint32_t l_short, l_long, common; 120 | dmp_pool *pool = &diff->pool; 121 | 122 | /* check for one-sided diffs */ 123 | 124 | if (!text1 || !len1) { 125 | dmp_range_init( 126 | pool, out, DMP_DIFF_INSERT, text2, 0, len2); 127 | return pool->error; 128 | } 129 | 130 | if (!text2 || !len2) { 131 | dmp_range_init( 132 | pool, out, DMP_DIFF_DELETE, text1, 0, len1); 133 | return pool->error; 134 | } 135 | 136 | /* allocate sentinel */ 137 | if (dmp_range_init(pool, out, DMP_DIFF_EQUAL, text1, len1, 0) < 0) 138 | goto finish; 139 | 140 | /* trim common prefix */ 141 | 142 | common = dmp_common_prefix(text1, len1, text2, len2); 143 | if (common > 0) { 144 | dmp_range_insert( 145 | pool, out, -1, DMP_DIFF_EQUAL, text1, 0, common); 146 | 147 | text1 += common; 148 | len1 -= common; 149 | text2 += common; 150 | len2 -= common; 151 | } 152 | 153 | /* trim common suffix */ 154 | 155 | common = dmp_common_suffix(text1, len1, text2, len2); 156 | if (common > 0) { 157 | dmp_range_insert( 158 | pool, out, out->end, 159 | DMP_DIFF_EQUAL, text1, len1 - common, common); 160 | 161 | len1 -= common; 162 | len2 -= common; 163 | } 164 | 165 | /* after trimming, check for degenerate cases */ 166 | 167 | if (!len1) { 168 | if (len2) 169 | dmp_range_insert( 170 | pool, out, -1, DMP_DIFF_INSERT, text2, 0, len2); 171 | goto finish; 172 | } else if (!len2) { 173 | dmp_range_insert( 174 | pool, out, -1, DMP_DIFF_DELETE, text1, 0, len1); 175 | goto finish; 176 | } 177 | 178 | /* check for "common middle" - i.e. 
one text inside the other */ 179 | 180 | if (len1 <= len2) { 181 | t_short = text1; 182 | l_short = len1; 183 | t_long = text2; 184 | l_long = len2; 185 | } else { 186 | t_short = text2; 187 | l_short = len2; 188 | t_long = text1; 189 | l_long = len1; 190 | } 191 | 192 | if ((found = dmp_strstr(t_long, l_long, t_short, l_short)) != NULL) { 193 | int op = (t_short == text1) ? DMP_DIFF_INSERT : DMP_DIFF_DELETE; 194 | uint32_t found_at = (found - t_long); 195 | 196 | dmp_range_insert( 197 | pool, out, -1, op, t_long, 0, found_at); 198 | dmp_range_insert( 199 | pool, out, -1, DMP_DIFF_EQUAL, t_short, 0, l_short); 200 | found_at += l_short; 201 | dmp_range_insert( 202 | pool, out, -1, op, t_long, found_at, l_long - found_at); 203 | 204 | goto finish; 205 | } 206 | 207 | if (l_short == 1) { 208 | /* this speed up applies after testing for short inside long above */ 209 | dmp_range_insert( 210 | pool, out, -1, DMP_DIFF_DELETE, text1, 0, len1); 211 | dmp_range_insert( 212 | pool, out, -1, DMP_DIFF_INSERT, text2, 0, len2); 213 | goto finish; 214 | } 215 | 216 | /* TODO: "half match" and "line mode" optimizations */ 217 | 218 | /* full Myers bisect diff */ 219 | 220 | if (!pool->error) 221 | diff_bisect(out, diff, opts, text1, len1, text2, len2); 222 | 223 | if (!pool->error) 224 | diff_cleanup_merge(diff, out); 225 | 226 | finish: 227 | dmp_range_normalize(pool, out); 228 | 229 | return pool->error; 230 | } 231 | 232 | static int diff_bisect_split( 233 | dmp_range *out, 234 | dmp_diff *diff, 235 | const dmp_options *opts, 236 | const char *t1, 237 | int t1pivot, 238 | uint32_t t1len, 239 | const char *t2, 240 | int t2pivot, 241 | uint32_t t2len) 242 | { 243 | dmp_range l1, l2; 244 | int rv = diff_main(&l1, diff, opts, t1, t1pivot, t2, t2pivot); 245 | 246 | if (rv == 0) 247 | rv = diff_main(&l2, diff, opts, 248 | t1 + t1pivot, t1len - t1pivot, t2 + t2pivot, t2len - t2pivot); 249 | 250 | if (rv == 0) { 251 | dmp_range_splice(&diff->pool, out, -1, &l1); 252 | 
dmp_range_splice(&diff->pool, out, -1, &l2); 253 | } 254 | 255 | return rv; 256 | } 257 | 258 | /* bisect diff - find "middle snake" of a diff 259 | * See Myers 1986: An O(ND) Difference Algorithm and Its Variations. 260 | */ 261 | static int diff_bisect( 262 | dmp_range *out, 263 | dmp_diff *diff, 264 | const dmp_options *opts, 265 | const char *t1, 266 | uint32_t t1len, 267 | const char *t2, 268 | uint32_t t2len) 269 | { 270 | int max_d, v_offset, v_length, d; 271 | int delta, front, k1start, k1end, k2start, k2end, *v1, *v2; 272 | 273 | v_offset = max_d = (t1len + t2len + 1) / 2; 274 | v_length = 2 * max_d; 275 | delta = (int)t1len - (int)t2len; 276 | front = (delta % 2 != 0); 277 | k1start = k1end = k2start = k2end = 0; 278 | 279 | if ((int)diff->v_alloc < v_length) { 280 | size_t asize = v_length * sizeof(int); 281 | diff->v1 = diff->v1 ? realloc(diff->v1, asize) : malloc(asize); 282 | diff->v2 = diff->v2 ? realloc(diff->v2, asize) : malloc(asize); 283 | diff->v_alloc = v_length; 284 | 285 | if (!diff->v1 || !diff->v2) 286 | return -1; 287 | } 288 | v1 = diff->v1; 289 | v2 = diff->v2; 290 | /* initialize arrays to -1 (except v_offset + 1 element to 0) */ 291 | memset(v1, 0xff, v_length * sizeof(int)); 292 | memset(v2, 0xff, v_length * sizeof(int)); 293 | v1[v_offset + 1] = 0; 294 | v2[v_offset + 1] = 0; 295 | 296 | for (d = 0; d < max_d; d++) { 297 | int k1, k2; 298 | 299 | /* TODO: bail out if deadline is reached */ 300 | 301 | /* advance the front contour */ 302 | for (k1 = -d + k1start; k1 <= d - k1end; k1 += 2) { 303 | int k1off = v_offset + k1; 304 | uint32_t x1, y1; 305 | 306 | if (k1 == -d || (k1 != d && v1[k1off - 1] < v1[k1off + 1])) 307 | x1 = v1[k1off + 1]; 308 | else 309 | x1 = v1[k1off - 1] + 1; 310 | y1 = x1 - k1; 311 | 312 | while (x1 < t1len && y1 < t2len && t1[x1] == t2[y1]) 313 | x1++, y1++; 314 | 315 | v1[k1off] = x1; 316 | if (x1 > t1len) /* ran off the right of the graph */ 317 | k1end += 2; 318 | else if (y1 > t2len) /* ran off bottom of 
the graph */ 319 | k1start += 2; 320 | else if (front) { 321 | int k2off = v_offset + delta - k1; 322 | if (k2off >= 0 && k2off < v_length && v2[k2off] != -1) { 323 | /* mirror x2 onto top-left coordinate system */ 324 | uint32_t x2 = (int)t1len - v2[k2off]; 325 | if (x1 >= x2) 326 | return diff_bisect_split( 327 | out, diff, opts, t1, x1, t1len, t2, y1, t2len); 328 | } 329 | } 330 | } 331 | 332 | /* advance the reverse contour */ 333 | for (k2 = -d + k2start; k2 <= d - k2end; k2 += 2) { 334 | int k2off = v_offset + k2; 335 | uint32_t x2, y2; 336 | 337 | if (k2 == -d || (k2 != d && v2[k2off - 1] < v2[k2off + 1])) 338 | x2 = v2[k2off + 1]; 339 | else 340 | x2 = v2[k2off - 1] + 1; 341 | y2 = x2 - k2; 342 | 343 | while (x2 < t1len && y2 < t2len && 344 | t1[t1len - x2 - 1] == t2[t2len - y2 - 1]) 345 | x2++, y2++; 346 | 347 | v2[k2off] = x2; 348 | if (x2 > t1len) /* ran off the left of the graph */ 349 | k2end += 2; 350 | else if (y2 > t2len) /* ran off top of the graph */ 351 | k2start += 2; 352 | else if (!front) { 353 | int k1off = v_offset + delta - k2; 354 | if (k1off >= 0 && k1off < v_length && v1[k1off] != -1) { 355 | /* mirror x2 onto top-left coordinate system */ 356 | uint32_t x1 = v1[k1off], y1 = v_offset + x1 - k1off; 357 | x2 = t1len - x2; 358 | if (x1 >= x2) 359 | return diff_bisect_split( 360 | out, diff, opts, t1, x1, t1len, t2, y1, t2len); 361 | } 362 | } 363 | } 364 | } 365 | 366 | /* diff took too long or # diffs == # chars (i.e. 
no commonality) */ 367 | dmp_range_insert(&diff->pool, out, -1, DMP_DIFF_DELETE, t1, 0, t1len); 368 | dmp_range_insert(&diff->pool, out, -1, DMP_DIFF_INSERT, t2, 0, t2len); 369 | 370 | return diff->pool.error; 371 | } 372 | 373 | static int diff_cleanup_merge(dmp_diff *diff, dmp_range *list) 374 | { 375 | dmp_pool *pool = &diff->pool; 376 | int i, before, common, changes; 377 | int count_delete, count_insert, len_delete, len_insert; 378 | dmp_node *ins = NULL, *del = NULL, *last = NULL, *node, *next; 379 | 380 | count_insert = count_delete = 0; 381 | len_insert = len_delete = 0; 382 | before = -1; 383 | 384 | dmp_range_normalize(pool, list); 385 | 386 | /* ensure EQUAL at end to guarantee termination of cleanup passes */ 387 | node = dmp_node_at(pool, list->end); 388 | if (node->op != DMP_DIFF_EQUAL) 389 | dmp_range_insert( 390 | pool, list, -1, DMP_DIFF_EQUAL, node->text, node->len, 0); 391 | 392 | /* first pass - look for groups of consecutive inserts and deletes 393 | * that can be merged or that have unnoticed common prefixes/suffixes 394 | * that can be extracted 395 | */ 396 | 397 | for (i = list->start; i != -1; i = node->next) { 398 | node = dmp_node_at(pool, i); 399 | 400 | switch (node->op) { 401 | case DMP_DIFF_INSERT: 402 | count_insert++; 403 | len_insert += node->len; 404 | if (!ins) 405 | ins = node; 406 | else { 407 | last->next = node->next; /* collapse node */ 408 | dmp_node_release(pool, i); 409 | } 410 | break; 411 | case DMP_DIFF_DELETE: 412 | count_delete++; 413 | len_delete += node->len; 414 | if (!del) 415 | del = node; 416 | else { 417 | last->next = node->next; /* collapse node */ 418 | dmp_node_release(pool, i); 419 | } 420 | break; 421 | case DMP_DIFF_EQUAL: 422 | if (count_delete + count_insert > 0) { 423 | if (count_delete > 0 && count_insert > 0) { 424 | /* factor out common prefix */ 425 | common = dmp_common_prefix( 426 | ins->text, len_insert, del->text, len_delete); 427 | 428 | if (common > 0) { 429 | if (before == -1) { 430 | 
dmp_range_insert(pool, list, 0, 431 | DMP_DIFF_EQUAL, ins->text, 0, common); 432 | } else { 433 | last = dmp_node_at(pool, before); 434 | last->len += common; 435 | } 436 | ins->text += common; 437 | len_insert -= common; 438 | del->text += common; 439 | len_delete -= common; 440 | } 441 | 442 | /* factor out common suffix */ 443 | common = dmp_common_suffix( 444 | ins->text, len_insert, del->text, len_delete); 445 | if (common > 0) { 446 | node->text -= common; 447 | node->len += common; 448 | len_insert -= common; 449 | len_delete -= common; 450 | } 451 | } 452 | /* merge deletes */ 453 | if (del) 454 | del->len = len_delete; 455 | /* merge inserts */ 456 | if (ins) 457 | ins->len = len_insert; 458 | } 459 | else if (last && last->op == DMP_DIFF_EQUAL) { 460 | /* merge this equality with the previous one */ 461 | last->len += node->len; 462 | last->next = node->next; 463 | dmp_node_release(pool, i); 464 | } 465 | 466 | count_insert = count_delete = 0; 467 | len_insert = len_delete = 0; 468 | ins = del = NULL; 469 | before = i; 470 | break; 471 | default: 472 | /* skip me */ 473 | break; 474 | } 475 | 476 | last = node; 477 | } 478 | 479 | /* second pass - look for single edits surrounded by equalities 480 | * which can be shifted sideways to eliminate an equality 481 | */ 482 | last = dmp_node_at(pool, list->start); 483 | next = (last->next < 0) ? 
NULL : dmp_node_at(pool, last->next); 484 | changes = 0; 485 | 486 | for (i = last->next; next != NULL && i != -1; i = node->next) { 487 | node = next; 488 | if (node->next < 0) 489 | break; 490 | next = dmp_node_at(pool, node->next); 491 | 492 | if (last->op == DMP_DIFF_EQUAL && next->op == DMP_DIFF_EQUAL) { 493 | if (last->len > 0 && 494 | dmp_has_suffix(node->text, node->len, last->text, last->len)) 495 | { 496 | node->text -= last->len; 497 | next->text -= last->len; 498 | next->len += last->len; 499 | last->len = 0; 500 | changes++; 501 | } 502 | else if (next->len > 0 && 503 | dmp_has_prefix(node->text, node->len, next->text, next->len)) 504 | { 505 | last->len += next->len; 506 | node->text += next->len; 507 | next->len = 0; 508 | changes++; 509 | } 510 | } 511 | 512 | last = node; 513 | } 514 | 515 | /* remove 0-len nodes */ 516 | dmp_range_normalize(pool, list); 517 | 518 | /* if shifts were made, diff needs reordering and another shift sweep */ 519 | if (changes > 0) 520 | return diff_cleanup_merge(diff, list); 521 | 522 | return pool->error; 523 | } 524 | 525 | void dmp_diff_free(dmp_diff *diff) 526 | { 527 | free(diff->v1); 528 | free(diff->v2); 529 | dmp_pool_free(&diff->pool); 530 | free(diff); 531 | } 532 | 533 | int dmp_diff_foreach( 534 | const dmp_diff *diff, 535 | dmp_diff_callback cb, 536 | void *cb_ref) 537 | { 538 | int pos, rval = 0; 539 | const dmp_node *node; 540 | 541 | dmp_range_foreach(&diff->pool, &diff->list, pos, node) { 542 | if ((rval = cb(cb_ref, node->op, node->text, node->len)) != 0) 543 | break; 544 | } 545 | 546 | return rval; 547 | } 548 | 549 | uint32_t dmp_diff_hunks(const dmp_diff *diff) 550 | { 551 | int pos; 552 | const dmp_node *node; 553 | uint32_t count = 0; 554 | 555 | dmp_range_foreach(&diff->pool, &diff->list, pos, node) 556 | count++; 557 | 558 | return count; 559 | } 560 | 561 | static void print_bytes(FILE *fp, const char *bytes, uint32_t len) 562 | { 563 | uint32_t i; 564 | 565 | for (i = 0; i < len; ++i) { 566 
| char ch = bytes[i]; 567 | if (isprint(ch)) 568 | fprintf(fp, "%c", ch); 569 | else 570 | fprintf(fp, "\\x%02x", ((unsigned int)ch) & 0x00ffu); 571 | } 572 | } 573 | 574 | void dmp_diff_print_raw(FILE *fp, const dmp_diff *diff) 575 | { 576 | int pos, ct = 0, ct0 = 0; 577 | const dmp_node *node; 578 | 579 | fputs("\n> \"", fp); 580 | print_bytes(fp, diff->t1, diff->l1); 581 | fputs("\"\n", fp); 582 | 583 | for (pos = diff->list.start; pos >= 0; pos = node->next) { 584 | node = dmp_node_at(&diff->pool,pos); 585 | ct0++; 586 | if (node->len > 0) 587 | ct++; 588 | fprintf(fp, "%c\"", (node->op < 0) ? '-' : (node->op > 0) ? '+' : '='); 589 | print_bytes(fp, node->text, node->len); 590 | fputs(node->next >= 0 ? "\", " : "\"\n", fp); 591 | } 592 | 593 | fputs("< \"", fp); 594 | print_bytes(fp, diff->t2, diff->l2); 595 | fputs("\"\n", fp); 596 | } 597 | 598 | int dmp_options_init(dmp_options *opts) 599 | { 600 | opts->timeout = 1.0F; 601 | opts->edit_cost = 4; 602 | opts->match_threshold = 0.5F; 603 | opts->match_distance = 1000.0F; 604 | opts->patch_delete_threshold = 0.5F; 605 | opts->patch_margin = 4; 606 | opts->match_maxbits = 32; 607 | opts->check_lines = 1; 608 | opts->trim_common_prefix = 1; 609 | opts->trim_common_suffix = 1; 610 | return 0; 611 | } 612 | 613 | uint32_t dmp_common_prefix( 614 | const char *t1, uint32_t l1, const char *t2, uint32_t l2) 615 | { 616 | const char *start = t1; 617 | const char *end = t1 + dmp_min(l1, l2); 618 | 619 | for (; t1 < end && *t1 == *t2; t1++, t2++); 620 | 621 | return (uint32_t)(t1 - start); 622 | } 623 | 624 | uint32_t dmp_common_suffix( 625 | const char *t1, uint32_t l1, const char *t2, uint32_t l2) 626 | { 627 | const char *start; 628 | 629 | if (l1 > l2) { 630 | const char *tswap = t1; t1 = t2; t2 = tswap; 631 | uint32_t lswap = l1; l1 = l2; l2 = lswap; 632 | } 633 | 634 | start = t1; 635 | 636 | for (t1 = t1+l1-1, t2 = t2+l2-1; t1 >= start && *t1 == *t2; t1--, t2--); 637 | 638 | return (uint32_t)((start + l1 - 1) - 
t1); 639 | } 640 | 641 | int dmp_strcmp( 642 | const char *t1, uint32_t l1, const char *t2, uint32_t l2) 643 | { 644 | int cmp = memcmp(t1, t2, dmp_min(l1, l2)); 645 | return (cmp != 0) ? cmp : dmp_num_cmp(l1, l2); 646 | } 647 | 648 | int dmp_has_prefix( 649 | const char *text, uint32_t tlen, const char *pfx, uint32_t plen) 650 | { 651 | if (plen > tlen) 652 | return 0; 653 | 654 | for (; plen > 0; --plen, ++text, ++pfx) 655 | if (*text != *pfx) 656 | return 0; 657 | 658 | return 1; 659 | } 660 | 661 | int dmp_has_suffix( 662 | const char *text, uint32_t tlen, const char *sfx, uint32_t slen) 663 | { 664 | if (slen > tlen) 665 | return 0; 666 | 667 | for (text = text + tlen - 1, sfx = sfx + slen - 1; 668 | slen > 0; --slen, --text, --sfx) 669 | if (*text != *sfx) 670 | return 0; 671 | 672 | return 1; 673 | } 674 | 675 | /* Railgun is a fast memmem search */ 676 | 677 | /* All Railgun variants are written by Georgi 'Kaze', they are free, 678 | * however I expect the user to mention its homepage, that is: 679 | * http://www.sanmayce.com/Railgun/index.html 680 | * 681 | * Author's email: sanmayce@sanmayce.com 682 | * 683 | * Caution: For better speed the case 'if (cbPattern==1)' was removed, 684 | * so Pattern must be longer than 1 char. 
685 | */ 686 | static const char *Railgun_Doublet( 687 | const char * pbTarget, const char * pbPattern, 688 | uint32_t cbTarget, uint32_t cbPattern) 689 | { 690 | const char * pbTargetMax = pbTarget + cbTarget; 691 | register uint32_t ulHashPattern; 692 | uint32_t count, countSTATIC; 693 | 694 | if (cbPattern > cbTarget) return(NULL); 695 | 696 | countSTATIC = cbPattern-2; 697 | 698 | pbTarget = pbTarget+cbPattern; 699 | ulHashPattern = (*(uint16_t *)(pbPattern)); 700 | 701 | for ( ;; ) { 702 | if ( ulHashPattern == (*(uint16_t *)(pbTarget-cbPattern)) ) { 703 | count = countSTATIC; 704 | while ( count && *(char *)(pbPattern+2+(countSTATIC-count)) == *(char *)(pbTarget-cbPattern+2+(countSTATIC-count)) ) { 705 | count--; 706 | } 707 | if ( count == 0 ) return((pbTarget-cbPattern)); 708 | } 709 | pbTarget++; 710 | if (pbTarget > pbTargetMax) return(NULL); 711 | } 712 | } 713 | 714 | const char *dmp_strstr( 715 | const char *haystack, uint32_t lh, const char *needle, uint32_t ln) 716 | { 717 | switch (ln) { 718 | case 0: 719 | return haystack; 720 | case 1: 721 | return memchr(haystack, *needle, lh); 722 | default: 723 | return Railgun_Doublet(haystack, needle, lh, ln); 724 | } 725 | } 726 | 727 | /* 728 | * Platform specific stuff 729 | */ 730 | 731 | #ifdef _WIN32 732 | 733 | #include 734 | 735 | static double dmp_time(void) 736 | { 737 | LARGE_INTEGER counter, freq; 738 | QueryPerformanceCounter(&counter); 739 | QueryPerformanceFrequency(&freq); 740 | return (double)counter.QuadPart / (double)freq.QuadPart; 741 | } 742 | 743 | #else 744 | 745 | #include 746 | 747 | static double dmp_time(void) 748 | { 749 | struct timeval tv; 750 | struct timezone tz; 751 | gettimeofday(&tv, &tz); 752 | return (double)tv.tv_sec + tv.tv_usec * 1E-6; 753 | } 754 | 755 | #endif 756 | 757 | -------------------------------------------------------------------------------- /src/dmp_pool.c: -------------------------------------------------------------------------------- 1 | /** 2 | * 
dmp_pool.c 3 | * 4 | * Utilities for maintaining a Linked list of diff records 5 | * 6 | * Copyright (c) Russell Belfer 7 | * https://github.com/arrbee/google-diff-match-patch-c/ 8 | * 9 | * See included LICENSE file for license details. 10 | */ 11 | #include "dmp.h" 12 | #include "dmp_pool.h" 13 | #include 14 | #include 15 | 16 | #define MIN_POOL 2 17 | #define MAX_POOL_INCREMENT 128 18 | 19 | int dmp_pool_alloc(dmp_pool *pool, uint32_t start_pool) 20 | { 21 | memset(pool, 0, sizeof(*pool)); 22 | 23 | if (start_pool < MIN_POOL) 24 | start_pool = MIN_POOL; 25 | 26 | pool->pool = calloc(start_pool, sizeof(dmp_node)); 27 | if (!pool->pool) 28 | return -1; 29 | 30 | pool->pool_size = start_pool; 31 | pool->pool_used = 1; /* set aside first item */ 32 | pool->free_list = -1; 33 | 34 | return 0; 35 | } 36 | 37 | void dmp_pool_free(dmp_pool *pool) 38 | { 39 | free(pool->pool); 40 | } 41 | 42 | void dmp_node_release(dmp_pool *pool, dmp_pos idx) 43 | { 44 | dmp_node *node = dmp_node_at(pool, idx); 45 | node->next = pool->free_list; 46 | pool->free_list = idx; 47 | } 48 | 49 | static dmp_pos grow_pool(dmp_pool *pool) 50 | { 51 | uint32_t new_size; 52 | dmp_node *new_pool; 53 | 54 | if (pool->pool_size > MAX_POOL_INCREMENT) 55 | new_size = pool->pool_size + MAX_POOL_INCREMENT; 56 | else 57 | new_size = pool->pool_size * 2; 58 | 59 | new_pool = realloc(pool->pool, new_size * sizeof(dmp_node)); 60 | if (!new_pool) { 61 | pool->error = -1; 62 | return -1; 63 | } 64 | 65 | pool->pool = new_pool; 66 | pool->pool_size = new_size; 67 | 68 | return pool->pool_used; 69 | } 70 | 71 | static dmp_pos alloc_node( 72 | dmp_pool *pool, int op, const char *data, uint32_t offset, uint32_t len) 73 | { 74 | dmp_pos pos; 75 | dmp_node *node; 76 | 77 | assert(pool && data && op >= -1 && op <= 1); 78 | 79 | /* don't insert zero length INSERT or DELETE ops */ 80 | if (len == 0 && op != 0) 81 | return -1; 82 | 83 | if (pool->free_list > 0) { 84 | pos = pool->free_list; 85 | node = dmp_node_at(pool, 
pos); 86 | pool->free_list = node->next; 87 | } 88 | else { 89 | if (pool->pool_used >= pool->pool_size) 90 | (void)grow_pool(pool); 91 | 92 | pos = pool->pool_used; 93 | pool->pool_used += 1; 94 | node = dmp_node_at(pool, pos); 95 | } 96 | 97 | node->text = data + offset; 98 | node->len = len; 99 | node->op = op; 100 | node->next = -1; 101 | 102 | #ifdef BUGALICIOUS 103 | if (len > 0) 104 | fprintf(stderr, "adding <%c'%.*s'> (len %d) %02x\n", 105 | !node->op ? '=' : node->op < 0 ? '-' : '+', 106 | node->len, node->text, node->len, (int)*node->text); 107 | #endif 108 | 109 | return pos; 110 | } 111 | 112 | dmp_pos dmp_range_init( 113 | dmp_pool *pool, dmp_range *run, 114 | int op, const char *data, uint32_t offset, uint32_t len) 115 | { 116 | run->start = run->end = alloc_node(pool, op, data, offset, len); 117 | return run->start; 118 | } 119 | 120 | dmp_pos dmp_range_insert( 121 | dmp_pool *pool, dmp_range *run, dmp_pos pos, 122 | int op, const char *data, uint32_t offset, uint32_t len) 123 | { 124 | dmp_node *node; 125 | dmp_pos added_at = alloc_node(pool, op, data, offset, len); 126 | if (added_at < 0) 127 | return pos; 128 | 129 | node = dmp_node_at(pool, added_at); 130 | 131 | if (pos == -1) { 132 | dmp_node *end = dmp_node_at(pool, run->end); 133 | node->next = end->next; 134 | end->next = added_at; 135 | run->end = added_at; 136 | } 137 | else if (pos == 0) { 138 | node->next = run->start; 139 | run->start = added_at; 140 | } 141 | else { 142 | dmp_node *after = dmp_node_at(pool, pos); 143 | node->next = after->next; 144 | after->next = added_at; 145 | } 146 | 147 | return added_at; 148 | } 149 | 150 | void dmp_range_splice( 151 | dmp_pool *pool, dmp_range *onto, dmp_pos pos, dmp_range *from) 152 | { 153 | dmp_node *tail; 154 | 155 | dmp_range_normalize(pool, from); 156 | 157 | tail = dmp_node_at(pool, from->end); 158 | 159 | if (pos == -1) { 160 | dmp_node *after = dmp_node_at(pool, onto->end); 161 | tail->next = after->next; 162 | after->next = 
from->start; 163 | onto->end = from->end; 164 | } 165 | else if (pos == 0) { 166 | tail->next = onto->start; 167 | onto->start = from->start; 168 | } 169 | else { 170 | dmp_node *after = dmp_node_at(pool, pos); 171 | tail->next = after->next; 172 | after->next = from->start; 173 | } 174 | } 175 | 176 | int dmp_range_len(dmp_pool *pool, dmp_range *run) 177 | { 178 | int count = 0; 179 | dmp_pos scan; 180 | 181 | for (scan = run->start; scan != -1; ) { 182 | dmp_node *node = dmp_node_at(pool, scan); 183 | count++; 184 | scan = node->next; 185 | } 186 | 187 | return count; 188 | } 189 | 190 | void dmp_range_normalize(dmp_pool *pool, dmp_range *range) 191 | { 192 | dmp_pos last_nonzero = -1, *pos = &range->start; 193 | 194 | while (*pos != -1) { 195 | dmp_node *node = dmp_node_at(pool, *pos); 196 | if (!node->len) { 197 | *pos = node->next; 198 | dmp_node_release(pool, dmp_node_pos(pool, node)); 199 | } else { 200 | last_nonzero = *pos; 201 | pos = &node->next; 202 | } 203 | } 204 | 205 | if (last_nonzero >= 0) 206 | range->end = last_nonzero; 207 | } 208 | -------------------------------------------------------------------------------- /src/dmp_pool.h: -------------------------------------------------------------------------------- 1 | /** 2 | * dmp_pool.h 3 | * 4 | * Utilities for maintaining a Linked list of diff records 5 | * 6 | * Copyright (c) Russell Belfer 7 | * https://github.com/arrbee/google-diff-match-patch-c/ 8 | * 9 | * See included LICENSE file for license details. 
10 | */ 11 | #ifndef INCLUDE_H_dmp_pool 12 | #define INCLUDE_H_dmp_pool 13 | 14 | #include 15 | 16 | typedef int dmp_pos; 17 | 18 | typedef struct { 19 | const char *text; 20 | uint32_t len; 21 | int op; 22 | dmp_pos next; 23 | } dmp_node; 24 | 25 | typedef struct { 26 | dmp_pos start, end; 27 | } dmp_range; 28 | 29 | typedef struct { 30 | dmp_node *pool; 31 | uint32_t pool_size, pool_used; 32 | dmp_pos free_list; 33 | int error; 34 | } dmp_pool; 35 | 36 | extern int dmp_pool_alloc(dmp_pool *pool, uint32_t start_pool); 37 | 38 | extern void dmp_pool_free(dmp_pool *list); 39 | 40 | extern dmp_pos dmp_range_init( 41 | dmp_pool *list, dmp_range *run, 42 | int op, const char *data, uint32_t offset, uint32_t len); 43 | 44 | extern dmp_pos dmp_range_insert( 45 | dmp_pool *list, dmp_range *run, dmp_pos pos, 46 | int op, const char *data, uint32_t offset, uint32_t len); 47 | 48 | extern void dmp_range_splice( 49 | dmp_pool *list, dmp_range *onto, dmp_pos pos, dmp_range *from); 50 | 51 | extern int dmp_range_len(dmp_pool *pool, dmp_range *run); 52 | 53 | /* remove all 0-length nodes and advance 'end' to actual end */ 54 | extern void dmp_range_normalize(dmp_pool *pool, dmp_range *range); 55 | 56 | extern void dmp_node_release(dmp_pool *pool, dmp_pos idx); 57 | 58 | #define dmp_node_at(POOL,POS) (&((POOL)->pool[(POS)])) 59 | 60 | #define dmp_node_pos(POOL,NODE) ((dmp_pos)((NODE) - (POOL)->pool)) 61 | 62 | #define dmp_range_foreach(POOL, RANGE, IDX, PTR) \ 63 | for (IDX = (RANGE)->start; IDX >= 0; IDX = (PTR)->next) \ 64 | if (((PTR) = dmp_node_at((POOL),IDX))->len > 0) 65 | 66 | #endif 67 | -------------------------------------------------------------------------------- /test/dmp_test.c: -------------------------------------------------------------------------------- 1 | /** 2 | * dmp_test.c 3 | * 4 | * Tests for public APIs of libdmp (plus a quick and dirty test driver) 5 | */ 6 | 7 | #include "dmp_test.h" 8 | 9 | void test_util_0(void) 10 | { 11 | 
assert(dmp_common_prefix("aaa", 3, "abc", 3) == 1); 12 | assert(dmp_common_prefix("abc", 3, "aaa", 3) == 1); 13 | assert(dmp_common_prefix("", 0, "abc", 3) == 0); 14 | assert(dmp_common_prefix("abc", 3, "", 0) == 0); 15 | assert(dmp_common_prefix("aaa", 3, "aaa", 3) == 3); 16 | assert(dmp_common_prefix("aaa\000bbb", 7, "aaa\000bqq", 7) == 5); 17 | progress(); 18 | 19 | assert(dmp_has_prefix("aaa", 3, "a", 1)); 20 | assert(!dmp_has_prefix("a", 1, "aaa", 3)); 21 | assert(dmp_has_prefix("aaa\000bbb", 7, "aaa\000b", 5)); 22 | assert(!dmp_has_prefix("abc", 3, "b", 1)); 23 | progress(); 24 | 25 | assert(dmp_has_suffix("aaa", 3, "a", 1)); 26 | assert(dmp_has_suffix("aaa", 4, "a", 2)); 27 | assert(dmp_has_suffix("aaa\000q", 5, "a\000q", 3)); 28 | assert(!dmp_has_suffix("aaa", 3, "q", 1)); 29 | assert(!dmp_has_suffix("abcdef", 6, "qcdef", 5)); 30 | progress(); 31 | } 32 | 33 | struct diff_stat_data { 34 | uint32_t deletes; 35 | uint32_t delete_bytes; 36 | uint32_t equals; 37 | uint32_t equal_bytes; 38 | uint32_t inserts; 39 | uint32_t insert_bytes; 40 | uint32_t map; 41 | }; 42 | 43 | static int diff_stats( 44 | void *ref, dmp_operation_t op, const void *data, uint32_t len) 45 | { 46 | struct diff_stat_data *d = ref; 47 | 48 | (void)data; 49 | 50 | switch (op) { 51 | case DMP_DIFF_DELETE: 52 | d->deletes++; 53 | d->delete_bytes += len; 54 | d->map = (d->map << 1) | 1; 55 | break; 56 | case DMP_DIFF_EQUAL: 57 | d->equals++; 58 | d->equal_bytes += len; 59 | d->map = (d->map << 1); 60 | break; 61 | case DMP_DIFF_INSERT: 62 | d->inserts++; 63 | d->insert_bytes += len; 64 | d->map = (d->map << 1) | 1; 65 | break; 66 | } 67 | 68 | return 0; 69 | } 70 | 71 | static void expect_diff_stat( 72 | dmp_diff *diff, uint32_t dels, uint32_t eqs, uint32_t ins, uint32_t map) 73 | { 74 | struct diff_stat_data d; 75 | 76 | memset(&d, 0, sizeof(d)); 77 | 78 | assert(dmp_diff_foreach(diff, diff_stats, &d) == 0); 79 | 80 | assert(d.deletes == dels); 81 | assert(d.equals == eqs); 82 | 
assert(d.inserts == ins); 83 | assert(d.map == map); 84 | 85 | progress(); 86 | } 87 | /* test_diff_0: builds a diff for each input pair below, checks it with expect_diff_stat() and releases it with dmp_diff_free() before the next case. Going by the inline "expect:" notes, the numeric arguments are the expected delete, equal and insert segment counts followed by a bit map with one bit per segment (1 = delete or insert, 0 = equal, first segment in the highest bit) -- NOTE(review): confirm against expect_diff_stat's parameter list, which is not visible here. */ 88 | void test_diff_0(void) 89 | { 90 | dmp_diff *diff; 91 | 92 | dmp_diff_from_strs(&diff, NULL, "", ""); 93 | assert(diff != NULL); 94 | expect_diff_stat(diff, 0, 0, 0, 0x0); 95 | dmp_diff_free(diff); 96 | 97 | dmp_diff_from_strs(&diff, NULL, "same", "same"); 98 | assert(diff != NULL); 99 | expect_diff_stat(diff, 0, 1, 0, 0x0); 100 | dmp_diff_free(diff); 101 | 102 | dmp_diff_from_strs(&diff, NULL, "", "new"); 103 | assert(diff != NULL); 104 | expect_diff_stat(diff, 0, 0, 1, 0x01); 105 | dmp_diff_free(diff); 106 | 107 | dmp_diff_from_strs(&diff, NULL, "old", ""); 108 | assert(diff != NULL); 109 | expect_diff_stat(diff, 1, 0, 0, 0x01); 110 | dmp_diff_free(diff); 111 | 112 | dmp_diff_from_strs(&diff, NULL, "commonAAA", "common"); 113 | assert(diff != NULL); 114 | expect_diff_stat(diff, 1, 1, 0, 0x01); 115 | dmp_diff_free(diff); 116 | 117 | dmp_diff_from_strs(&diff, NULL, "common", "commonBBB"); 118 | assert(diff != NULL); 119 | expect_diff_stat(diff, 0, 1, 1, 0x01); 120 | dmp_diff_free(diff); 121 | 122 | dmp_diff_from_strs(&diff, NULL, "AAAcommon", "common"); 123 | assert(diff != NULL); 124 | expect_diff_stat(diff, 1, 1, 0, 0x02); 125 | dmp_diff_free(diff); 126 | 127 | dmp_diff_from_strs(&diff, NULL, "common", "BBBcommon"); 128 | assert(diff != NULL); 129 | expect_diff_stat(diff, 0, 1, 1, 0x02); 130 | dmp_diff_free(diff); 131 | 132 | dmp_diff_from_strs(&diff, NULL, "commonAAAcommon", "commoncommon"); 133 | assert(diff != NULL); 134 | expect_diff_stat(diff, 1, 2, 0, 0x02); 135 | dmp_diff_free(diff); 136 | 137 | dmp_diff_from_strs(&diff, NULL, "commoncommon", "commonBBBcommon"); 138 | assert(diff != NULL); 139 | expect_diff_stat(diff, 0, 2, 1, 0x02); 140 | dmp_diff_free(diff); 141 | 142 | dmp_diff_from_strs(&diff, NULL, "AAA", "bigAAAfun"); 143 | assert(diff != NULL); 144 | expect_diff_stat(diff, 0, 1, 2, 0x05); 145 | dmp_diff_free(diff); 146 | 147 | dmp_diff_from_strs(&diff, NULL, 
"bigBBBfun", "BBB"); 148 | assert(diff != NULL); 149 | expect_diff_stat(diff, 2, 1, 0, 0x05); 150 | dmp_diff_free(diff); 151 | 152 | dmp_diff_from_strs(&diff, NULL, "commonAAA", "commonbigAAAfun"); 153 | assert(diff != NULL); 154 | expect_diff_stat(diff, 0, 2, 2, 0x05); 155 | dmp_diff_free(diff); 156 | 157 | dmp_diff_from_strs(&diff, NULL, "bigBBBfuncommon", "BBBcommon"); 158 | assert(diff != NULL); 159 | expect_diff_stat(diff, 2, 2, 0, 0x0a); 160 | dmp_diff_free(diff); 161 | 162 | dmp_diff_from_strs(&diff, NULL, "aaa", "bbb"); 163 | assert(diff != NULL); 164 | expect_diff_stat(diff, 1, 0, 1, 0x03); 165 | dmp_diff_free(diff); 166 | 167 | /* two insertions */ 168 | dmp_diff_from_strs(&diff, NULL, "abc", "a123b456c"); 169 | assert(diff != NULL); 170 | expect_diff_stat(diff, 0, 3, 2, 0x0a); 171 | dmp_diff_free(diff); 172 | 173 | /* two deletions */ 174 | dmp_diff_from_strs(&diff, NULL, "a123b456c", "abc"); 175 | assert(diff != NULL); 176 | expect_diff_stat(diff, 2, 3, 0, 0x0a); 177 | dmp_diff_free(diff); 178 | 179 | dmp_diff_from_strs(&diff, NULL, "aabbccdd", "aaddccbb"); 180 | /* expect: eq='aa' del='bbcc' eq='dd' ins='ccbb' */ 181 | assert(diff != NULL); 182 | expect_diff_stat(diff, 1, 2, 1, 0x05); /* 0101 */ 183 | dmp_diff_free(diff); 184 | 185 | dmp_diff_from_strs( 186 | &diff, NULL, "Apples are a fruit.", "Bananas are also fruit."); 187 | /* expect: del='Apple' ins='Banana' eq='s are a' ins='lso' eq ' fruit.' 
*/ 188 | assert(diff != NULL); 189 | expect_diff_stat(diff, 1, 2, 2, 0x1a); /* 11010 */ 190 | dmp_diff_free(diff); 191 | /* This case calls dmp_diff_new with explicit lengths (presumably byte counts -- TODO confirm) so that the trailing \x00 of the second input is part of the compared text. */ 192 | dmp_diff_new(&diff, NULL, "ax\t", 3, "\u0680x\x00", 4); 193 | /* expect: del='a' ins='\u0680' eq='x' del='\t' ins='\x00' */ 194 | assert(diff != NULL); 195 | dmp_diff_print_raw(stderr, diff); 196 | expect_diff_stat(diff, 2, 1, 2, 0x1b); /* 11011 */ 197 | dmp_diff_free(diff); 198 | 199 | dmp_diff_from_strs(&diff, NULL, "1ayb2", "abxab"); 200 | /* expect: del='1' eq='a' del='y' eq='b' del='2' ins='xab' */ 201 | assert(diff != NULL); 202 | dmp_diff_print_raw(stderr, diff); 203 | expect_diff_stat(diff, 3, 2, 1, 0x2b); /* 101011 */ 204 | dmp_diff_free(diff); 205 | 206 | dmp_diff_from_strs(&diff, NULL, "abcy", "xaxcxabc"); 207 | /* expect: ins='xaxcx' eq='abc' del='y' */ 208 | assert(diff != NULL); 209 | dmp_diff_print_raw(stderr, diff); 210 | expect_diff_stat(diff, 1, 1, 1, 0x5); /* 0101 */ 211 | dmp_diff_free(diff); 212 | } 213 | 214 | /* NULL-terminated list of test entry points; main() walks it in order. */ 215 | static test_fn g_tests[] = { 216 | test_util_0, 217 | test_ranges_0, 218 | test_diff_0, 219 | NULL 220 | }; 221 | /* Calls every entry of g_tests in order, writing "done\n" to stderr after each one, then returns 0. The command-line arguments are unused. */ 222 | int main(int argc, char **argv) 223 | { 224 | test_fn *scan; 225 | 226 | (void)argc; (void)argv; 227 | 228 | for (scan = g_tests; *scan != NULL; ++scan) { 229 | (*scan)(); 230 | fputs("done\n", stderr); 231 | } 232 | 233 | return 0; 234 | } 235 | -------------------------------------------------------------------------------- /test/dmp_test.h: -------------------------------------------------------------------------------- 1 | /** 2 | * dmp_test.h 3 | * 4 | * Some common test declarations 5 | */ 6 | #ifndef INCLUDE_dmp_test_h__ 7 | #define INCLUDE_dmp_test_h__ 8 | /* NOTE(review): the three #include lines below carry no header names in this copy -- presumably lost in extraction; confirm against the original header. */ 9 | #include 10 | #include 11 | #include 12 | /* Signature shared by every test entry point. */ 13 | typedef void (*test_fn)(void); 14 | /* Writes a single "." to stderr as a progress tick. */ 15 | #define progress() fputs(".", stderr) 16 | 17 | extern void test_util_0(void); 18 | extern void test_ranges_0(void); 19 | extern void test_diff_0(void); 20 | 21 | #endif 22 | 
-------------------------------------------------------------------------------- /test/dmp_test_internals.c: -------------------------------------------------------------------------------- 1 | /** 2 | * dmp_test_internals.c 3 | * 4 | * Tests that include internal APIs of libdmp 5 | */ 6 | 7 | #include "dmp_test.h" 8 | #include "../src/dmp_pool.h" 9 | /* test_ranges_0: exercises the pool-backed range helpers from dmp_pool.h. It builds a range of seven nodes that alternate empty text with "ab", "cd" and "ef", normalizes it down to three nodes, and then checks how further inserts affect the pool's pool_used counter. */ 10 | void test_ranges_0(void) 11 | { 12 | dmp_pool pool, *p = &pool; 13 | dmp_range range, *r = &range; 14 | uint32_t used; 15 | 16 | assert(dmp_pool_alloc(p, 4) == 0); 17 | 18 | assert(dmp_range_init(p, r, 0, "", 0, 0) > 0); 19 | assert(r->start > 0); 20 | assert(r->start == r->end); 21 | assert(dmp_range_len(p, r) == 1); 22 | assert(dmp_range_insert(p, r, -1, 0, "ab", 0, 2) > 0); 23 | assert(r->start != r->end); 24 | assert(dmp_range_len(p, r) == 2); 25 | assert(dmp_range_insert(p, r, -1, 0, "", 0, 0) > 0); 26 | assert(dmp_range_len(p, r) == 3); 27 | assert(dmp_range_insert(p, r, -1, 0, "cd", 0, 2) > 0); 28 | assert(dmp_range_insert(p, r, -1, 0, "", 0, 0) > 0); 29 | assert(dmp_range_insert(p, r, -1, 0, "ef", 0, 2) > 0); 30 | assert(dmp_range_insert(p, r, -1, 0, "", 0, 0) > 0); 31 | assert(r->start != r->end); 32 | assert(dmp_range_len(p, r) == 7); 33 | progress(); 34 | /* Record pool usage before normalizing; afterwards the range has three nodes, starting at "ab" and ending at "ef". */ 35 | used = p->pool_used; 36 | dmp_range_normalize(p, r); 37 | assert(dmp_range_len(p, r) == 3); 38 | assert(strcmp(dmp_node_at(p, r->start)->text, "ab") == 0); 39 | assert(strcmp(dmp_node_at(p, r->end)->text, "ef") == 0); 40 | /* The next four inserts must leave pool_used unchanged -- presumably they reuse the four nodes that normalize dropped (7 - 3); confirm against dmp_pool.c. */ 41 | assert(dmp_range_insert(p, r, -1, 0, "", 0, 0) > 0); 42 | assert(p->pool_used == used); 43 | 44 | assert(dmp_range_insert(p, r, -1, 0, "", 0, 0) > 0); 45 | assert(dmp_range_insert(p, r, -1, 0, "", 0, 0) > 0); 46 | assert(dmp_range_insert(p, r, -1, 0, "", 0, 0) > 0); 47 | assert(p->pool_used == used); 48 | /* The fifth insert is expected to grow pool_used by exactly one. */ 49 | assert(dmp_range_insert(p, r, -1, 0, "", 0, 0) > 0); 50 | assert(p->pool_used == used + 1); 51 | progress(); 52 | } 53 | --------------------------------------------------------------------------------