├── .gitignore ├── COPYING.LIB ├── README.md ├── example └── tdiff-trace-a-cat-ate-my-hat.svg ├── rebar.config.script ├── src ├── tdiff.app.src ├── tdiff.erl ├── tdiff_benchmark.erl └── tdiff_debug.erl └── test └── tdiff_tests.erl /.gitignore: -------------------------------------------------------------------------------- 1 | erl_crash.dump 2 | *.beam 3 | # Rebar compiles tests and more to .eunit/ 4 | .eunit 5 | .rebar 6 | _build 7 | rebar.lock 8 | ebin 9 | doc 10 | -------------------------------------------------------------------------------- /COPYING.LIB: -------------------------------------------------------------------------------- 1 | NOTE! It is ok to link this library with code covered by 2 | the Erlang public license, http://www.erlang.org/EPLICENSE 3 | 4 | /Tomas Abrahamsson 5 | 6 | ---------------------------------------- 7 | 8 | GNU LIBRARY GENERAL PUBLIC LICENSE 9 | Version 2, June 1991 10 | 11 | Copyright (C) 1991 Free Software Foundation, Inc. 12 | 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 13 | Everyone is permitted to copy and distribute verbatim copies 14 | of this license document, but changing it is not allowed. 15 | 16 | [This is the first released version of the library GPL. It is 17 | numbered 2 because it goes with version 2 of the ordinary GPL.] 18 | 19 | Preamble 20 | 21 | The licenses for most software are designed to take away your 22 | freedom to share and change it. By contrast, the GNU General Public 23 | Licenses are intended to guarantee your freedom to share and change 24 | free software--to make sure the software is free for all its users. 25 | 26 | This license, the Library General Public License, applies to some 27 | specially designated Free Software Foundation software, and to any 28 | other libraries whose authors decide to use it. You can use it for 29 | your libraries, too. 30 | 31 | When we speak of free software, we are referring to freedom, not 32 | price. 
Our General Public Licenses are designed to make sure that you 33 | have the freedom to distribute copies of free software (and charge for 34 | this service if you wish), that you receive source code or can get it 35 | if you want it, that you can change the software or use pieces of it 36 | in new free programs; and that you know you can do these things. 37 | 38 | To protect your rights, we need to make restrictions that forbid 39 | anyone to deny you these rights or to ask you to surrender the rights. 40 | These restrictions translate to certain responsibilities for you if 41 | you distribute copies of the library, or if you modify it. 42 | 43 | For example, if you distribute copies of the library, whether gratis 44 | or for a fee, you must give the recipients all the rights that we gave 45 | you. You must make sure that they, too, receive or can get the source 46 | code. If you link a program with the library, you must provide 47 | complete object files to the recipients so that they can relink them 48 | with the library, after making changes to the library and recompiling 49 | it. And you must show them these terms so they know their rights. 50 | 51 | Our method of protecting your rights has two steps: (1) copyright 52 | the library, and (2) offer you this license which gives you legal 53 | permission to copy, distribute and/or modify the library. 54 | 55 | Also, for each distributor's protection, we want to make certain 56 | that everyone understands that there is no warranty for this free 57 | library. If the library is modified by someone else and passed on, we 58 | want its recipients to know that what they have is not the original 59 | version, so that any problems introduced by others will not reflect on 60 | the original authors' reputations. 61 | 62 | Finally, any free program is threatened constantly by software 63 | patents. 
We wish to avoid the danger that companies distributing free 64 | software will individually obtain patent licenses, thus in effect 65 | transforming the program into proprietary software. To prevent this, 66 | we have made it clear that any patent must be licensed for everyone's 67 | free use or not licensed at all. 68 | 69 | Most GNU software, including some libraries, is covered by the ordinary 70 | GNU General Public License, which was designed for utility programs. This 71 | license, the GNU Library General Public License, applies to certain 72 | designated libraries. This license is quite different from the ordinary 73 | one; be sure to read it in full, and don't assume that anything in it is 74 | the same as in the ordinary license. 75 | 76 | The reason we have a separate public license for some libraries is that 77 | they blur the distinction we usually make between modifying or adding to a 78 | program and simply using it. Linking a program with a library, without 79 | changing the library, is in some sense simply using the library, and is 80 | analogous to running a utility program or application program. However, in 81 | a textual and legal sense, the linked executable is a combined work, a 82 | derivative of the original library, and the ordinary General Public License 83 | treats it as such. 84 | 85 | Because of this blurred distinction, using the ordinary General 86 | Public License for libraries did not effectively promote software 87 | sharing, because most developers did not use the libraries. We 88 | concluded that weaker conditions might promote sharing better. 89 | 90 | However, unrestricted linking of non-free programs would deprive the 91 | users of those programs of all benefit from the free status of the 92 | libraries themselves. 
This Library General Public License is intended to 93 | permit developers of non-free programs to use free libraries, while 94 | preserving your freedom as a user of such programs to change the free 95 | libraries that are incorporated in them. (We have not seen how to achieve 96 | this as regards changes in header files, but we have achieved it as regards 97 | changes in the actual functions of the Library.) The hope is that this 98 | will lead to faster development of free libraries. 99 | 100 | The precise terms and conditions for copying, distribution and 101 | modification follow. Pay close attention to the difference between a 102 | "work based on the library" and a "work that uses the library". The 103 | former contains code derived from the library, while the latter only 104 | works together with the library. 105 | 106 | Note that it is possible for a library to be covered by the ordinary 107 | General Public License rather than by this special one. 108 | 109 | GNU LIBRARY GENERAL PUBLIC LICENSE 110 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 111 | 112 | 0. This License Agreement applies to any software library which 113 | contains a notice placed by the copyright holder or other authorized 114 | party saying it may be distributed under the terms of this Library 115 | General Public License (also called "this License"). Each licensee is 116 | addressed as "you". 117 | 118 | A "library" means a collection of software functions and/or data 119 | prepared so as to be conveniently linked with application programs 120 | (which use some of those functions and data) to form executables. 121 | 122 | The "Library", below, refers to any such software library or work 123 | which has been distributed under these terms. 
A "work based on the 124 | Library" means either the Library or any derivative work under 125 | copyright law: that is to say, a work containing the Library or a 126 | portion of it, either verbatim or with modifications and/or translated 127 | straightforwardly into another language. (Hereinafter, translation is 128 | included without limitation in the term "modification".) 129 | 130 | "Source code" for a work means the preferred form of the work for 131 | making modifications to it. For a library, complete source code means 132 | all the source code for all modules it contains, plus any associated 133 | interface definition files, plus the scripts used to control compilation 134 | and installation of the library. 135 | 136 | Activities other than copying, distribution and modification are not 137 | covered by this License; they are outside its scope. The act of 138 | running a program using the Library is not restricted, and output from 139 | such a program is covered only if its contents constitute a work based 140 | on the Library (independent of the use of the Library in a tool for 141 | writing it). Whether that is true depends on what the Library does 142 | and what the program that uses the Library does. 143 | 144 | 1. You may copy and distribute verbatim copies of the Library's 145 | complete source code as you receive it, in any medium, provided that 146 | you conspicuously and appropriately publish on each copy an 147 | appropriate copyright notice and disclaimer of warranty; keep intact 148 | all the notices that refer to this License and to the absence of any 149 | warranty; and distribute a copy of this License along with the 150 | Library. 151 | 152 | You may charge a fee for the physical act of transferring a copy, 153 | and you may at your option offer warranty protection in exchange for a 154 | fee. 155 | 156 | 2. 
You may modify your copy or copies of the Library or any portion 157 | of it, thus forming a work based on the Library, and copy and 158 | distribute such modifications or work under the terms of Section 1 159 | above, provided that you also meet all of these conditions: 160 | 161 | a) The modified work must itself be a software library. 162 | 163 | b) You must cause the files modified to carry prominent notices 164 | stating that you changed the files and the date of any change. 165 | 166 | c) You must cause the whole of the work to be licensed at no 167 | charge to all third parties under the terms of this License. 168 | 169 | d) If a facility in the modified Library refers to a function or a 170 | table of data to be supplied by an application program that uses 171 | the facility, other than as an argument passed when the facility 172 | is invoked, then you must make a good faith effort to ensure that, 173 | in the event an application does not supply such function or 174 | table, the facility still operates, and performs whatever part of 175 | its purpose remains meaningful. 176 | 177 | (For example, a function in a library to compute square roots has 178 | a purpose that is entirely well-defined independent of the 179 | application. Therefore, Subsection 2d requires that any 180 | application-supplied function or table used by this function must 181 | be optional: if the application does not supply it, the square 182 | root function must still compute square roots.) 183 | 184 | These requirements apply to the modified work as a whole. If 185 | identifiable sections of that work are not derived from the Library, 186 | and can be reasonably considered independent and separate works in 187 | themselves, then this License, and its terms, do not apply to those 188 | sections when you distribute them as separate works. 
But when you 189 | distribute the same sections as part of a whole which is a work based 190 | on the Library, the distribution of the whole must be on the terms of 191 | this License, whose permissions for other licensees extend to the 192 | entire whole, and thus to each and every part regardless of who wrote 193 | it. 194 | 195 | Thus, it is not the intent of this section to claim rights or contest 196 | your rights to work written entirely by you; rather, the intent is to 197 | exercise the right to control the distribution of derivative or 198 | collective works based on the Library. 199 | 200 | In addition, mere aggregation of another work not based on the Library 201 | with the Library (or with a work based on the Library) on a volume of 202 | a storage or distribution medium does not bring the other work under 203 | the scope of this License. 204 | 205 | 3. You may opt to apply the terms of the ordinary GNU General Public 206 | License instead of this License to a given copy of the Library. To do 207 | this, you must alter all the notices that refer to this License, so 208 | that they refer to the ordinary GNU General Public License, version 2, 209 | instead of to this License. (If a newer version than version 2 of the 210 | ordinary GNU General Public License has appeared, then you can specify 211 | that version instead if you wish.) Do not make any other change in 212 | these notices. 213 | 214 | Once this change is made in a given copy, it is irreversible for 215 | that copy, so the ordinary GNU General Public License applies to all 216 | subsequent copies and derivative works made from that copy. 217 | 218 | This option is useful when you wish to copy part of the code of 219 | the Library into a program that is not a library. 220 | 221 | 4. 
You may copy and distribute the Library (or a portion or 222 | derivative of it, under Section 2) in object code or executable form 223 | under the terms of Sections 1 and 2 above provided that you accompany 224 | it with the complete corresponding machine-readable source code, which 225 | must be distributed under the terms of Sections 1 and 2 above on a 226 | medium customarily used for software interchange. 227 | 228 | If distribution of object code is made by offering access to copy 229 | from a designated place, then offering equivalent access to copy the 230 | source code from the same place satisfies the requirement to 231 | distribute the source code, even though third parties are not 232 | compelled to copy the source along with the object code. 233 | 234 | 5. A program that contains no derivative of any portion of the 235 | Library, but is designed to work with the Library by being compiled or 236 | linked with it, is called a "work that uses the Library". Such a 237 | work, in isolation, is not a derivative work of the Library, and 238 | therefore falls outside the scope of this License. 239 | 240 | However, linking a "work that uses the Library" with the Library 241 | creates an executable that is a derivative of the Library (because it 242 | contains portions of the Library), rather than a "work that uses the 243 | library". The executable is therefore covered by this License. 244 | Section 6 states terms for distribution of such executables. 245 | 246 | When a "work that uses the Library" uses material from a header file 247 | that is part of the Library, the object code for the work may be a 248 | derivative work of the Library even though the source code is not. 249 | Whether this is true is especially significant if the work can be 250 | linked without the Library, or if the work is itself a library. The 251 | threshold for this to be true is not precisely defined by law. 
252 | 253 | If such an object file uses only numerical parameters, data 254 | structure layouts and accessors, and small macros and small inline 255 | functions (ten lines or less in length), then the use of the object 256 | file is unrestricted, regardless of whether it is legally a derivative 257 | work. (Executables containing this object code plus portions of the 258 | Library will still fall under Section 6.) 259 | 260 | Otherwise, if the work is a derivative of the Library, you may 261 | distribute the object code for the work under the terms of Section 6. 262 | Any executables containing that work also fall under Section 6, 263 | whether or not they are linked directly with the Library itself. 264 | 265 | 6. As an exception to the Sections above, you may also compile or 266 | link a "work that uses the Library" with the Library to produce a 267 | work containing portions of the Library, and distribute that work 268 | under terms of your choice, provided that the terms permit 269 | modification of the work for the customer's own use and reverse 270 | engineering for debugging such modifications. 271 | 272 | You must give prominent notice with each copy of the work that the 273 | Library is used in it and that the Library and its use are covered by 274 | this License. You must supply a copy of this License. If the work 275 | during execution displays copyright notices, you must include the 276 | copyright notice for the Library among them, as well as a reference 277 | directing the user to the copy of this License. 
Also, you must do one 278 | of these things: 279 | 280 | a) Accompany the work with the complete corresponding 281 | machine-readable source code for the Library including whatever 282 | changes were used in the work (which must be distributed under 283 | Sections 1 and 2 above); and, if the work is an executable linked 284 | with the Library, with the complete machine-readable "work that 285 | uses the Library", as object code and/or source code, so that the 286 | user can modify the Library and then relink to produce a modified 287 | executable containing the modified Library. (It is understood 288 | that the user who changes the contents of definitions files in the 289 | Library will not necessarily be able to recompile the application 290 | to use the modified definitions.) 291 | 292 | b) Accompany the work with a written offer, valid for at 293 | least three years, to give the same user the materials 294 | specified in Subsection 6a, above, for a charge no more 295 | than the cost of performing this distribution. 296 | 297 | c) If distribution of the work is made by offering access to copy 298 | from a designated place, offer equivalent access to copy the above 299 | specified materials from the same place. 300 | 301 | d) Verify that the user has already received a copy of these 302 | materials or that you have already sent this user a copy. 303 | 304 | For an executable, the required form of the "work that uses the 305 | Library" must include any data and utility programs needed for 306 | reproducing the executable from it. However, as a special exception, 307 | the source code distributed need not include anything that is normally 308 | distributed (in either source or binary form) with the major 309 | components (compiler, kernel, and so on) of the operating system on 310 | which the executable runs, unless that component itself accompanies 311 | the executable. 
312 | 313 | It may happen that this requirement contradicts the license 314 | restrictions of other proprietary libraries that do not normally 315 | accompany the operating system. Such a contradiction means you cannot 316 | use both them and the Library together in an executable that you 317 | distribute. 318 | 319 | 7. You may place library facilities that are a work based on the 320 | Library side-by-side in a single library together with other library 321 | facilities not covered by this License, and distribute such a combined 322 | library, provided that the separate distribution of the work based on 323 | the Library and of the other library facilities is otherwise 324 | permitted, and provided that you do these two things: 325 | 326 | a) Accompany the combined library with a copy of the same work 327 | based on the Library, uncombined with any other library 328 | facilities. This must be distributed under the terms of the 329 | Sections above. 330 | 331 | b) Give prominent notice with the combined library of the fact 332 | that part of it is a work based on the Library, and explaining 333 | where to find the accompanying uncombined form of the same work. 334 | 335 | 8. You may not copy, modify, sublicense, link with, or distribute 336 | the Library except as expressly provided under this License. Any 337 | attempt otherwise to copy, modify, sublicense, link with, or 338 | distribute the Library is void, and will automatically terminate your 339 | rights under this License. However, parties who have received copies, 340 | or rights, from you under this License will not have their licenses 341 | terminated so long as such parties remain in full compliance. 342 | 343 | 9. You are not required to accept this License, since you have not 344 | signed it. However, nothing else grants you permission to modify or 345 | distribute the Library or its derivative works. These actions are 346 | prohibited by law if you do not accept this License. 
Therefore, by 347 | modifying or distributing the Library (or any work based on the 348 | Library), you indicate your acceptance of this License to do so, and 349 | all its terms and conditions for copying, distributing or modifying 350 | the Library or works based on it. 351 | 352 | 10. Each time you redistribute the Library (or any work based on the 353 | Library), the recipient automatically receives a license from the 354 | original licensor to copy, distribute, link with or modify the Library 355 | subject to these terms and conditions. You may not impose any further 356 | restrictions on the recipients' exercise of the rights granted herein. 357 | You are not responsible for enforcing compliance by third parties to 358 | this License. 359 | 360 | 11. If, as a consequence of a court judgment or allegation of patent 361 | infringement or for any other reason (not limited to patent issues), 362 | conditions are imposed on you (whether by court order, agreement or 363 | otherwise) that contradict the conditions of this License, they do not 364 | excuse you from the conditions of this License. If you cannot 365 | distribute so as to satisfy simultaneously your obligations under this 366 | License and any other pertinent obligations, then as a consequence you 367 | may not distribute the Library at all. For example, if a patent 368 | license would not permit royalty-free redistribution of the Library by 369 | all those who receive copies directly or indirectly through you, then 370 | the only way you could satisfy both it and this License would be to 371 | refrain entirely from distribution of the Library. 372 | 373 | If any portion of this section is held invalid or unenforceable under any 374 | particular circumstance, the balance of the section is intended to apply, 375 | and the section as a whole is intended to apply in other circumstances. 
376 | 377 | It is not the purpose of this section to induce you to infringe any 378 | patents or other property right claims or to contest validity of any 379 | such claims; this section has the sole purpose of protecting the 380 | integrity of the free software distribution system which is 381 | implemented by public license practices. Many people have made 382 | generous contributions to the wide range of software distributed 383 | through that system in reliance on consistent application of that 384 | system; it is up to the author/donor to decide if he or she is willing 385 | to distribute software through any other system and a licensee cannot 386 | impose that choice. 387 | 388 | This section is intended to make thoroughly clear what is believed to 389 | be a consequence of the rest of this License. 390 | 391 | 12. If the distribution and/or use of the Library is restricted in 392 | certain countries either by patents or by copyrighted interfaces, the 393 | original copyright holder who places the Library under this License may add 394 | an explicit geographical distribution limitation excluding those countries, 395 | so that distribution is permitted only in or among countries not thus 396 | excluded. In such case, this License incorporates the limitation as if 397 | written in the body of this License. 398 | 399 | 13. The Free Software Foundation may publish revised and/or new 400 | versions of the Library General Public License from time to time. 401 | Such new versions will be similar in spirit to the present version, 402 | but may differ in detail to address new problems or concerns. 403 | 404 | Each version is given a distinguishing version number. If the Library 405 | specifies a version number of this License which applies to it and 406 | "any later version", you have the option of following the terms and 407 | conditions either of that version or of any later version published by 408 | the Free Software Foundation. 
If the Library does not specify a 409 | license version number, you may choose any version ever published by 410 | the Free Software Foundation. 411 | 412 | 14. If you wish to incorporate parts of the Library into other free 413 | programs whose distribution conditions are incompatible with these, 414 | write to the author to ask for permission. For software which is 415 | copyrighted by the Free Software Foundation, write to the Free 416 | Software Foundation; we sometimes make exceptions for this. Our 417 | decision will be guided by the two goals of preserving the free status 418 | of all derivatives of our free software and of promoting the sharing 419 | and reuse of software generally. 420 | 421 | NO WARRANTY 422 | 423 | 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO 424 | WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 425 | EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR 426 | OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY 427 | KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE 428 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 429 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE 430 | LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME 431 | THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 432 | 433 | 16. 
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN 434 | WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY 435 | AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU 436 | FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR 437 | CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE 438 | LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING 439 | RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A 440 | FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF 441 | SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH 442 | DAMAGES. 443 | 444 | END OF TERMS AND CONDITIONS 445 | 446 | How to Apply These Terms to Your New Libraries 447 | 448 | If you develop a new library, and you want it to be of the greatest 449 | possible use to the public, we recommend making it free software that 450 | everyone can redistribute and change. You can do so by permitting 451 | redistribution under these terms (or, alternatively, under the terms of the 452 | ordinary General Public License). 453 | 454 | To apply these terms, attach the following notices to the library. It is 455 | safest to attach them to the start of each source file to most effectively 456 | convey the exclusion of warranty; and each file should have at least the 457 | "copyright" line and a pointer to where the full notice is found. 458 | 459 | 460 | Copyright (C) 461 | 462 | This library is free software; you can redistribute it and/or 463 | modify it under the terms of the GNU Library General Public 464 | License as published by the Free Software Foundation; either 465 | version 2 of the License, or (at your option) any later version. 466 | 467 | This library is distributed in the hope that it will be useful, 468 | but WITHOUT ANY WARRANTY; without even the implied warranty of 469 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU 470 | Library General Public License for more details. 471 | 472 | You should have received a copy of the GNU Library General Public 473 | License along with this library; if not, write to the Free 474 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 475 | 476 | Also add information on how to contact you by electronic and paper mail. 477 | 478 | You should also get your employer (if you work as a programmer) or your 479 | school, if any, to sign a "copyright disclaimer" for the library, if 480 | necessary. Here is a sample; alter the names: 481 | 482 | Yoyodyne, Inc., hereby disclaims all copyright interest in the 483 | library `Frob' (a library for tweaking knobs) written by James Random Hacker. 484 | 485 | , 1 April 1990 486 | Ty Coon, President of Vice 487 | 488 | That's all there is to it! 489 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | #### What is it 2 | 3 | A (not very efficient) Erlang implementation of the 4 | _O(ND) difference algorithm_ by Eugene W. Myers 5 | see link: https://www.google.com/search?q=eugene+myers+difference+algorithm+filetype:pdf 6 | 7 | #### Example 8 |

 9 | 1> tdiff:diff("A cat ate my hat", "A dog ate my shoe").
10 | [{eq,"A "},
11 |  {del,"cat"},
12 |  {ins,"dog"},
13 |  {eq," ate my "},
14 |  {ins,"s"},
15 |  {eq,"h"},
16 |  {del,"at"},
17 |  {ins,"oe"}]
18 | 
19 | 20 | There is also a debugging engine that generates a series of svg files, 21 | for visualizing the progress of the diff algorithm: 22 | 23 |

24 | 2> tdiff_debug:svg_diff("A cat ate my hat", "A dog ate my shoe",
25 |                         "/tmp/tdiff-trace.svg").
26 | 
27 | unixprompt% firefox /tmp/tdiff-trace.svg
28 | 
29 | 30 | The resulting svg works with at least Firefox and Chromium/Google Chrome. 31 | 32 | #### On the todo 33 | 34 | The algorithm currently searches only from the beginning to the 35 | end. It does not search in both directions, so it ends up 36 | searching very many diagonals, most often in vain, so there is room for 37 | memory and performance improvements. 38 | 39 | Currently, the algorithm only always takes the tail of the list, so it 40 | could be possible to use a lazy (memoizing) list. 41 | 42 | #### References 43 | 44 | Much good info about diff, match and patch can found at 45 | link: http://neil.fraser.name/writing/diff/ 46 | -------------------------------------------------------------------------------- /example/tdiff-trace-a-cat-ate-my-hat.svg: -------------------------------------------------------------------------------- 1 | 2 | 7 | 15 | 65 | 66 | 74 | 82 | 83 | 84 | 86 | 88 | 90 | 92 | 94 | 96 | 98 | 100 | 102 | 104 | 106 | 108 | 110 | 112 | 114 | 116 | 118 | 120 | 122 | 124 | 126 | 128 | 130 | 132 | 134 | 136 | 138 | 140 | 142 | 144 | 146 | 148 | 150 | 151 | A 153 | 155 | c 157 | a 159 | t 161 | 163 | a 165 | t 167 | e 169 | 171 | m 173 | y 175 | 177 | h 179 | a 181 | t 183 | A 185 | 187 | d 189 | o 191 | g 193 | 195 | a 197 | t 199 | e 201 | 203 | m 205 | y 207 | 209 | s 211 | h 213 | o 215 | e 217 | 218 | 219 | 0 222 | 223 | 224 | 227 | 229 | 230 | 231 | 1 234 | 235 | 236 | 239 | 241 | 244 | 246 | 247 | 248 | 2 251 | 252 | 253 | 256 | 258 | 261 | 263 | 266 | 268 | 269 | 270 | 3 273 | 274 | 275 | 278 | 280 | 283 | 285 | 288 | 290 | 293 | 295 | 296 | 297 | 4 300 | 301 | 302 | 305 | 307 | 310 | 312 | 315 | 317 | 320 | 322 | 325 | 327 | 328 | 329 | 5 332 | 333 | 334 | 337 | 339 | 342 | 344 | 347 | 349 | 352 | 354 | 357 | 359 | 362 | 364 | 365 | 366 | 6 369 | 370 | 371 | 374 | 376 | 379 | 381 | 384 | 386 | 389 | 391 | 394 | 396 | 399 | 401 | 404 | 406 | 407 | 408 | 7 411 | 412 | 413 | 416 | 418 | 421 | 423 | 426 | 428 | 431 | 433 | 436 | 438 | 
441 | 443 | 446 | 448 | 451 | 453 | 454 | 455 | 8 458 | 459 | 460 | 463 | 465 | 468 | 470 | 473 | 475 | 478 | 480 | 483 | 485 | 488 | 490 | 493 | 495 | 498 | 500 | 503 | 505 | 506 | 507 | 9 510 | 511 | 512 | 515 | 517 | 520 | 522 | 525 | 527 | 530 | 532 | 535 | 537 | 540 | 542 | 545 | 547 | 550 | 552 | 555 | 557 | 560 | 562 | 563 | 564 | 10 567 | 568 | 569 | 572 | 574 | 577 | 579 | 582 | 584 | 587 | 589 | 592 | 594 | 597 | 599 | 602 | 604 | 607 | 609 | 612 | 614 | 617 | 619 | 622 | 624 | 625 | 626 | 11 629 | 630 | 631 | 634 | 636 | 639 | 641 | 644 | 646 | 649 | 651 | 654 | 656 | 659 | 661 | 664 | 666 | 669 | 670 | 671 | -------------------------------------------------------------------------------- /rebar.config.script: -------------------------------------------------------------------------------- 1 | %% -*- erlang -*- 2 | 3 | %% In Erlang 19, the random module is deprecated 4 | NoHaveRandOpts = try rand:uniform() of 5 | F when is_float(F) -> [] 6 | catch error:undef -> [{d,'NO_HAVE_RAND',true}] 7 | end. 8 | 9 | [{require_otp_vsn, ".*"}, 10 | 11 | %% Erlang compiler options 12 | {erl_opts, [debug_info] ++ NoHaveRandOpts} 13 | ]. 14 | 15 | -------------------------------------------------------------------------------- /src/tdiff.app.src: -------------------------------------------------------------------------------- 1 | %% -*- erlang -*- 2 | 3 | %%% Copyright (C) 2010-2011 Tomas Abrahamsson 4 | %%% 5 | %%% Author: Tomas Abrahamsson 6 | %%% 7 | %%% This library is free software; you can redistribute it and/or 8 | %%% modify it under the terms of the GNU Library General Public 9 | %%% License as published by the Free Software Foundation; either 10 | %%% version 2 of the License, or (at your option) any later version. 11 | %%% 12 | %%% This library is distributed in the hope that it will be useful, 13 | %%% but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | %%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU 15 | %%% Library General Public License for more details. 16 | %%% 17 | %%% You should have received a copy of the GNU Library General Public 18 | %%% License along with this library; if not, write to the Free 19 | %%% Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 20 | 21 | {application, tdiff, 22 | [{description, "Difference library"}, 23 | {vsn, "0.1.2"}, 24 | {maintainers, ["Tomas Abrahamsson"]}, 25 | {licenses, ["LGPL 2"]}, 26 | {links,[{"GitHub","https://github.com/tomas-abrahamsson/tdiff"}]}, 27 | {applications, [kernel, stdlib]}, 28 | {modules, []}, 29 | {env, []}]}. 30 | -------------------------------------------------------------------------------- /src/tdiff.erl: -------------------------------------------------------------------------------- 1 | %%% A (simple) diff 2 | 3 | %%% Copyright (C) 2011 Tomas Abrahamsson 4 | %%% 5 | %%% Author: Tomas Abrahamsson 6 | %%% 7 | %%% This library is free software; you can redistribute it and/or 8 | %%% modify it under the terms of the GNU Library General Public 9 | %%% License as published by the Free Software Foundation; either 10 | %%% version 2 of the License, or (at your option) any later version. 11 | %%% 12 | %%% This library is distributed in the hope that it will be useful, 13 | %%% but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | %%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 | %%% Library General Public License for more details. 16 | %%% 17 | %%% You should have received a copy of the GNU Library General Public 18 | %%% License along with this library; if not, write to the Free 19 | %%% Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 20 | %%% 21 | 22 | -module(tdiff). 23 | -export([diff/2, diff/3, patch/2]). 24 | -export([diff_files/2, diff_files/3]). 25 | -export([diff_binaries/2, diff_binaries/3, patch_binaries/2]). 26 | -export([format_diff_lines/1]). 27 | -export([print_diff_lines/1]). 

-type filename() :: string().
-type options() :: [option()].
-type option() :: {algorithm_tracer, no_tracer | algorithm_tracer()}.
-type algorithm_tracer() :: fun(({d, d()} |
                                 {dpath, dpath()} |
                                 {exhausted_kdiagonals, d()} |
                                 {final_edit_script, edit_script()}) -> _).

-type d() :: integer(). %% The D currently being tried, ie the number of
                        %% non-diagonal edges of the paths being explored.
-type dpath() :: dpath(term()).
-type dpath(Elem) :: {X::index(), Y::index(),
                      SX::[Elem]|oob, SY::[Elem]|oob,
                      [{e | x | y, Elem}]}.
                        %% The X and Y are indices along x and y.
                        %% The SX and SY are the not yet consumed tails
                        %% of the old/new strings, or oob when a step
                        %% has been taken past the end of the string.
                        %% The last is the list of edit operations taken
                        %% so far, in reverse order: e for a diagonal
                        %% step, x and y for the non-diagonal steps.
-type index() :: non_neg_integer().
-type edit_script() :: edit_script(term()).
-type edit_script(Elem) :: [{eq, [Elem]} | {ins, [Elem]} | {del, [Elem]}].

-export_type([options/0, option/0]).
-export_type([edit_script/0, edit_script/1]).
-export_type([algorithm_tracer/0, d/0, dpath/0, dpath/1, index/0]).

%% @equiv diff_files(F1, F2, [])
-spec diff_files(filename(), filename()) -> edit_script(Line::string()).
diff_files(F1, F2) -> diff_files(F1, F2, _Opts=[]).

%% @doc Read the two files into memory, split to lists of lines
%% and compute the edit-script (or diff) for these.
%% The result is a diff for a list of lines/strings.
%% Fails with a badmatch if any of the files cannot be read.
-spec diff_files(filename(), filename(), options()) -> edit_script(Line) when
      Line :: string().
diff_files(F1, F2, Opts) ->
    {ok,B1} = file:read_file(F1),
    {ok,B2} = file:read_file(F2),
    diff_binaries(B1, B2, Opts).

%% @equiv diff_binaries(B1, B2, [])
-spec diff_binaries(binary(), binary()) -> edit_script(Line::string()).
diff_binaries(B1, B2) -> diff_binaries(B1, B2, _Opts=[]).

%% @doc Split the two binaries into lists of lines (lists of strings),
%% and compute the edit-script (or diff) for these.
%% The result is a diff for a list of lines/strings,
%% not for a list of binaries.
-spec diff_binaries(binary(), binary(), options()) -> edit_script(Line) when
      Line :: string().
diff_binaries(B1, B2, Opts) ->
    diff(split_bin_to_lines(B1), split_bin_to_lines(B2), Opts).


%% Split a binary to a list of lines. Each line keeps its terminating
%% newline; a last line without a newline is kept as it is.
split_bin_to_lines(B) -> sbtl(binary_to_list(B), "", []).

%% sbtl(Chars, CurrentLineReversed, LinesReversed)
sbtl("\n" ++ Rest, L, Acc) -> sbtl(Rest, "", [lists:reverse("\n"++L) | Acc]);
sbtl([C|Rest], L, Acc)     -> sbtl(Rest, [C|L], Acc);
sbtl("", "", Acc)          -> lists:reverse(Acc);
sbtl("", L, Acc)           -> lists:reverse([lists:reverse(L) | Acc]).


%% @doc Print an edit-script, or diff. See {@link format_diff_lines/1}
%% for info on the output.
-spec print_diff_lines(edit_script(Line::string())) -> _.
print_diff_lines(Diff) -> io:format("~s~n", [format_diff_lines(Diff)]).

%% @doc Format an edit-script or diff of lines to text, so that it looks like
%% a diff. The result will look like this if printed:
%%
 new line 1
102 | %%    678
103 | %%    > new line 2
104 | %% ]]>
105 | -spec format_diff_lines(edit_script(char())) -> iodata(). 106 | format_diff_lines(Diff) -> fdl(Diff, 1,1). 107 | 108 | fdl([{del,Ls1},{ins,Ls2}|T], X, Y) -> 109 | Addr = io_lib:format("~sc~s~n", [fmt_addr(X,Ls1), fmt_addr(Y, Ls2)]), 110 | Del = format_lines("< ", Ls1), 111 | Sep = io_lib:format("---~n", []), 112 | Ins = format_lines("> ", Ls2), 113 | [Addr, Del, Sep, Ins | fdl(T, X+length(Ls1), Y+length(Ls2))]; 114 | fdl([{del,Ls}|T], X, Y) -> 115 | Addr = io_lib:format("~w,~wd~w~n", [X,X+length(Ls), Y]), 116 | Del = format_lines("< ", Ls), 117 | [Addr, Del | fdl(T, X+length(Ls), Y)]; 118 | fdl([{ins,Ls}|T], X, Y) -> 119 | Addr = io_lib:format("~wa~w,~w~n", [X,Y,Y+length(Ls)]), 120 | Ins = format_lines("> ", Ls), 121 | [Addr, Ins | fdl(T, X, Y+length(Ls))]; 122 | fdl([{eq,Ls}|T], X, Y) -> 123 | fdl(T, X+length(Ls), Y+length(Ls)); 124 | fdl([], _X, _Y) -> 125 | []. 126 | 127 | fmt_addr(N, Ls) when length(Ls) == 1 -> f("~w", [N]); 128 | fmt_addr(N, Ls) -> f("~w,~w", [N,N+length(Ls)-1]). 129 | 130 | f(F,A) -> lists:flatten(io_lib:format(F,A)). 131 | 132 | format_lines(Indicator, Lines) -> 133 | lists:map(fun(Line) -> io_lib:format("~s~s", [Indicator, Line]) end, 134 | Lines). 135 | 136 | %% Algorithm: "An O(ND) Difference Algorithm and Its Variations" 137 | %% by E. Myers, 1986. 138 | %% 139 | %% Some good info can also be found at http://neil.fraser.name/writing/diff/ 140 | %% 141 | %% General principle of the algorithm: 142 | %% 143 | %% We are about to produce a diff (or editscript) on what differs (or 144 | %% how to get from) string Sx to Sy. We lay out a grid with the 145 | %% symbols from Sx on the x-axis and the symbols from Sy on the Y 146 | %% axis. The first symbol of Sx and Sy is at (0,0). 147 | %% 148 | %% (The Sx and Sy are strings of symbols: lists of lines or lists of 149 | %% characters, or lists of works, or whatever is suitable.) 
%%
%% Example: Sx="aXcccXe", Sy="aYcccYe" ==> the following grid is formed:
%%
%%           Sx
%%        aXcccXe
%%   Sy  a\
%%       Y
%%       c  \\\
%%       c  \\\
%%       c  \\\
%%       Y
%%       e      \
%%
%% Our plan now is to go from corner to corner: from (0,0) to (7,7).
%% We can move diagonally whenever the character on the x-axis and the
%% character on the y-axis are identical. Those are symbolized by the
%% \-edges in the grid above.
%%
%% When it is not possible to go diagonally (because the characters on
%% the x- and y-axis are not identical), we have to go horizontally
%% and vertically. This corresponds to deleting characters from Sx and
%% inserting characters from Sy.
%%
%% Definitions (from the "O(ND) ..." paper by E.Myers):
%%
%% * A D-path is a path with D non-diagonal edges (ie: edges that are
%%   vertical and/or horizontal).
%% * K-diagonal: the diagonal such that K=X-Y
%%   (Thus, the 0-diagonal is the one starting at (0,0), going
%%   straight down-right. The 1-diagonal is the one just to the right of
%%   the 0-diagonal: starting at (1,0) going straight down-right.
%%   There are negative diagonals as well: the -1-diagonal is the one starting
%%   at (0,1), and so on.)
%% * Snake: a sequence of only-diagonal steps
%%
%% The algorithm loops over D and over the K-diagonals:
%% D = 0..(length(Sx)+length(Sy))
%%   K = -D..D in steps of 2
%%     For every such K-diagonal, we choose between the (D-1)-paths
%%     whose end-points are currently on the adjacent (K-1)- and
%%     (K+1)-diagonals: we pick the one that has gone furthest along
%%     its diagonal.
%%
%%     This means taking that (D-1)-path and going right (if
%%     we pick the (D-1)-path on the (K-1)-diagonal) or down (if we
%%     pick the (D-1)-path on the (K+1)-diagonal), thus forming a
%%     D-path from a (D-1)-path.
%%
%%     After this, we try to extend the snake as far as possible along
%%     the K-diagonal.
%%
%%     Note that this means that when we choose between the
%%     (D-1)-paths along the (K-1)- and (K+1)-diagonals, we choose
%%     between two paths, whose snakes have been extended as far as
%%     possible, ie: they are at a point where the characters Sx and
%%     Sy don't match.
%%
%% Note that with this algorithm, we always do comparisons further
%% right into the strings Sx and Sy. The algorithm never goes towards
%% the beginning of either Sx or Sy to do further comparisons. This is
%% good, because this fits the way lists are built in functional
%% programming languages.

%% @equiv diff(Sx, Sy, [])
-spec diff(Old::[Elem], New::[Elem]) -> edit_script(Elem) when Elem::term().
diff(Sx, Sy) ->
    diff(Sx, Sy, []).

%% @doc Compute an edit-script between two sequences of elements,
%% such as two strings, lists of lines, or lists of elements more generally.
%% The result is a list of operations ins/del/eq that can transform
%% `Old' to `New'
%%
%% The algorithm is "An O(ND) Difference Algorithm and Its Variations"
%% by E. Myers, 1986.
%%
%% Note: This implementation currently searches only forwards. For
%% large inputs (such as thousands of elements) that differ very much,
%% this implementation will take unnecessarily long time, and may not
%% complete within reasonable time.
%%
%% @end
%% Todo for optimization to handle large inputs (see the paper for details)
%% * Search from both ends as described in the paper.
%%   When passing half of distance, search from the end (reversing
%%   the strings). Stop again at half. If snakes don't meet,
%%   pick the best (or all?) snakes from both ends, search
%%   recursively from both ends within this space.
%% * Keep track of visited coordinates.
%%   If already visited, consider the snake/diagonal dead and don't follow it.

-spec diff(Old::[Elem], New::[Elem], options()) -> edit_script(Elem) when
      Elem::term().
diff(Sx, Sy, Opts) ->
    %% No edit script can need more than length(Sx)+length(Sy)
    %% non-diagonal steps, so this bounds the search over D.
    DMax = length(Sx) + length(Sy),
    Tracer = proplists:get_value(algorithm_tracer, Opts, no_tracer),
    StartPath = {0, 0, Sx, Sy, []},
    EditScript =
        case try_dpaths(0, DMax, [StartPath], Tracer) of
            {ed, RevEditOps} -> edit_ops_to_edit_script(RevEditOps);
            no               -> [{del,Sx},{ins,Sy}]
        end,
    t_final_script(Tracer, EditScript),
    EditScript.


%% Try D = 0, 1, 2, ... DMax in turn. PrevPaths holds the (D-1)-paths
%% found in the previous round (for D = 0: only the starting point).
%% Returns {ed, ReversedEditOps} as soon as some path reaches the end
%% of both strings, or no if D passes DMax.
try_dpaths(D, DMax, _PrevPaths, _Tracer) when D > DMax ->
    no;
try_dpaths(D, DMax, PrevPaths, Tracer) ->
    t_d(Tracer, D),
    case try_kdiagonals(-D, D, PrevPaths, [], Tracer) of
        {dpaths, NextPaths} -> try_dpaths(D+1, DMax, NextPaths, Tracer);
        {ed, _} = Done      -> Done
    end.

%% Walk the K-diagonals K, K+2, ... D for the given D. For each of
%% them, form a D-path from the (D-1)-paths in PrevPaths and extend
%% it along its snake. Found accumulates the D-paths in reverse order.
try_kdiagonals(K, D, _PrevPaths, Found, Tracer) when K > D ->
    t_exhausted_kdiagonals(Tracer, D),
    {dpaths, lists:reverse(Found)};
try_kdiagonals(K, D, PrevPaths, Found, Tracer) ->
    case follow_snake(start_of_dpath(K, D, PrevPaths)) of
        {ed, _} = Done ->
            Done;
        {dpath, Extended} ->
            t_dpath(Tracer, Extended),
            %% On the first (leftmost) diagonal, the head of PrevPaths
            %% is still a candidate for the next diagonal. On every
            %% other diagonal, the head has been passed and is dropped.
            Remaining = case K =:= -D of
                            true  -> PrevPaths;
                            false -> tl(PrevPaths)
                        end,
            try_kdiagonals(K+2, D, Remaining, [Extended | Found], Tracer)
    end.

%% For D = 0 there is no non-diagonal step to take: start from the
%% only path there is. Otherwise step from the best adjacent path.
start_of_dpath(_K, D, PrevPaths) when D == 0 -> hd(PrevPaths);
start_of_dpath(K, D, PrevPaths)              -> pick_best_dpath(K, D, PrevPaths).

%% Extend a path diagonally for as long as the next elements of the
%% two strings are identical. Returns {ed, ReversedEditOps} if the end
%% of both strings is reached, otherwise {dpath, DPath} for the path
%% extended as far as possible.
follow_snake({X, Y, [Same | RestX], [Same | RestY], Ops}) ->
    follow_snake({X+1, Y+1, RestX, RestY, [{e,Same} | Ops]});
follow_snake({_X, _Y, [], [], Ops}) ->
    {ed, Ops};
follow_snake({_X, _Y, _Sx, _Sy, _Ops} = DPath) ->
    {dpath, DPath}.

%% Form a D-path on the K-diagonal by taking one non-diagonal step
%% from one of the (D-1)-paths at the head of the list.
pick_best_dpath(K, D, [DP | _]) when K == -D ->
    %% First diagonal: only one neighbouring path to step from
    go_inc_y(DP);
pick_best_dpath(K, D, [DP]) when K == D ->
    %% Last diagonal: only one neighbouring path to step from
    go_inc_x(DP);
pick_best_dpath(_K, _D, [DP1, DP2 | _]) ->
    case {DP1, DP2} of
        {{_,Y1,_,_,_}, {_,Y2,_,_,_}} when Y1 > Y2 -> go_inc_x(DP1);
        _                                         -> go_inc_y(DP2)
    end.

%% Take one step that increments Y. This consumes the next element of
%% the first string and records it as a y operation. Stepping past the
%% end of the string marks it as oob (out of bounds).
go_inc_y({X, Y, [H | Tx], Sy, Ops}) ->
    {X, Y+1, Tx, Sy, [{y,H} | Ops]};
go_inc_y({X, Y, Sx, Sy, Ops}) when Sx =:= []; Sx =:= oob ->
    {X, Y+1, oob, Sy, Ops}.

%% Take one step that increments X. This consumes the next element of
%% the second string and records it as an x operation. Stepping past
%% the end of the string marks it as oob (out of bounds).
go_inc_x({X, Y, Sx, [H | Ty], Ops}) ->
    {X+1, Y, Sx, Ty, [{x,H} | Ops]};
go_inc_x({X, Y, Sx, Sy, Ops}) when Sy =:= []; Sy =:= oob ->
    {X+1, Y, Sx, oob, Ops}.


%% Convert the reversed list of single-element edit operations to an
%% edit script, with runs of operations of the same kind grouped.
edit_ops_to_edit_script(EditOps) -> e2e(EditOps, []).

%% The operations arrive last-first, so prepending to the run at the
%% head of the accumulator restores the original order.
e2e([{Op, Elem} | Earlier], Acc) ->
    Tag = edit_op_tag(Op),
    case Acc of
        [{Tag, Run} | Later] -> e2e(Earlier, [{Tag, [Elem | Run]} | Later]);
        _                    -> e2e(Earlier, [{Tag, [Elem]} | Acc])
    end;
e2e([], Acc) ->
    Acc.

edit_op_tag(x) -> ins;
edit_op_tag(y) -> del;
edit_op_tag(e) -> eq.

%% @doc Apply a patch, in the form of an edit-script, to a string or
%% list of lines (or list of elements more generally)
-spec patch([Elem], edit_script(Elem)) -> [Elem] when Elem::term().
patch(S, Diff) -> p2(S, Diff, []).

%% @doc Apply a patch to a binary. The binary is first split to list
%% of lines (list of strings), and the edit-script is expected to be
%% for lists of strings/lines. The result is a list of strings.
-spec patch_binaries(binary(), edit_script(Line)) -> [Line] when
      Line::string().
patch_binaries(B, Diff) ->
    Lines = split_bin_to_lines(B),
    patch(Lines, Diff).

%% p2(Remaining, EditScript, ReversedResult)
%% Apply the edit script, one operation at a time. The input must be
%% fully consumed when the script ends, or else there is no matching
%% clause.
p2([], [], Acc) ->
    lists:reverse(Acc);
p2(S, [{Op, Elems} | Rest], Acc) when Op =:= eq; Op =:= ins; Op =:= del ->
    p2_op(Op, S, Elems, Rest, Acc).

%% Apply one operation, element by element:
%%  eq:  the elements must head the input; move them to the result
%%  ins: add the elements to the result, leave the input as it is
%%  del: the elements must head the input; drop them
p2_op(_Op, S, [], Rest, Acc)         -> p2(S, Rest, Acc);
p2_op(eq, [H|S], [H|T], Rest, Acc)   -> p2_op(eq, S, T, Rest, [H|Acc]);
p2_op(ins, S, [H|T], Rest, Acc)      -> p2_op(ins, S, T, Rest, [H|Acc]);
p2_op(del, [H|S], [H|T], Rest, Acc)  -> p2_op(del, S, T, Rest, Acc).


%% Tracing: each of these calls the tracer fun, if there is one, with
%% an event that describes the progress of the algorithm.
t_final_script(Tracer, EditScript) ->
    trace(Tracer, {final_edit_script, EditScript}).

t_d(Tracer, D) ->
    trace(Tracer, {d,D}).

t_dpath(Tracer, DPath) ->
    trace(Tracer, {dpath,DPath}).

t_exhausted_kdiagonals(Tracer, D) ->
    trace(Tracer, {exhausted_kdiagonals, D}).

trace(no_tracer, _Event) -> ok;
trace(Tracer, Event)     -> Tracer(Event).
358 | -------------------------------------------------------------------------------- /src/tdiff_benchmark.erl: -------------------------------------------------------------------------------- 1 | %%% Copyright (C) 2010-2011 Tomas Abrahamsson 2 | %%% 3 | %%% Author: Tomas Abrahamsson 4 | %%% 5 | %%% This library is free software; you can redistribute it and/or 6 | %%% modify it under the terms of the GNU Library General Public 7 | %%% License as published by the Free Software Foundation; either 8 | %%% version 2 of the License, or (at your option) any later version. 9 | %%% 10 | %%% This library is distributed in the hope that it will be useful, 11 | %%% but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | %%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 | %%% Library General Public License for more details. 14 | %%% 15 | %%% You should have received a copy of the GNU Library General Public 16 | %%% License along with this library; if not, write to the Free 17 | %%% Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 18 | 19 | -module(tdiff_benchmark). 20 | -import(lists, [map/2, foreach/2]). 21 | -export([start/0, start/1]). 22 | %%-compile(export_all). 23 | 24 | -record(telem, {sz :: integer(), 25 | variation :: float(), %% 0..1 26 | s1 :: string(), 27 | s2 :: string()}). 28 | 29 | 30 | start() -> 31 | start(_Opts=[]). 32 | 33 | start(Opts) -> 34 | rand_seed(Opts), 35 | Lengths = map(fun power_of_ten/1, lists:seq(1,3)), 36 | OrigStrs = map(fun create_string_of_len/1, Lengths), 37 | Changes = lists:append( 38 | map(fun(OrigStr) -> 39 | lists:append( 40 | map(fun(V) -> 41 | map(fun(_) -> 42 | create_test_elem( 43 | OrigStr, V) 44 | end, 45 | lists:seq(1,_NumPairs=3)) 46 | end, 47 | _Variations=[0.05, 0.25, 0.50, 0.75])) 48 | end, 49 | OrigStrs)), 50 | 51 | run_tdiff_bm(Changes). 52 | 53 | 54 | power_of_ten(0) -> 1; 55 | power_of_ten(N) -> 10 * power_of_ten(N-1). 

%% The original strings consist of nothing but the letter x.
create_string_of_len(Len) -> lists:duplicate(Len, $x).

%% Create a test element: two independently varied copies of Str.
create_test_elem(Str, DegreeOfVariation) ->
    Variant1 = create_variation(Str, DegreeOfVariation),
    Variant2 = create_variation(Str, DegreeOfVariation),
    #telem{sz = length(Str),
           variation = DegreeOfVariation,
           s1 = Variant1,
           s2 = Variant2}.

%% Return a copy of Str in which every character is subject to a
%% random change with a chance derived from ChangePercent.
create_variation("", _ChangePercent) ->
    "";
create_variation(Str, ChangePercent) ->
    Len = length(Str),
    ExpectedNumChanges = Len * ChangePercent,
    c_v_2(Str, ExpectedNumChanges / Len).

%% Roll the dice once per position. An insertion does not consume the
%% current character, so that character gets another roll.
c_v_2("", _ChancePerChar) ->
    "";
c_v_2([C | Rest] = Str, ChancePerChar) ->
    case rand_uniform() < ChancePerChar of
        true ->
            case get_random_change() of
                delete       -> c_v_2(Rest, ChancePerChar);
                {insert, C2} -> [C2 | c_v_2(Str, ChancePerChar)];
                {change, C2} -> [C2 | c_v_2(Rest, ChancePerChar)]
            end;
        false ->
            [C | c_v_2(Rest, ChancePerChar)]
    end.

%% Pick one of the three kinds of changes
get_random_change() -> random_change(rand_uniform(3)).

random_change(1) -> delete;
random_change(2) -> {insert, get_random_char()};
random_change(3) -> {change, get_random_char()}.

%% A random lower-case letter, a..z
get_random_char() -> $a - 1 + rand_uniform(26).


%% Run tdiff:diff/2 a number of times on each test element, sum up
%% time and garbage per {Size, Variation}, and print the results.
run_tdiff_bm(TestElems) ->
    NumRuns = 10,
    AddMeasurement =
        fun(#telem{sz=Size, variation=V, s1=S1, s2=S2}, Acc) ->
                {US, GC} = run_num_times(NumRuns, fun tdiff:diff/2, [S1,S2]),
                dict:update({Size, V},
                            fun({AccTime, AccGC, N}) ->
                                    {AccTime+US, AccGC+GC, N+NumRuns}
                            end,
                            {0+US, 0+GC, 0+NumRuns},
                            Acc)
        end,
    Totals = lists:foldl(AddMeasurement, dict:new(), TestElems),
    print_results(lists:sort(dict:to_list(Totals))).

%% Print one row per {Size, Variation}, with time and garbage
%% averaged over the number of runs. Returns the results unchanged.
print_results(Results) ->
    io:format(" size variation avg time avg garbage~n"),
    WordSize = erlang:system_info(wordsize),
    lists:foreach(fun(Row) -> print_result_row(Row, WordSize) end, Results),
    Results.

print_result_row({{Size, Variation}, {US, GC, N}}, WordSize) ->
    %% A negative sum means the garbage counter wrapped during the run
    case GC < 0 of
        true  -> io:format("*** GC counter wrapped!~n"
                           "*** Please rerun the test!~n");
        false -> ok
    end,
    AvgTime = micros_to_pretty_time(round(US/N)),
    AvgGarbage = bytes_to_pretty_size(round(GC*WordSize/N)),
    io:format("~6w ~.2f ~10s ~15s~n", [Size, Variation, AvgTime, AvgGarbage]).

%% Format a number of microseconds using a suitable unit
micros_to_pretty_time(US) ->
    if US >= 1000*1000 -> f("~.2f s", [US/(1000*1000)]);
       US >= 1000      -> f("~.2f ms", [US/1000]);
       true            -> f("~w us", [US])
    end.

%% Format a number of bytes using a suitable unit
bytes_to_pretty_size(N) ->
    if N >= 1024*1024 -> f("~.2f MB", [N / (1024*1024)]);
       N >= 1024      -> f("~.2f kB", [N / 1024]);
       true           -> f("~w bytes", [N])
    end.

f(Format, Args) -> lists:flatten(io_lib:format(Format, Args)).

%% Run Fun with Args N times. Return the sum of the run times and the
%% sum of the garbage counts, as reported by run_once/2.
run_num_times(N, Fun, Args) ->
    run_num_times_2(N, Fun, Args, {0,0}).

run_num_times_2(0, _Fun, _Args, Totals) ->
    Totals;
run_num_times_2(N, Fun, Args, {TotalTime, TotalGC}) when N > 0 ->
    {US, GC} = run_once(Fun,Args),
    run_num_times_2(N-1, Fun, Args, {TotalTime+US, TotalGC+GC}).
159 | 160 | run_once(Fun, Args) -> 161 | Master = self(), 162 | P = spawn(fun() -> 163 | Ps = processes(), 164 | [garbage_collect(P) || P <- Ps], 165 | [garbage_collect(P) || P <- Ps], 166 | {_,Reclaimed0,_} = erlang:statistics(garbage_collection), 167 | {US, _Res} = timer:tc(erlang, apply, [Fun,Args]), 168 | garbage_collect(), 169 | garbage_collect(), 170 | garbage_collect(), 171 | {_,Reclaimed1,_} = erlang:statistics(garbage_collection), 172 | GC = Reclaimed1 - Reclaimed0, 173 | if GC < 0 -> 174 | io:format("Reclaimed0=~w~nReclaimed1=~w~n", 175 | [Reclaimed0, Reclaimed1]); 176 | true -> 177 | ok 178 | end, 179 | Master ! {self(), {US, GC}} 180 | end), 181 | receive 182 | {P, Data} -> Data 183 | end. 184 | 185 | 186 | -ifndef(NO_HAVE_RAND). 187 | %% Erlang 19 or later 188 | rand_uniform(Limit) -> rand:uniform(Limit). 189 | rand_uniform() -> rand:uniform(). 190 | rand_seed(Opts) -> 191 | case proplists:get_value(random_seed, Opts) of 192 | undefined -> 193 | _ = rand:uniform(); 194 | {Alg, Seed} -> 195 | rand:seed(Alg, Seed); 196 | AlgOrStateOrExpState -> 197 | rand:seed(AlgOrStateOrExpState) 198 | end. 199 | -else. 200 | %% Erlang 18 or earlier 201 | rand_uniform(Limit) -> random:uniform(Limit). 202 | rand_uniform() -> random:uniform(). 203 | rand_seed(Opts) -> 204 | Seed = proplists:get_value(random_seed, Opts, random:seed0()), 205 | set_random_seed(Seed). 206 | 207 | set_random_seed({A,B,C}) -> 208 | random:seed(A,B,C). 209 | -endif. % NO_HAVE_RAND 210 | -------------------------------------------------------------------------------- /src/tdiff_debug.erl: -------------------------------------------------------------------------------- 1 | -module(tdiff_debug). 2 | 3 | -export([svg_diff/3]). 4 | 5 | %% @doc Example: 6 | %% ``` 7 | %% svg_diff("A cat ate my hat", "A dog ate my shoe", "tdiff-trace.svg"). 8 | %% ''' 9 | %% The resulting svg file works well in Chrome and Opera. 10 | %% 11 | %% The Sx is (usually) the "old" string. 
12 | %% The Sy is (usually) the "new" string. 13 | %% The blue number in the upper left is the D number. 14 | %% Taking a step to the right means deleting that character. 15 | %% Taking a step down means inserting that character. 16 | %% 17 | %% The tdiff algorithm can diff any list of objects, such as lists of 18 | %% lines, lists of words or whatever. However, this tracer is only 19 | %% good at strings (lists of characters). It might produce funny or 20 | %% strange svg files in other cases. 21 | %% 22 | svg_diff(Sx, Sy, DestFileName) -> 23 | Tracer = start_svg_tracer(Sx, Sy, DestFileName), 24 | Res = tdiff:diff(Sx, Sy, [{algorithm_tracer, fun(Ev) -> Tracer ! Ev end}]), 25 | stop_tracer(Tracer), 26 | Res. 27 | 28 | start_svg_tracer(Sx, Sy, DestFileName) -> 29 | proc_lib:spawn(fun() -> init_svg_tracer(Sx, Sy, DestFileName) end). 30 | 31 | stop_tracer(Pid) -> 32 | MRef = erlang:monitor(process, Pid), 33 | Pid ! {stop, self()}, 34 | receive 35 | {Pid, Res} -> 36 | erlang:demonitor(MRef, [flush]), 37 | Res; 38 | {'DOWN', MRef, _, _, Reason} -> 39 | erlang:error({terminated,Reason}) 40 | end. 41 | 42 | init_svg_tracer(Sx, Sy, DestFileName) -> 43 | FdInfo = open_svg_file(Sx, Sy, DestFileName), 44 | loop_svg_tracer(FdInfo, _D=unknown, Sx, Sy). 45 | 46 | loop_svg_tracer(FdInfo, D, Sx, Sy) -> 47 | receive 48 | {final_edit_script, EditScript} -> 49 | print_final_edit_script(FdInfo, EditScript), 50 | loop_svg_tracer(FdInfo, D, Sx, Sy); 51 | {exhausted_kdiagonals, _D} -> 52 | print_svg_d_closed(FdInfo), 53 | loop_svg_tracer(FdInfo, D, Sx, Sy); 54 | {d, NewD} -> 55 | print_svg_d(FdInfo, NewD), 56 | loop_svg_tracer(FdInfo, NewD, Sx, Sy); 57 | {dpath, DPath} -> 58 | print_svg_dpath(FdInfo, DPath), 59 | loop_svg_tracer(FdInfo, D, Sx, Sy); 60 | {stop, From} -> 61 | print_svg_dmax(FdInfo, D), 62 | close_svg_file(FdInfo), 63 | From ! {self(),ok} 64 | end. 
65 | 66 | -record(fdinfo, {fd, xoffs, yoffs, dx, dy, 67 | pathlinestyle, endpointstyle, 68 | final_pathlinestyle, final_endpointstyle}). 69 | 70 | open_svg_file(Sx, Sy, DestFileName) -> 71 | SxLen = length(Sx), 72 | SyLen = length(Sy), 73 | DMax = SxLen + SyLen, 74 | Dx = 10, 75 | Dy = 10, 76 | XOffs = 20, 77 | YOffs = 10, 78 | {ok,Fd} = file:open(DestFileName, [write]), 79 | io:format( 80 | Fd, 81 | "\n" 82 | "\n" 87 | " \n" 95 | " \n" 145 | ,[SxLen*Dx+2*XOffs+10, SyLen*Dy+2*YOffs, DMax]), 146 | 147 | io:format(Fd, 148 | " \n", []), 149 | io:format(Fd, 150 | " \n" 158 | , [round(Dx/4),0, 159 | Dy, 160 | -round(Dx/2), -round(Dy/2)]), 161 | io:format(Fd, 162 | " \n" 170 | , [SxLen*Dx+2*XOffs-Dx,0, 171 | Dy, 172 | round(Dx/2), -round(Dy/2)]), 173 | io:format(Fd, 174 | " \n", []), 175 | 176 | BoxLineStyle = o([{class,border}]), 177 | GridLineStyle = o([{class,grid}]), 178 | PathLineStyle = o([{class,path}]), 179 | EndPtStyle = o([{class,endpt}]), 180 | FinalPathLineStyle = o([{class,fpath}]), 181 | FinalEndPtStyle = o([{class,fendpt}]), 182 | 183 | io:format(Fd, " \n", []), 184 | 185 | %% Grid lines... 
186 | lists:foreach(fun(X) -> io:format(Fd, 187 | " \n" 188 | ,[X*Dx+XOffs, YOffs, SyLen*Dy, 189 | GridLineStyle]) 190 | end, 191 | lists:seq(1,SxLen)), 192 | 193 | lists:foreach(fun(Y) -> io:format(Fd, 194 | " \n" 195 | ,[XOffs, Y*Dy+YOffs, SxLen*Dx, 196 | GridLineStyle]) 197 | end, 198 | lists:seq(1,SyLen)), 199 | 200 | %% Box around 201 | io:format( 202 | Fd, 203 | " \n" 204 | ,[XOffs, YOffs, SyLen*Dy, SxLen*Dx, SyLen*Dy, BoxLineStyle]), 205 | 206 | %% Symbols for each X/Y 207 | lists:foreach( 208 | fun({X,C}) -> 209 | io:format( 210 | Fd, 211 | " ~c\n", 213 | [XOffs+Dx*X+round(Dx/2), YOffs-YOffs/4, C]) 214 | end, 215 | index0seq(Sx)), 216 | lists:foreach( 217 | fun({Y,C}) -> 218 | io:format( 219 | Fd, 220 | " ~c\n", 222 | [XOffs-Dx/2, YOffs+Dy*(Y+1)-YOffs/4, C]) 223 | end, 224 | index0seq(Sy)), 225 | 226 | io:format(Fd, " \n", []), 227 | 228 | 229 | #fdinfo{fd = Fd, 230 | xoffs = XOffs, 231 | yoffs = YOffs, 232 | dx = Dx, 233 | dy = Dy, 234 | pathlinestyle = PathLineStyle, 235 | endpointstyle = EndPtStyle, 236 | final_pathlinestyle = FinalPathLineStyle, 237 | final_endpointstyle = FinalEndPtStyle}. 238 | 239 | index0seq([]) -> []; 240 | index0seq(L) -> lists:zip(lists:seq(0,length(L)-1), L). 241 | 242 | print_svg_d(#fdinfo{fd=Fd, xoffs=XOffs,yoffs=YOffs, dx=Dx}, D) -> 243 | io:format(Fd, 244 | " \n" 245 | " ~w\n" 248 | " \n" 249 | ,[D, if D == 0 -> "visible"; true -> "hidden" end, 250 | XOffs-Dx*0.25, YOffs-YOffs/4, D]), 251 | io:format(Fd, 252 | " \n" 253 | ,[D, if D == 0 -> "visible"; true -> "hidden" end]). 254 | 255 | print_svg_d_closed(#fdinfo{fd=Fd}) -> 256 | io:format(Fd, " \n", []). 257 | 258 | 259 | print_svg_dpath(FdInfo, {_X, _Y, _Sx, _Sy, RSteps}=_DPath) -> 260 | #fdinfo{pathlinestyle = PathLineStyle, 261 | endpointstyle = EndPtStyle} = FdInfo, 262 | print_svg_dpath_2(FdInfo, RSteps, PathLineStyle, EndPtStyle). 
263 | 264 | print_svg_dpath_2(FdInfo, RSteps, PathLineStyle, EndPtStyle) -> 265 | #fdinfo{fd = Fd, 266 | xoffs = XOffs, 267 | yoffs = YOffs, 268 | dx = Dx, 269 | dy = Dy} = FdInfo, 270 | 271 | io:format(Fd, " 274 | %% X,Y seems to be reversed below [1] 275 | %% but they are not. This is because of 276 | %% how x,y are defined (removing a char 277 | %% from Sx inserting a char from Sy, 278 | %% respectively). 279 | {X1,Y1} = if Dir == e -> {1,1}; 280 | Dir == x -> {0,1}; %% [1] here... 281 | Dir == y -> {1,0} %% ... and here 282 | end, 283 | io:format(Fd, " l ~w,~w", [X1*Dx, Y1*Dx]), 284 | {X0+X1, Y0+Y1} 285 | end, 286 | {0,0}, 287 | lists:reverse(RSteps)), 288 | io:format(Fd, "\"\n ~s\n />\n", [PathLineStyle]), 289 | io:format(Fd, 290 | " \n" 292 | ,[XOffs+X*Dx,YOffs+Y*Dy, Dx/4, EndPtStyle]). 293 | 294 | print_final_edit_script(FdInfo, EditScript) -> 295 | #fdinfo{final_pathlinestyle = PathLineStyle, 296 | final_endpointstyle = EndPtStyle} = FdInfo, 297 | RSteps1 = lists:append( 298 | lists:map(fun({ins,Elems}) -> [{x,E} || E <- Elems]; 299 | ({del,Elems}) -> [{y,E} || E <- Elems]; 300 | ({eq,Elems}) -> [{e,E} || E <- Elems] 301 | end, 302 | EditScript)), 303 | RSteps2 = lists:reverse(RSteps1), 304 | print_svg_dpath_2(FdInfo, RSteps2, PathLineStyle, EndPtStyle). 305 | 306 | print_svg_dmax(#fdinfo{fd=Fd}, D) -> 307 | io:format( 308 | Fd, 309 | " \n" 312 | ,[D]). 313 | 314 | close_svg_file(#fdinfo{fd=Fd}) -> 315 | %% Trailer 316 | io:format( 317 | Fd, 318 | " \n" 319 | "\n" 320 | ,[]), 321 | file:close(Fd), 322 | ok. 323 | 324 | o(L) -> case o2(L) of 325 | " "++Rest -> Rest; 326 | Os -> Os 327 | end. 328 | 329 | o2(L) -> 330 | lists:flatten( 331 | lists:map( 332 | fun({K,V}) when is_atom(V) -> [" ",a2s(K), "=\"", a2s(V), "\""]; 333 | ({K,V}) when is_integer(V) -> [" ",a2s(K), "=\"", i2s(V), "\""]; 334 | ({K,V}) when is_list(V) -> [" ",a2s(K), "=\"", V, "\""] 335 | end, 336 | L)). 337 | 338 | a2s(A) -> atom_to_list(A). 339 | i2s(I) -> integer_to_list(I). 
340 | -------------------------------------------------------------------------------- /test/tdiff_tests.erl: -------------------------------------------------------------------------------- 1 | %%% Copyright (C) 2011 Tomas Abrahamsson 2 | %%% 3 | %%% Author: Tomas Abrahamsson 4 | %%% 5 | %%% This library is free software; you can redistribute it and/or 6 | %%% modify it under the terms of the GNU Library General Public 7 | %%% License as published by the Free Software Foundation; either 8 | %%% version 2 of the License, or (at your option) any later version. 9 | %%% 10 | %%% This library is distributed in the hope that it will be useful, 11 | %%% but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | %%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 | %%% Library General Public License for more details. 14 | %%% 15 | %%% You should have received a copy of the GNU Library General Public 16 | %%% License along with this library; if not, write to the Free 17 | %%% Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 18 | -module(tdiff_tests). 19 | -include_lib("eunit/include/eunit.hrl"). 20 | 21 | 22 | simple_diff_test() -> 23 | [{eq,"a"},{del,"B"},{ins,"X"},{eq,"ccc"},{del,"D"},{ins,"Y"},{eq,"e"}] = 24 | tdiff:diff("aBcccDe", "aXcccYe"). 25 | 26 | completely_mismatching_test() -> 27 | [{del,"aaa"}, {ins,"bbb"}] = tdiff:diff("aaa", "bbb"). 28 | 29 | empty_inputs_produces_empty_diff_test() -> 30 | [] = tdiff:diff("", ""). 31 | 32 | only_additions_test() -> 33 | [{ins,"aaa"}] = tdiff:diff("", "aaa"), 34 | [{eq,"a"},{ins,"b"},{eq,"a"},{ins,"b"},{eq,"a"},{ins,"b"},{eq,"a"}] = 35 | tdiff:diff("aaaa", "abababa"). 36 | 37 | only_deletions_test() -> 38 | [{del,"aaa"}] = tdiff:diff("aaa", ""), 39 | [{eq,"a"},{del,"b"},{eq,"a"},{del,"b"},{eq,"a"},{del,"b"},{eq,"a"}] = 40 | tdiff:diff("abababa", "aaaa"). 
41 | 42 | patch_test() -> 43 | Diff = tdiff:diff(Old="a cat ate my hat", New="a dog ate my shoe"), 44 | New = tdiff:patch(Old, Diff). 45 | 46 | diff_patch_binaries_test() -> 47 | [{del,["The Naming of Cats is a difficult matter,\n"]}, 48 | {ins,["The Naming of Dogs is a different matter,\n"]}, 49 | {eq,["It isn't just one of your holiday games;\n", 50 | "You may think at first I'm as mad as a hatter\n"]}, 51 | {del,["When I tell you, a cat must have THREE DIFFERENT NAMES.\n"]}] = 52 | Diff = 53 | tdiff:diff_binaries( 54 | %% T.S. Elliot: 55 | Old = 56 | <<"The Naming of Cats is a difficult matter,\n" 57 | "It isn't just one of your holiday games;\n" 58 | "You may think at first I'm as mad as a hatter\n" 59 | "When I tell you, a cat must have THREE DIFFERENT NAMES.\n">>, 60 | %% Not T.S. Elliot (of course): 61 | New = 62 | <<"The Naming of Dogs is a different matter,\n" 63 | "It isn't just one of your holiday games;\n" 64 | "You may think at first I'm as mad as a hatter\n">>), 65 | New = list_to_binary(tdiff:patch_binaries(Old, Diff)). 66 | --------------------------------------------------------------------------------