├── lib └── diff │ ├── lcs │ ├── ldiff.rb │ ├── version.rb │ ├── string.rb │ ├── array.rb │ ├── block.rb │ ├── change.rb │ ├── hunk.rb │ └── callbacks.rb │ └── lcs.rb ├── .gitignore ├── Rakefile ├── bin ├── ldiff └── htmldiff ├── ChangeLog ├── README.md └── test └── test_diff-lcs.rb /lib/diff/lcs/ldiff.rb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gioele/diff-lcs/HEAD/lib/diff/lcs/ldiff.rb -------------------------------------------------------------------------------- /lib/diff/lcs/version.rb: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2010 Gioele Barabucci 2 | # 3 | # This program is free software. It may be redistributed and/or modified under 4 | # the terms of the GPL version 2 (or later), the Perl Artistic licence, or the 5 | # Ruby licence. 6 | 7 | module Diff 8 | module LCS 9 | VERSION = '1.1.2' 10 | end 11 | end 12 | -------------------------------------------------------------------------------- /lib/diff/lcs/string.rb: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2004 Austin Ziegler 2 | # 3 | # This program is free software. It may be redistributed and/or modified under 4 | # the terms of the GPL version 2 (or later), the Perl Artistic licence, or the 5 | # Ruby licence. 6 | 7 | # Includes Diff::LCS into String. 8 | 9 | class String 10 | include Diff::LCS 11 | end 12 | -------------------------------------------------------------------------------- /lib/diff/lcs/array.rb: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2004 Austin Ziegler 2 | # 3 | # This program is free software. It may be redistributed and/or modified under 4 | # the terms of the GPL version 2 (or later), the Perl Artistic licence, or the 5 | # Ruby licence. 6 | 7 | # Includes Diff::LCS into the Array built-in class. 8 | 9 | require 'diff/lcs' 10 | 11 | class Array 12 | include Diff::LCS 13 | end 14 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # The list of files that should be ignored by Mr Bones. 2 | # Lines that start with '#' are comments. 3 | # 4 | # A .gitignore file can be used instead by setting it as the ignore 5 | # file in your Rakefile: 6 | # 7 | # Bones { 8 | # ignore_file '.gitignore' 9 | # } 10 | # 11 | # For a project with a C extension, the following would be a good set of 12 | # exclude patterns (uncomment them if you want to use them): 13 | # *.[oa] 14 | # *~ 15 | /announcement.txt 16 | /.specification 17 | /*.gemspec 18 | 19 | /coverage/ 20 | /doc/ 21 | /pkg/ 22 | 23 | .*.swp 24 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2010 Gioele Barabucci 2 | # 3 | # This program is free software. It may be redistributed and/or modified under 4 | # the terms of the GPL version 2 (or later), the Perl Artistic licence, or the 5 | # Ruby licence. 6 | 7 | begin 8 | require 'bones' 9 | rescue LoadError 10 | abort '### Please install the "bones" gem ###' 11 | end 12 | 13 | task :default => 'test:run' 14 | task 'gem:release' => 'test:run' 15 | 16 | require 'lib/diff/lcs/version.rb' 17 | 18 | Bones { 19 | name 'diff-lcs' 20 | authors ['Austin Ziegler'] 21 | email ['diff-lcs@halostatue.ca'] 22 | url 'http://rubyforge.org/projects/ruwiki/' 23 | 24 | version Diff::LCS::VERSION 25 | 26 | readme_file 'README.md' 27 | history_file 'ChangeLog' 28 | ignore_file '.gitignore' 29 | } 30 | 31 | -------------------------------------------------------------------------------- /bin/ldiff: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | # Copyright 2004 Austin Ziegler 3 | # 4 | # This program is free software. It may be redistributed and/or modified under 5 | # the terms of the GPL version 2 (or later), the Perl Artistic licence, or the 6 | # Ruby licence. 7 | 8 | # 1) Try to load Ruwiki from the gem. 9 | # 2) Try to load Ruwiki from $LOAD_PATH. 10 | # 3) Modify $LOAD_PATH and try to load it from the modified $LOAD_PATH. 11 | # 4) Fail hard. 12 | load_state = 1 13 | begin 14 | if 1 == load_state 15 | require 'rubygems' 16 | require_gem 'diff-lcs', '= 1.1.1' 17 | else 18 | require 'diff/lcs' 19 | end 20 | rescue LoadError 21 | load_state += 1 22 | 23 | case load_state 24 | when 3 25 | $LOAD_PATH.unshift "#{File.dirname($0)}/../lib" 26 | when 4 27 | $LOAD_PATH.shift 28 | $LOAD_PATH.unshift "#{File.dirname(__FILE__)}/../lib" 29 | when 5 30 | raise 31 | end 32 | retry 33 | end 34 | 35 | require 'diff/lcs/ldiff' 36 | 37 | exit Diff::LCS::Ldiff.run(ARGV) 38 | -------------------------------------------------------------------------------- /lib/diff/lcs/block.rb: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2004 Austin Ziegler 2 | # 3 | # This program is free software. It may be redistributed and/or modified under 4 | # the terms of the GPL version 2 (or later), the Perl Artistic licence, or the 5 | # Ruby licence. 6 | 7 | # Contains Diff::LCS::Block for bin/ldiff. 8 | 9 | # A block is an operation removing, adding, or changing a group of items. 10 | # Basically, this is just a list of changes, where each change adds or 11 | # deletes a single item. Used by bin/ldiff. 12 | class Diff::LCS::Block 13 | attr_reader :changes, :insert, :remove 14 | 15 | def initialize(chunk) 16 | @changes = [] 17 | @insert = [] 18 | @remove = [] 19 | 20 | chunk.each do |item| 21 | @changes << item 22 | @remove << item if item.deleting? 23 | @insert << item if item.adding? 24 | end 25 | end 26 | 27 | def diff_size 28 | @insert.size - @remove.size 29 | end 30 | 31 | def op 32 | case [@remove.empty?, @insert.empty?] 33 | when [false, false] 34 | '!' 35 | when [false, true] 36 | '-' 37 | when [true, false] 38 | '+' 39 | else # [true, true] 40 | '^' 41 | end 42 | end 43 | end 44 | -------------------------------------------------------------------------------- /ChangeLog: -------------------------------------------------------------------------------- 1 | Revision history for Ruby library Diff::LCS. Unless explicitly noted otherwise, 2 | all changes are produced by Austin Ziegler . 3 | 4 | == Diff::LCS 1.1.2 5 | * Fixed a problem reported by Mauricio Fernandez in htmldiff. Future versions 6 | of Diff::LCS will be removing this program. 7 | 8 | == Diff::LCS 1.1.1 9 | * Fixed bug #891: 10 | http://rubyforge.org/tracker/?func=detail&atid=407&aid=891&group_id=84 11 | * Fixed a problem with callback initialisation code (it assumed that all 12 | callbacks passed as classes can be initialised; now, it rescues 13 | NoMethodError in the event of private :new being called). 14 | * Modified the non-initialisable callbacks to have a private #new method. 15 | * Moved ldiff core code to Diff::LCS::Ldiff (diff/lcs/ldiff.rb). 16 | 17 | == Diff::LCS 1.1.0 18 | * Eliminated the need for Diff::LCS::Event and removed it. 19 | * Added a contextual diff callback, Diff::LCS::ContextDiffCallback. 20 | * Implemented patching/unpatching for standard Diff callback output formats 21 | with both #diff and #sdiff. 22 | * Extensive documentation changes. 23 | 24 | == Diff::LCS 1.0.4 25 | * Fixed a problem with bin/ldiff output, especially for unified format. 26 | Newlines that should have been present weren't. 27 | * Changed the .tar.gz installer to generate Windows batch files if ones do not 28 | exist already. Removed the existing batch files as they didn't work. 29 | 30 | == Diff::LCS 1.0.3 31 | * Fixed a problem with #traverse_sequences where the first difference from the 32 | left sequence might not be appropriately captured. 33 | 34 | == Diff::LCS 1.0.2 35 | * Fixed an issue with ldiff not working because actions were changed from 36 | symbols to strings. 37 | 38 | == Diff::LCS 1.0.1 39 | * Minor modifications to the gemspec, the README. 40 | * Renamed the diff program to ldiff (as well as the companion batch file) so as 41 | to not collide with the standard diff program. 42 | * Fixed issues with RubyGEMs. Requires RubyGems > 0.6.1 or >= 0.6.1 with the 43 | latest CVS version. 44 | 45 | == Diff::LCS 1.0 46 | * Initial release based mostly on Perl's Algorithm::Diff. 47 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Diff::LCS 2 | ========= 3 | 4 | Compute differences using the McIlroy-Hunt LCS algorithm. 5 | 6 | Description 7 | ----------- 8 | 9 | Diff::LCS is a port of Perl's Algorithm::Diff that uses the McIlroy-Hunt 10 | longest common subsequence (LCS) algorithm to compute intelligent 11 | differences between two sequenced enumerable containers. 12 | 13 | The implementation is based on Mario I. Wolczko's Smalltalk version (1.2, 14 | 1993) and Ned Konz's Perl version (Algorithm::Diff). 15 | 16 | How to use 17 | ---------- 18 | 19 | Using this module is quite simple. By default, Diff::LCS does not extend 20 | objects with the Diff::LCS interface, but will be called as if it were a 21 | function: 22 | 23 | require 'diff/lcs' 24 | 25 | seq1 = %w(a b c e h j l m n p) 26 | seq2 = %w(b c d e f j k l m r s t) 27 | 28 | lcs = Diff::LCS.LCS(seq1, seq2) 29 | diffs = Diff::LCS.diff(seq1, seq2) 30 | sdiff = Diff::LCS.sdiff(seq1, seq2) 31 | seq = Diff::LCS.traverse_sequences(seq1, seq2, callback_obj) 32 | bal = Diff::LCS.traverse_balanced(seq1, seq2, callback_obj) 33 | seq2 == Diff::LCS.patch!(seq1, diffs) 34 | seq1 == Diff::LCS.unpatch!(seq2, diffs) 35 | seq2 == Diff::LCS.patch!(seq1, sdiff) 36 | seq1 == Diff::LCS.unpatch!(seq2, sdiff) 37 | 38 | Objects can be extended with Diff::LCS: 39 | 40 | seq1.extend(Diff::LCS) 41 | lcs = seq1.lcs(seq2) 42 | diffs = seq1.diff(seq2) 43 | sdiff = seq1.sdiff(seq2) 44 | seq = seq1.traverse_sequences(seq2, callback_obj) 45 | bal = seq1.traverse_balanced(seq2, callback_obj) 46 | seq2 == seq1.patch!(diffs) 47 | seq1 == seq2.unpatch!(diffs) 48 | seq2 == seq1.patch!(sdiff) 49 | seq1 == seq2.unpatch!(sdiff) 50 | 51 | By requiring `diff/lcs/array` or `diff/lcs/string`, `Array` or `String` 52 | will be extended for use this way. 53 | 54 | Authors 55 | ------- 56 | 57 | Original author: Austin Ziegler. 58 | 59 | Contributors: 60 | 61 | * Alan Chen 62 | * Gioele Barabucci 63 | 64 | License 65 | ------- 66 | 67 | Copyright 2004 Austin Ziegler 68 | 69 | This program is free software. It may be redistributed and/or modified 70 | under the terms of the GPL version 2 (or later), the Perl Artistic 71 | licence, or the Ruby licence. 72 | 73 | Implements McIlroy-Hunt diff algorithm; adapted from: 74 | 75 | * Perl's Algorithm::Diff by Ned Konz 76 | * Smalltalk by Mario I. Wolczko 77 | 78 | -------------------------------------------------------------------------------- /bin/htmldiff: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | # Copyright 2004 Austin Ziegler 3 | # 4 | # This program is free software. It may be redistributed and/or modified under 5 | # the terms of the GPL version 2 (or later), the Perl Artistic licence, or the 6 | # Ruby licence. 7 | 8 | begin 9 | require 'rubygems' 10 | require_gem 'diff-lcs', "1.1.1" 11 | require 'diff/lcs/string' 12 | rescue LoadError 13 | require 'diff/lcs' 14 | require 'diff/lcs/string' 15 | end 16 | 17 | require 'text/format' 18 | 19 | class HTMLDiff #:nodoc: 20 | attr_accessor :output 21 | 22 | def initialize(output) 23 | @output = output 24 | end 25 | 26 | # This will be called with both lines are the same 27 | def match(event) 28 | @output << %Q|
#{event.old_element}
\n| 29 | end 30 | 31 | # This will be called when there is a line in A that isn't in B 32 | def discard_a(event) 33 | @output << %Q|
#{event.old_element}
\n| 34 | end 35 | 36 | # This will be called when there is a line in B that isn't in A 37 | def discard_b(event) 38 | @output << %Q|
#{event.new_element}
\n| 39 | end 40 | end 41 | 42 | if ARGV.size != 2 43 | puts "usage: #{File.basename($0)} old new > output.html" 44 | exit 255 45 | end 46 | 47 | hd = HTMLDiff.new($stdout) 48 | tf = Text::Format.new 49 | tf.tabstop = 4 50 | 51 | preprocess = lambda { |line| tf.expand(line.chomp) } 52 | 53 | a = IO.readlines(ARGV[0]).map(&preprocess) 54 | b = IO.readlines(ARGV[1]).map(&preprocess) 55 | 56 | $stdout.write <<-START 57 | 58 | 59 | diff #{ARGV[0]} #{ARGV[1]} 60 | 89 | 90 | 91 |

diff  92 | #{ARGV[0]}  93 | #{ARGV[1]} 94 |

95 |
96 | START 97 | 98 | Diff::LCS.traverse_sequences(a, b, hd) 99 | 100 | $stdout.write <<-END 101 |
102 | 103 | 104 | END 105 | -------------------------------------------------------------------------------- /lib/diff/lcs/change.rb: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2004 Austin Ziegler 2 | # 3 | # This program is free software. It may be redistributed and/or modified under 4 | # the terms of the GPL version 2 (or later), the Perl Artistic licence, or the 5 | # Ruby licence. 6 | 7 | # Provides Diff::LCS::Change and Diff::LCS::ContextChange. 8 | 9 | # Centralises the change test code in Diff::LCS::Change and 10 | # Diff::LCS::ContextChange, since it's the same for both classes. 11 | module Diff::LCS::ChangeTypeTests 12 | def deleting? 13 | @action == '-' 14 | end 15 | 16 | def adding? 17 | @action == '+' 18 | end 19 | 20 | def unchanged? 21 | @action == '=' 22 | end 23 | 24 | def changed? 25 | @changed == '!' 26 | end 27 | 28 | def finished_a? 29 | @changed == '>' 30 | end 31 | 32 | def finished_b? 33 | @changed == '<' 34 | end 35 | end 36 | 37 | # Represents a simplistic (non-contextual) change. Represents the removal or 38 | # addition of an element from either the old or the new sequenced enumerable. 39 | class Diff::LCS::Change 40 | # Returns the action this Change represents. Can be '+' (#adding?), '-' 41 | # (#deleting?), '=' (#unchanged?), # or '!' (#changed?). When created by 42 | # Diff::LCS#diff or Diff::LCS#sdiff, it may also be '>' (#finished_a?) or 43 | # '<' (#finished_b?). 44 | attr_reader :action 45 | attr_reader :position 46 | attr_reader :element 47 | 48 | include Comparable 49 | def ==(other) 50 | (self.action == other.action) and 51 | (self.position == other.position) and 52 | (self.element == other.element) 53 | end 54 | 55 | def <=>(other) 56 | r = self.action <=> other.action 57 | r = self.position <=> other.position if r.zero? 58 | r = self.element <=> other.element if r.zero? 59 | r 60 | end 61 | 62 | def initialize(action, position, element) 63 | @action = action 64 | @position = position 65 | @element = element 66 | end 67 | 68 | # Creates a Change from an array produced by Change#to_a. 69 | def to_a 70 | [@action, @position, @element] 71 | end 72 | 73 | def self.from_a(arr) 74 | Diff::LCS::Change.new(arr[0], arr[1], arr[2]) 75 | end 76 | 77 | include Diff::LCS::ChangeTypeTests 78 | end 79 | 80 | # Represents a contextual change. Contains the position and values of the 81 | # elements in the old and the new sequenced enumerables as well as the action 82 | # taken. 83 | class Diff::LCS::ContextChange 84 | # Returns the action this Change represents. Can be '+' (#adding?), '-' 85 | # (#deleting?), '=' (#unchanged?), # or '!' (#changed?). When 86 | # created by Diff::LCS#diff or Diff::LCS#sdiff, it may also be '>' 87 | # (#finished_a?) or '<' (#finished_b?). 88 | attr_reader :action 89 | attr_reader :old_position 90 | attr_reader :old_element 91 | attr_reader :new_position 92 | attr_reader :new_element 93 | 94 | include Comparable 95 | 96 | def ==(other) 97 | (@action == other.action) and 98 | (@old_position == other.old_position) and 99 | (@new_position == other.new_position) and 100 | (@old_element == other.old_element) and 101 | (@new_element == other.new_element) 102 | end 103 | 104 | def inspect(*args) 105 | %Q(#<#{self.class.name}:#{__id__} @action=#{action} positions=#{old_position},#{new_position} elements=#{old_element.inspect},#{new_element.inspect}>) 106 | end 107 | 108 | def <=>(other) 109 | r = @action <=> other.action 110 | r = @old_position <=> other.old_position if r.zero? 111 | r = @new_position <=> other.new_position if r.zero? 112 | r = @old_element <=> other.old_element if r.zero? 113 | r = @new_element <=> other.new_element if r.zero? 114 | r 115 | end 116 | 117 | def initialize(action, old_position, old_element, new_position, new_element) 118 | @action = action 119 | @old_position = old_position 120 | @old_element = old_element 121 | @new_position = new_position 122 | @new_element = new_element 123 | end 124 | 125 | def to_a 126 | [@action, [@old_position, @old_element], [@new_position, @new_element]] 127 | end 128 | 129 | # Creates a ContextChange from an array produced by ContextChange#to_a. 130 | def self.from_a(arr) 131 | if arr.size == 5 132 | Diff::LCS::ContextChange.new(arr[0], arr[1], arr[2], arr[3], arr[4]) 133 | else 134 | Diff::LCS::ContextChange.new(arr[0], arr[1][0], arr[1][1], arr[2][0], 135 | arr[2][1]) 136 | end 137 | end 138 | 139 | # Simplifies a context change for use in some diff callbacks. '<' actions 140 | # are converted to '-' and '>' actions are converted to '+'. 141 | def self.simplify(event) 142 | ea = event.to_a 143 | 144 | case ea[0] 145 | when '-' 146 | ea[2][1] = nil 147 | when '<' 148 | ea[0] = '-' 149 | ea[2][1] = nil 150 | when '+' 151 | ea[1][1] = nil 152 | when '>' 153 | ea[0] = '+' 154 | ea[1][1] = nil 155 | end 156 | 157 | Diff::LCS::ContextChange.from_a(ea) 158 | end 159 | 160 | include Diff::LCS::ChangeTypeTests 161 | end 162 | -------------------------------------------------------------------------------- /lib/diff/lcs/hunk.rb: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2004 Austin Ziegler 2 | # Copyright (c) 2010 Gioele Barabucci 3 | # 4 | # This program is free software. It may be redistributed and/or modified under 5 | # the terms of the GPL version 2 (or later), the Perl Artistic licence, or the 6 | # Ruby licence. 7 | 8 | # Contains Diff::LCS::Hunk for bin/ldiff. 9 | 10 | require 'diff/lcs/block' 11 | 12 | # A Hunk is a group of Blocks which overlap because of the context 13 | # surrounding each block. (So if we're not using context, every hunk will 14 | # contain one block.) Used in the diff program (bin/diff). 15 | class Diff::LCS::Hunk 16 | # Create a hunk using references to both the old and new data, as well as 17 | # the piece of data 18 | def initialize(data_old, data_new, piece, context, file_length_difference) 19 | # At first, a hunk will have just one Block in it 20 | @blocks = [ Diff::LCS::Block.new(piece) ] 21 | @data_old = data_old 22 | @data_new = data_new 23 | 24 | before = after = file_length_difference 25 | after += @blocks[0].diff_size 26 | @file_length_difference = after # The caller must get this manually 27 | 28 | # Save the start & end of each array. If the array doesn't exist 29 | # (e.g., we're only adding items in this block), then figure out the 30 | # line number based on the line number of the other file and the 31 | # current difference in file lengths. 32 | if @blocks[0].remove.empty? 33 | a1 = a2 = nil 34 | else 35 | a1 = @blocks[0].remove[0].position 36 | a2 = @blocks[0].remove[-1].position 37 | end 38 | 39 | if @blocks[0].insert.empty? 40 | b1 = b2 = nil 41 | else 42 | b1 = @blocks[0].insert[0].position 43 | b2 = @blocks[0].insert[-1].position 44 | end 45 | 46 | @start_old = a1 || (b1 - before) 47 | @start_new = b1 || (a1 + before) 48 | @end_old = a2 || (b2 - after) 49 | @end_new = b2 || (a2 + after) 50 | 51 | self.flag_context = context 52 | end 53 | 54 | attr_reader :blocks 55 | attr_reader :start_old, :start_new 56 | attr_reader :end_old, :end_new 57 | attr_reader :file_length_difference 58 | 59 | # Change the "start" and "end" fields to note that context should be added 60 | # to this hunk 61 | attr_reader :flag_context 62 | def flag_context=(context) #:nodoc: 63 | return if context.nil? or context.zero? 64 | 65 | add_start = (context > @start_old) ? @start_old : context 66 | @start_old -= add_start 67 | @start_new -= add_start 68 | 69 | if (@end_old + context) > @data_old.size 70 | add_end = @data_old.size - @end_old 71 | else 72 | add_end = context 73 | end 74 | @end_old += add_end 75 | @end_new += add_end 76 | end 77 | 78 | def unshift(hunk) 79 | @start_old = hunk.start_old 80 | @start_new = hunk.start_new 81 | blocks.unshift(*hunk.blocks) 82 | end 83 | 84 | # Is there an overlap between hunk arg0 and old hunk arg1? Note: if end 85 | # of old hunk is one less than beginning of second, they overlap 86 | def overlaps?(hunk = nil) 87 | return nil if hunk.nil? 88 | 89 | a = (@start_old - hunk.end_old) <= 1 90 | b = (@start_new - hunk.end_new) <= 1 91 | return (a or b) 92 | end 93 | 94 | def diff(format) 95 | case format 96 | when :old 97 | old_diff 98 | when :unified 99 | unified_diff 100 | when :context 101 | context_diff 102 | when :ed 103 | self 104 | when :reverse_ed, :ed_finish 105 | ed_diff(format) 106 | else 107 | raise "Unknown diff format #{format}." 108 | end 109 | end 110 | 111 | def each_old(block) 112 | @data_old[@start_old .. @end_old].each { |e| yield e } 113 | end 114 | 115 | private 116 | # Note that an old diff can't have any context. Therefore, we know that 117 | # there's only one block in the hunk. 118 | def old_diff 119 | warn "Expecting only one block in an old diff hunk!" if @blocks.size > 1 120 | op_act = { "+" => 'a', "-" => 'd', "!" => "c" } 121 | 122 | block = @blocks[0] 123 | 124 | # Calculate item number range. Old diff range is just like a context 125 | # diff range, except the ranges are on one line with the action between 126 | # them. 127 | s = "#{context_range(:old)}#{op_act[block.op]}#{context_range(:new)}\n" 128 | # If removing anything, just print out all the remove lines in the hunk 129 | # which is just all the remove lines in the block. 130 | @data_old[@start_old .. @end_old].each { |e| s << "< #{e}\n" } unless block.remove.empty? 131 | s << "---\n" if block.op == "!" 132 | @data_new[@start_new .. @end_new].each { |e| s << "> #{e}\n" } unless block.insert.empty? 133 | s 134 | end 135 | 136 | def unified_diff 137 | # Calculate item number range. 138 | s = "@@ -#{unified_range(:old)} +#{unified_range(:new)} @@\n" 139 | 140 | # Outlist starts containing the hunk of the old file. Removing an item 141 | # just means putting a '-' in front of it. Inserting an item requires 142 | # getting it from the new file and splicing it in. We splice in 143 | # +num_added+ items. Remove blocks use +num_added+ because splicing 144 | # changed the length of outlist. 145 | # 146 | # We remove +num_removed+ items. Insert blocks use +num_removed+ 147 | # because their item numbers -- corresponding to positions in the NEW 148 | # file -- don't take removed items into account. 149 | lo, hi, num_added, num_removed = @start_old, @end_old, 0, 0 150 | 151 | outlist = @data_old[lo .. hi].collect { |e| e.gsub(/^/, ' ') } 152 | 153 | @blocks.each do |block| 154 | block.remove.each do |item| 155 | op = item.action.to_s # - 156 | offset = item.position - lo + num_added 157 | outlist[offset].gsub!(/^ /, op.to_s) 158 | num_removed += 1 159 | end 160 | block.insert.each do |item| 161 | op = item.action.to_s # + 162 | offset = item.position - @start_new + num_removed 163 | outlist[offset, 0] = "#{op}#{@data_new[item.position]}" 164 | num_added += 1 165 | end 166 | end 167 | 168 | s << outlist.join("\n") 169 | end 170 | 171 | def context_diff 172 | s = "***************\n" 173 | s << "*** #{context_range(:old)} ****\n" 174 | r = context_range(:new) 175 | 176 | # Print out file 1 part for each block in context diff format if there 177 | # are any blocks that remove items 178 | lo, hi = @start_old, @end_old 179 | removes = @blocks.select { |e| not e.remove.empty? } 180 | if removes 181 | outlist = @data_old[lo .. hi].collect { |e| e.gsub(/^/, ' ') } 182 | removes.each do |block| 183 | block.remove.each do |item| 184 | outlist[item.position - lo].gsub!(/^ /) { block.op } # - or ! 185 | end 186 | end 187 | s << outlist.join("\n") 188 | end 189 | 190 | s << "\n--- #{r} ----\n" 191 | lo, hi = @start_new, @end_new 192 | inserts = @blocks.select { |e| not e.insert.empty? } 193 | if inserts 194 | outlist = @data_new[lo .. hi].collect { |e| e.gsub(/^/, ' ') } 195 | inserts.each do |block| 196 | block.insert.each do |item| 197 | outlist[item.position - lo].gsub!(/^ /) { block.op } # + or ! 198 | end 199 | end 200 | s << outlist.join("\n") 201 | end 202 | s 203 | end 204 | 205 | def ed_diff(format) 206 | op_act = { "+" => 'a', "-" => 'd', "!" => "c" } 207 | warn "Expecting only one block in an old diff hunk!" if @blocks.size > 1 208 | 209 | if format == :reverse_ed 210 | s = "#{op_act[@blocks[0].op]}#{context_range(:old)}\n" 211 | else 212 | s = "#{context_range(:old).gsub(/,/, ' ')}#{op_act[@blocks[0].op]}\n" 213 | end 214 | 215 | unless @blocks[0].insert.empty? 216 | @data_new[@start_new .. @end_new].each { |e| s << "#{e}\n" } 217 | s << ".\n" 218 | end 219 | s 220 | end 221 | 222 | # Generate a range of item numbers to print. Only print 1 number if the 223 | # range has only one item in it. Otherwise, it's 'start,end' 224 | def context_range(mode) 225 | case mode 226 | when :old 227 | s, e = (@start_old + 1), (@end_old + 1) 228 | when :new 229 | s, e = (@start_new + 1), (@end_new + 1) 230 | end 231 | 232 | (s < e) ? "#{s},#{e}" : "#{e}" 233 | end 234 | 235 | # Generate a range of item numbers to print for unified diff. Print 236 | # number where block starts, followed by number of lines in the block 237 | # (don't print number of lines if it's 1) 238 | def unified_range(mode) 239 | case mode 240 | when :old 241 | s, e = (@start_old + 1), (@end_old + 1) 242 | when :new 243 | s, e = (@start_new + 1), (@end_new + 1) 244 | end 245 | 246 | length = e - s + 1 247 | first = (length < 2) ? e : s # "strange, but correct" 248 | (length == 1) ? "#{first}" : "#{first},#{length}" 249 | end 250 | end 251 | -------------------------------------------------------------------------------- /lib/diff/lcs/callbacks.rb: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2004 Austin Ziegler 2 | # 3 | # This program is free software. It may be redistributed and/or modified under 4 | # the terms of the GPL version 2 (or later), the Perl Artistic licence, or the 5 | # Ruby licence. 6 | 7 | # Contains definitions for all default callback objects. 8 | 9 | require 'diff/lcs/change' 10 | 11 | module Diff::LCS 12 | # This callback object implements the default set of callback events, which 13 | # only returns the event itself. Note that #finished_a and #finished_b are 14 | # not implemented -- I haven't yet figured out where they would be useful. 15 | # 16 | # Note that this is intended to be called as is, e.g., 17 | # 18 | # Diff::LCS.LCS(seq1, seq2, Diff::LCS::DefaultCallbacks) 19 | class DefaultCallbacks 20 | class << self 21 | # Called when two items match. 22 | def match(event) 23 | event 24 | end 25 | # Called when the old value is discarded in favour of the new value. 26 | def discard_a(event) 27 | event 28 | end 29 | # Called when the new value is discarded in favour of the old value. 30 | def discard_b(event) 31 | event 32 | end 33 | # Called when both the old and new values have changed. 34 | def change(event) 35 | event 36 | end 37 | 38 | private :new 39 | end 40 | end 41 | 42 | # An alias for DefaultCallbacks that is used in Diff::LCS#traverse_sequences. 43 | # 44 | # Diff::LCS.LCS(seq1, seq2, Diff::LCS::SequenceCallbacks) 45 | SequenceCallbacks = DefaultCallbacks 46 | # An alias for DefaultCallbacks that is used in Diff::LCS#traverse_balanced. 47 | # 48 | # Diff::LCS.LCS(seq1, seq2, Diff::LCS::BalancedCallbacks) 49 | BalancedCallbacks = DefaultCallbacks 50 | end 51 | 52 | # This will produce a compound array of simple diff change objects. Each 53 | # element in the #diffs array is a +hunk+ or +hunk+ array, where each 54 | # element in each +hunk+ array is a single Change object representing the 55 | # addition or removal of a single element from one of the two tested 56 | # sequences. The +hunk+ provides the full context for the changes. 57 | # 58 | # diffs = Diff::LCS.diff(seq1, seq2) 59 | # # This example shows a simplified array format. 60 | # # [ [ [ '-', 0, 'a' ] ], # 1 61 | # # [ [ '+', 2, 'd' ] ], # 2 62 | # # [ [ '-', 4, 'h' ], # 3 63 | # # [ '+', 4, 'f' ] ], 64 | # # [ [ '+', 6, 'k' ] ], # 4 65 | # # [ [ '-', 8, 'n' ], # 5 66 | # # [ '-', 9, 'p' ], 67 | # # [ '+', 9, 'r' ], 68 | # # [ '+', 10, 's' ], 69 | # # [ '+', 11, 't' ] ] ] 70 | # 71 | # There are five hunks here. The first hunk says that the +a+ at position 0 72 | # of the first sequence should be deleted ('-'). The second hunk 73 | # says that the +d+ at position 2 of the second sequence should be inserted 74 | # ('+'). The third hunk says that the +h+ at position 4 of the 75 | # first sequence should be removed and replaced with the +f+ from position 4 76 | # of the second sequence. The other two hunks are described similarly. 77 | # 78 | # === Use 79 | # This callback object must be initialised and is used by the Diff::LCS#diff 80 | # method. 81 | # 82 | # cbo = Diff::LCS::DiffCallbacks.new 83 | # Diff::LCS.LCS(seq1, seq2, cbo) 84 | # cbo.finish 85 | # 86 | # Note that the call to #finish is absolutely necessary, or the last set of 87 | # changes will not be visible. Alternatively, can be used as: 88 | # 89 | # cbo = Diff::LCS::DiffCallbacks.new { |tcbo| Diff::LCS.LCS(seq1, seq2, tcbo) } 90 | # 91 | # The necessary #finish call will be made. 92 | # 93 | # === Simplified Array Format 94 | # The simplified array format used in the example above can be obtained 95 | # with: 96 | # 97 | # require 'pp' 98 | # pp diffs.map { |e| e.map { |f| f.to_a } } 99 | class Diff::LCS::DiffCallbacks 100 | # Returns the difference set collected during the diff process. 101 | attr_reader :diffs 102 | 103 | def initialize # :yields self: 104 | @hunk = [] 105 | @diffs = [] 106 | 107 | if block_given? 108 | begin 109 | yield self 110 | ensure 111 | self.finish 112 | end 113 | end 114 | end 115 | 116 | # Finalizes the diff process. If an unprocessed hunk still exists, then it 117 | # is appended to the diff list. 118 | def finish 119 | add_nonempty_hunk 120 | end 121 | 122 | def match(event) 123 | add_nonempty_hunk 124 | end 125 | 126 | def discard_a(event) 127 | @hunk << Diff::LCS::Change.new('-', event.old_position, event.old_element) 128 | end 129 | 130 | def discard_b(event) 131 | @hunk << Diff::LCS::Change.new('+', event.new_position, event.new_element) 132 | end 133 | 134 | private 135 | def add_nonempty_hunk 136 | @diffs << @hunk unless @hunk.empty? 137 | @hunk = [] 138 | end 139 | end 140 | 141 | # This will produce a compound array of contextual diff change objects. Each 142 | # element in the #diffs array is a "hunk" array, where each element in each 143 | # "hunk" array is a single change. Each change is a Diff::LCS::ContextChange 144 | # that contains both the old index and new index values for the change. The 145 | # "hunk" provides the full context for the changes. Both old and new objects 146 | # will be presented for changed objects. +nil+ will be substituted for a 147 | # discarded object. 148 | # 149 | # seq1 = %w(a b c e h j l m n p) 150 | # seq2 = %w(b c d e f j k l m r s t) 151 | # 152 | # diffs = Diff::LCS.diff(seq1, seq2, Diff::LCS::ContextDiffCallbacks) 153 | # # This example shows a simplified array format. 154 | # # [ [ [ '-', [ 0, 'a' ], [ 0, nil ] ] ], # 1 155 | # # [ [ '+', [ 3, nil ], [ 2, 'd' ] ] ], # 2 156 | # # [ [ '-', [ 4, 'h' ], [ 4, nil ] ], # 3 157 | # # [ '+', [ 5, nil ], [ 4, 'f' ] ] ], 158 | # # [ [ '+', [ 6, nil ], [ 6, 'k' ] ] ], # 4 159 | # # [ [ '-', [ 8, 'n' ], [ 9, nil ] ], # 5 160 | # # [ '+', [ 9, nil ], [ 9, 'r' ] ], 161 | # # [ '-', [ 9, 'p' ], [ 10, nil ] ], 162 | # # [ '+', [ 10, nil ], [ 10, 's' ] ], 163 | # # [ '+', [ 10, nil ], [ 11, 't' ] ] ] ] 164 | # 165 | # The five hunks shown are comprised of individual changes; if there is a 166 | # related set of changes, they are still shown individually. 167 | # 168 | # This callback can also be used with Diff::LCS#sdiff, which will produce 169 | # results like: 170 | # 171 | # diffs = Diff::LCS.sdiff(seq1, seq2, Diff::LCS::ContextCallbacks) 172 | # # This example shows a simplified array format. 173 | # # [ [ [ "-", [ 0, "a" ], [ 0, nil ] ] ], # 1 174 | # # [ [ "+", [ 3, nil ], [ 2, "d" ] ] ], # 2 175 | # # [ [ "!", [ 4, "h" ], [ 4, "f" ] ] ], # 3 176 | # # [ [ "+", [ 6, nil ], [ 6, "k" ] ] ], # 4 177 | # # [ [ "!", [ 8, "n" ], [ 9, "r" ] ], # 5 178 | # # [ "!", [ 9, "p" ], [ 10, "s" ] ], 179 | # # [ "+", [ 10, nil ], [ 11, "t" ] ] ] ] 180 | # 181 | # The five hunks are still present, but are significantly shorter in total 182 | # presentation, because changed items are shown as changes ("!") instead of 183 | # potentially "mismatched" pairs of additions and deletions. 184 | # 185 | # The result of this operation is similar to that of 186 | # Diff::LCS::SDiffCallbacks. They may be compared as: 187 | # 188 | # s = Diff::LCS.sdiff(seq1, seq2).reject { |e| e.action == "=" } 189 | # c = Diff::LCS.sdiff(seq1, seq2, Diff::LCS::ContextDiffCallbacks).flatten 190 | # 191 | # s == c # -> true 192 | # 193 | # === Use 194 | # This callback object must be initialised and can be used by the 195 | # Diff::LCS#diff or Diff::LCS#sdiff methods. 196 | # 197 | # cbo = Diff::LCS::ContextDiffCallbacks.new 198 | # Diff::LCS.LCS(seq1, seq2, cbo) 199 | # cbo.finish 200 | # 201 | # Note that the call to #finish is absolutely necessary, or the last set of 202 | # changes will not be visible. Alternatively, can be used as: 203 | # 204 | # cbo = Diff::LCS::ContextDiffCallbacks.new { |tcbo| Diff::LCS.LCS(seq1, seq2, tcbo) } 205 | # 206 | # The necessary #finish call will be made. 207 | # 208 | # === Simplified Array Format 209 | # The simplified array format used in the example above can be obtained 210 | # with: 211 | # 212 | # require 'pp' 213 | # pp diffs.map { |e| e.map { |f| f.to_a } } 214 | class Diff::LCS::ContextDiffCallbacks < Diff::LCS::DiffCallbacks 215 | def discard_a(event) 216 | @hunk << Diff::LCS::ContextChange.simplify(event) 217 | end 218 | 219 | def discard_b(event) 220 | @hunk << Diff::LCS::ContextChange.simplify(event) 221 | end 222 | 223 | def change(event) 224 | @hunk << Diff::LCS::ContextChange.simplify(event) 225 | end 226 | end 227 | 228 | # This will produce a simple array of diff change objects. Each element in 229 | # the #diffs array is a single ContextChange. In the set of #diffs provided 230 | # by SDiffCallbacks, both old and new objects will be presented for both 231 | # changed and unchanged objects. +nil+ will be substituted 232 | # for a discarded object. 233 | # 234 | # The diffset produced by this callback, when provided to Diff::LCS#sdiff, 235 | # will compute and display the necessary components to show two sequences 236 | # and their minimized differences side by side, just like the Unix utility 237 | # +sdiff+. 238 | # 239 | # same same 240 | # before | after 241 | # old < - 242 | # - > new 243 | # 244 | # seq1 = %w(a b c e h j l m n p) 245 | # seq2 = %w(b c d e f j k l m r s t) 246 | # 247 | # diffs = Diff::LCS.sdiff(seq1, seq2) 248 | # # This example shows a simplified array format. 249 | # # [ [ "-", [ 0, "a"], [ 0, nil ] ], 250 | # # [ "=", [ 1, "b"], [ 0, "b" ] ], 251 | # # [ "=", [ 2, "c"], [ 1, "c" ] ], 252 | # # [ "+", [ 3, nil], [ 2, "d" ] ], 253 | # # [ "=", [ 3, "e"], [ 3, "e" ] ], 254 | # # [ "!", [ 4, "h"], [ 4, "f" ] ], 255 | # # [ "=", [ 5, "j"], [ 5, "j" ] ], 256 | # # [ "+", [ 6, nil], [ 6, "k" ] ], 257 | # # [ "=", [ 6, "l"], [ 7, "l" ] ], 258 | # # [ "=", [ 7, "m"], [ 8, "m" ] ], 259 | # # [ "!", [ 8, "n"], [ 9, "r" ] ], 260 | # # [ "!", [ 9, "p"], [ 10, "s" ] ], 261 | # # [ "+", [ 10, nil], [ 11, "t" ] ] ] 262 | # 263 | # The result of this operation is similar to that of 264 | # Diff::LCS::ContextDiffCallbacks. They may be compared as: 265 | # 266 | # s = Diff::LCS.sdiff(seq1, seq2).reject { |e| e.action == "=" } 267 | # c = Diff::LCS.sdiff(seq1, seq2, Diff::LCS::ContextDiffCallbacks).flatten 268 | # 269 | # s == c # -> true 270 | # 271 | # === Use 272 | # This callback object must be initialised and is used by the Diff::LCS#sdiff 273 | # method. 274 | # 275 | # cbo = Diff::LCS::SDiffCallbacks.new 276 | # Diff::LCS.LCS(seq1, seq2, cbo) 277 | # 278 | # As with the other initialisable callback objects, Diff::LCS::SDiffCallbacks 279 | # can be initialised with a block. As there is no "fininishing" to be done, 280 | # this has no effect on the state of the object. 281 | # 282 | # cbo = Diff::LCS::SDiffCallbacks.new { |tcbo| Diff::LCS.LCS(seq1, seq2, tcbo) } 283 | # 284 | # === Simplified Array Format 285 | # The simplified array format used in the example above can be obtained 286 | # with: 287 | # 288 | # require 'pp' 289 | # pp diffs.map { |e| e.to_a } 290 | class Diff::LCS::SDiffCallbacks 291 | # Returns the difference set collected during the diff process. 292 | attr_reader :diffs 293 | 294 | def initialize #:yields self: 295 | @diffs = [] 296 | yield self if block_given? 297 | end 298 | 299 | def match(event) 300 | @diffs << Diff::LCS::ContextChange.simplify(event) 301 | end 302 | 303 | def discard_a(event) 304 | @diffs << Diff::LCS::ContextChange.simplify(event) 305 | end 306 | 307 | def discard_b(event) 308 | @diffs << Diff::LCS::ContextChange.simplify(event) 309 | end 310 | 311 | def change(event) 312 | @diffs << Diff::LCS::ContextChange.simplify(event) 313 | end 314 | end 315 | -------------------------------------------------------------------------------- /test/test_diff-lcs.rb: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env ruby 2 | # Copyright (c) 2004 Austin Ziegler 3 | # 4 | # This program is free software. It may be redistributed and/or modified under 5 | # the terms of the GPL version 2 (or later), the Perl Artistic licence, or the 6 | # Ruby licence. 7 | 8 | $LOAD_PATH.unshift("#{File.dirname(__FILE__)}/../lib") if __FILE__ == $0 9 | 10 | require 'diff/lcs' 11 | require 'test/unit' 12 | require 'pp' 13 | require 'diff/lcs/array' 14 | 15 | module Diff::LCS::Tests 16 | def __format_diffs(diffs) 17 | diffs.map do |e| 18 | if e.kind_of?(Array) 19 | e.map { |f| f.to_a.join }.join(", ") 20 | else 21 | e.to_a.join 22 | end 23 | end.join("; ") 24 | end 25 | 26 | def __map_diffs(diffs, klass = Diff::LCS::ContextChange) 27 | diffs.map do |chunks| 28 | if klass == Diff::LCS::ContextChange 29 | klass.from_a(chunks) 30 | else 31 | chunks.map { |changes| klass.from_a(changes) } 32 | end 33 | end 34 | end 35 | 36 | def __simple_callbacks 37 | callbacks = Object.new 38 | class << callbacks 39 | attr_reader :matched_a 40 | attr_reader :matched_b 41 | attr_reader :discards_a 42 | attr_reader :discards_b 43 | attr_reader :done_a 44 | attr_reader :done_b 45 | 46 | def reset 47 | @matched_a = [] 48 | @matched_b = [] 49 | @discards_a = [] 50 | @discards_b = [] 51 | @done_a = [] 52 | @done_b = [] 53 | end 54 | 55 | def match(event) 56 | @matched_a << event.old_element 57 | @matched_b << event.new_element 58 | end 59 | 60 | def discard_b(event) 61 | @discards_b << event.new_element 62 | end 63 | 64 | def discard_a(event) 65 | @discards_a << event.old_element 66 | end 67 | 68 | def finished_a(event) 69 | @done_a << [event.old_element, event.old_position] 70 | end 71 | 72 | def finished_b(event) 73 | @done_b << [event.new_element, event.new_position] 74 | end 75 | end 76 | callbacks.reset 77 | callbacks 78 | end 79 | 80 | def __balanced_callback 81 | cb = Object.new 82 | class << cb 83 | attr_reader :result 84 | 85 | def reset 86 | @result = "" 87 | end 88 | 89 | def match(event) 90 | @result << "M#{event.old_position}#{event.new_position} " 91 | end 92 | 93 | def discard_a(event) 94 | @result << "DA#{event.old_position}#{event.new_position} " 95 | end 96 | 97 | def discard_b(event) 98 | @result << "DB#{event.old_position}#{event.new_position} " 99 | end 100 | 101 | def change(event) 102 | @result << "C#{event.old_position}#{event.new_position} " 103 | end 104 | end 105 | cb.reset 106 | cb 107 | end 108 | 109 | def setup 110 | @seq1 = %w(a b c e h j l m n p) 111 | @seq2 = %w(b c d e f j k l m r s t) 112 | 113 | @correct_lcs = %w(b c e j l m) 114 | 115 | @skipped_seq1 = 'a h n p' 116 | @skipped_seq2 = 'd f k r s t' 117 | 118 | correct_diff = [ 119 | [ [ '-', 0, 'a' ] ], 120 | [ [ '+', 2, 'd' ] ], 121 | [ [ '-', 4, 'h' ], 122 | [ '+', 4, 'f' ] ], 123 | [ [ '+', 6, 'k' ] ], 124 | [ [ '-', 8, 'n' ], 125 | [ '-', 9, 'p' ], 126 | [ '+', 9, 'r' ], 127 | [ '+', 10, 's' ], 128 | [ '+', 11, 't' ] ] ] 129 | @correct_diff = __map_diffs(correct_diff, Diff::LCS::Change) 130 | end 131 | end 132 | 133 | class TestLCS < Test::Unit::TestCase 134 | include Diff::LCS::Tests 135 | 136 | def test_lcs 137 | res = ares = bres = nil 138 | assert_nothing_raised { res = Diff::LCS.__lcs(@seq1, @seq2) } 139 | # The result of the LCS (less the +nil+ values) must be as long as the 140 | # correct result. 141 | assert_equal(res.compact.size, @correct_lcs.size) 142 | res.each_with_index { |ee, ii| assert(ee.nil? || (@seq1[ii] == @seq2[ee])) } 143 | assert_nothing_raised { ares = (0...res.size).map { |ii| res[ii] ? @seq1[ii] : nil } } 144 | assert_nothing_raised { bres = (0...res.size).map { |ii| res[ii] ? @seq2[res[ii]] : nil } } 145 | assert_equal(@correct_lcs, ares.compact) 146 | assert_equal(@correct_lcs, bres.compact) 147 | assert_nothing_raised { res = Diff::LCS.LCS(@seq1, @seq2) } 148 | assert_equal(res.compact, @correct_lcs) 149 | end 150 | end 151 | 152 | class TestSequences < Test::Unit::TestCase 153 | include Diff::LCS::Tests 154 | 155 | def test_sequences 156 | callbacks = nil 157 | assert_nothing_raised do 158 | callbacks = __simple_callbacks 159 | class << callbacks 160 | undef :finished_a 161 | undef :finished_b 162 | end 163 | Diff::LCS.traverse_sequences(@seq1, @seq2, callbacks) 164 | end 165 | assert_equal(@correct_lcs.size, callbacks.matched_a.size) 166 | assert_equal(@correct_lcs.size, callbacks.matched_b.size) 167 | assert_equal(@skipped_seq1, callbacks.discards_a.join(" ")) 168 | assert_equal(@skipped_seq2, callbacks.discards_b.join(" ")) 169 | assert_nothing_raised do 170 | callbacks = __simple_callbacks 171 | Diff::LCS.traverse_sequences(@seq1, @seq2, callbacks) 172 | end 173 | assert_equal(@correct_lcs.size, callbacks.matched_a.size) 174 | assert_equal(@correct_lcs.size, callbacks.matched_b.size) 175 | assert_equal(@skipped_seq1, callbacks.discards_a.join(" ")) 176 | assert_equal(@skipped_seq2, callbacks.discards_b.join(" ")) 177 | assert_equal(9, callbacks.done_a[0][1]) 178 | assert_nil(callbacks.done_b[0]) 179 | 180 | # seqw = %w(abcd efgh ijkl mnopqrstuvwxyz) 181 | # assert_nothing_raised do 182 | # callbacks = __simple_callbacks 183 | # class << callbacks 184 | # undef :finished_a 185 | # undef :finished_b 186 | # end 187 | # Diff::LCS.traverse_sequences(seqw, [], callbacks) 188 | # end 189 | end 190 | 191 | def test_diff 192 | diff = nil 193 | assert_nothing_raised { diff = Diff::LCS.diff(@seq1, @seq2) } 194 | assert_equal(__format_diffs(@correct_diff), __format_diffs(diff)) 195 | assert_equal(@correct_diff, diff) 196 | end 197 | 198 | def test_diff_empty 199 | seqw = %w(abcd efgh ijkl mnopqrstuvwxyz) 200 | correct_diff = [ 201 | [ [ '-', 0, 'abcd' ], 202 | [ '-', 1, 'efgh' ], 203 | [ '-', 2, 'ijkl' ], 204 | [ '-', 3, 'mnopqrstuvwxyz' ] ] ] 205 | diff = nil 206 | 207 | assert_nothing_raised { diff = Diff::LCS.diff(seqw, []) } 208 | assert_equal(__format_diffs(correct_diff), __format_diffs(diff)) 209 | 210 | correct_diff = [ 211 | [ [ '+', 0, 'abcd' ], 212 | [ '+', 1, 'efgh' ], 213 | [ '+', 2, 'ijkl' ], 214 | [ '+', 3, 'mnopqrstuvwxyz' ] ] ] 215 | assert_nothing_raised { diff = Diff::LCS.diff([], seqw) } 216 | assert_equal(__format_diffs(correct_diff), __format_diffs(diff)) 217 | end 218 | end 219 | 220 | class TestBalanced < Test::Unit::TestCase 221 | include Diff::LCS::Tests 222 | 223 | def test_sdiff_a 224 | sdiff = nil 225 | seq1 = %w(abc def yyy xxx ghi jkl) 226 | seq2 = %w(abc dxf xxx ghi jkl) 227 | correct_sdiff = [ 228 | [ '=', [ 0, 'abc' ], [ 0, 'abc' ] ], 229 | [ '!', [ 1, 'def' ], [ 1, 'dxf' ] ], 230 | [ '-', [ 2, 'yyy' ], [ 2, nil ] ], 231 | [ '=', [ 3, 'xxx' ], [ 2, 'xxx' ] ], 232 | [ '=', [ 4, 'ghi' ], [ 3, 'ghi' ] ], 233 | [ '=', [ 5, 'jkl' ], [ 4, 'jkl' ] ] ] 234 | correct_sdiff = __map_diffs(correct_sdiff) 235 | assert_nothing_raised { sdiff = Diff::LCS.sdiff(seq1, seq2) } 236 | assert_equal(correct_sdiff, sdiff) 237 | end 238 | 239 | def test_sdiff_b 240 | sdiff = nil 241 | correct_sdiff = [ 242 | [ '-', [ 0, 'a' ], [ 0, nil ] ], 243 | [ '=', [ 1, 'b' ], [ 0, 'b' ] ], 244 | [ '=', [ 2, 'c' ], [ 1, 'c' ] ], 245 | [ '+', [ 3, nil ], [ 2, 'd' ] ], 246 | [ '=', [ 3, 'e' ], [ 3, 'e' ] ], 247 | [ '!', [ 4, 'h' ], [ 4, 'f' ] ], 248 | [ '=', [ 5, 'j' ], [ 5, 'j' ] ], 249 | [ '+', [ 6, nil ], [ 6, 'k' ] ], 250 | [ '=', [ 6, 'l' ], [ 7, 'l' ] ], 251 | [ '=', [ 7, 'm' ], [ 8, 'm' ] ], 252 | [ '!', [ 8, 'n' ], [ 9, 'r' ] ], 253 | [ '!', [ 9, 'p' ], [ 10, 's' ] ], 254 | [ '+', [ 10, nil ], [ 11, 't' ] ] ] 255 | correct_sdiff = __map_diffs(correct_sdiff) 256 | assert_nothing_raised { sdiff = Diff::LCS.sdiff(@seq1, @seq2) } 257 | assert_equal(correct_sdiff, sdiff) 258 | end 259 | 260 | def test_sdiff_c 261 | sdiff = nil 262 | seq1 = %w(a b c d e) 263 | seq2 = %w(a e) 264 | correct_sdiff = [ 265 | [ '=', [ 0, 'a' ], [ 0, 'a' ] ], 266 | [ '-', [ 1, 'b' ], [ 1, nil ] ], 267 | [ '-', [ 2, 'c' ], [ 1, nil ] ], 268 | [ '-', [ 3, 'd' ], [ 1, nil ] ], 269 | [ '=', [ 4, 'e' ], [ 1, 'e' ] ] ] 270 | correct_sdiff = __map_diffs(correct_sdiff) 271 | assert_nothing_raised { sdiff = Diff::LCS.sdiff(seq1, seq2) } 272 | assert_equal(correct_sdiff, sdiff) 273 | end 274 | 275 | def test_sdiff_d 276 | sdiff = nil 277 | seq1 = %w(a e) 278 | seq2 = %w(a b c d e) 279 | correct_sdiff = [ 280 | [ '=', [ 0, 'a' ], [ 0, 'a' ] ], 281 | [ '+', [ 1, nil ], [ 1, 'b' ] ], 282 | [ '+', [ 1, nil ], [ 2, 'c' ] ], 283 | [ '+', [ 1, nil ], [ 3, 'd' ] ], 284 | [ '=', [ 1, 'e' ], [ 4, 'e' ] ] ] 285 | correct_sdiff = __map_diffs(correct_sdiff) 286 | assert_nothing_raised { sdiff = Diff::LCS.sdiff(seq1, seq2) } 287 | assert_equal(correct_sdiff, sdiff) 288 | end 289 | 290 | def test_sdiff_e 291 | sdiff = nil 292 | seq1 = %w(v x a e) 293 | seq2 = %w(w y a b c d e) 294 | correct_sdiff = [ 295 | [ '!', [ 0, 'v' ], [ 0, 'w' ] ], 296 | [ '!', [ 1, 'x' ], [ 1, 'y' ] ], 297 | [ '=', [ 2, 'a' ], [ 2, 'a' ] ], 298 | [ '+', [ 3, nil ], [ 3, 'b' ] ], 299 | [ '+', [ 3, nil ], [ 4, 'c' ] ], 300 | [ '+', [ 3, nil ], [ 5, 'd' ] ], 301 | [ '=', [ 3, 'e' ], [ 6, 'e' ] ] ] 302 | correct_sdiff = __map_diffs(correct_sdiff) 303 | assert_nothing_raised { sdiff = Diff::LCS.sdiff(seq1, seq2) } 304 | assert_equal(correct_sdiff, sdiff) 305 | end 306 | 307 | def test_sdiff_f 308 | sdiff = nil 309 | seq1 = %w(x a e) 310 | seq2 = %w(a b c d e) 311 | correct_sdiff = [ 312 | [ '-', [ 0, 'x' ], [ 0, nil ] ], 313 | [ '=', [ 1, 'a' ], [ 0, 'a' ] ], 314 | [ '+', [ 2, nil ], [ 1, 'b' ] ], 315 | [ '+', [ 2, nil ], [ 2, 'c' ] ], 316 | [ '+', [ 2, nil ], [ 3, 'd' ] ], 317 | [ '=', [ 2, 'e' ], [ 4, 'e' ] ] ] 318 | correct_sdiff = __map_diffs(correct_sdiff) 319 | assert_nothing_raised { sdiff = Diff::LCS.sdiff(seq1, seq2) } 320 | assert_equal(correct_sdiff, sdiff) 321 | end 322 | 323 | def test_sdiff_g 324 | sdiff = nil 325 | seq1 = %w(a e) 326 | seq2 = %w(x a b c d e) 327 | correct_sdiff = [ 328 | [ '+', [ 0, nil ], [ 0, 'x' ] ], 329 | [ '=', [ 0, 'a' ], [ 1, 'a' ] ], 330 | [ '+', [ 1, nil ], [ 2, 'b' ] ], 331 | [ '+', [ 1, nil ], [ 3, 'c' ] ], 332 | [ '+', [ 1, nil ], [ 4, 'd' ] ], 333 | [ '=', [ 1, 'e' ], [ 5, 'e' ] ] ] 334 | correct_sdiff = __map_diffs(correct_sdiff) 335 | assert_nothing_raised { sdiff = Diff::LCS.sdiff(seq1, seq2) } 336 | assert_equal(correct_sdiff, sdiff) 337 | end 338 | 339 | def test_sdiff_h 340 | sdiff = nil 341 | seq1 = %w(a e v) 342 | seq2 = %w(x a b c d e w x) 343 | correct_sdiff = [ 344 | [ '+', [ 0, nil ], [ 0, 'x' ] ], 345 | [ '=', [ 0, 'a' ], [ 1, 'a' ] ], 346 | [ '+', [ 1, nil ], [ 2, 'b' ] ], 347 | [ '+', [ 1, nil ], [ 3, 'c' ] ], 348 | [ '+', [ 1, nil ], [ 4, 'd' ] ], 349 | [ '=', [ 1, 'e' ], [ 5, 'e' ] ], 350 | [ '!', [ 2, 'v' ], [ 6, 'w' ] ], 351 | [ '+', [ 3, nil ], [ 7, 'x' ] ] ] 352 | correct_sdiff = __map_diffs(correct_sdiff) 353 | assert_nothing_raised { sdiff = Diff::LCS.sdiff(seq1, seq2) } 354 | assert_equal(correct_sdiff, sdiff) 355 | end 356 | 357 | def test_sdiff_i 358 | sdiff = nil 359 | seq1 = %w() 360 | seq2 = %w(a b c) 361 | correct_sdiff = [ 362 | [ '+', [ 0, nil ], [ 0, 'a' ] ], 363 | [ '+', [ 0, nil ], [ 1, 'b' ] ], 364 | [ '+', [ 0, nil ], [ 2, 'c' ] ] ] 365 | correct_sdiff = __map_diffs(correct_sdiff) 366 | assert_nothing_raised { sdiff = Diff::LCS.sdiff(seq1, seq2) } 367 | assert_equal(correct_sdiff, sdiff) 368 | end 369 | 370 | def test_sdiff_j 371 | sdiff = nil 372 | seq1 = %w(a b c) 373 | seq2 = %w() 374 | correct_sdiff = [ 375 | [ '-', [ 0, 'a' ], [ 0, nil ] ], 376 | [ '-', [ 1, 'b' ], [ 0, nil ] ], 377 | [ '-', [ 2, 'c' ], [ 0, nil ] ] ] 378 | correct_sdiff = __map_diffs(correct_sdiff) 379 | assert_nothing_raised { sdiff = Diff::LCS.sdiff(seq1, seq2) } 380 | assert_equal(correct_sdiff, sdiff) 381 | end 382 | 383 | def test_sdiff_k 384 | sdiff = nil 385 | seq1 = %w(a b c) 386 | seq2 = %w(1) 387 | correct_sdiff = [ 388 | [ '!', [ 0, 'a' ], [ 0, '1' ] ], 389 | [ '-', [ 1, 'b' ], [ 1, nil ] ], 390 | [ '-', [ 2, 'c' ], [ 1, nil ] ] ] 391 | correct_sdiff = __map_diffs(correct_sdiff) 392 | assert_nothing_raised { sdiff = Diff::LCS.sdiff(seq1, seq2) } 393 | assert_equal(correct_sdiff, sdiff) 394 | end 395 | 396 | def test_sdiff_l 397 | sdiff = nil 398 | seq1 = %w(a b c) 399 | seq2 = %w(c) 400 | correct_sdiff = [ 401 | [ '-', [ 0, 'a' ], [ 0, nil ] ], 402 | [ '-', [ 1, 'b' ], [ 0, nil ] ], 403 | [ '=', [ 2, 'c' ], [ 0, 'c' ] ] 404 | ] 405 | correct_sdiff = __map_diffs(correct_sdiff) 406 | assert_nothing_raised { sdiff = Diff::LCS.sdiff(seq1, seq2) } 407 | assert_equal(correct_sdiff, sdiff) 408 | end 409 | 410 | def test_sdiff_m 411 | sdiff = nil 412 | seq1 = %w(abcd efgh ijkl mnop) 413 | seq2 = [] 414 | correct_sdiff = [ 415 | [ '-', [ 0, 'abcd' ], [ 0, nil ] ], 416 | [ '-', [ 1, 'efgh' ], [ 0, nil ] ], 417 | [ '-', [ 2, 'ijkl' ], [ 0, nil ] ], 418 | [ '-', [ 3, 'mnop' ], [ 0, nil ] ] 419 | ] 420 | correct_sdiff = __map_diffs(correct_sdiff) 421 | assert_nothing_raised { sdiff = Diff::LCS.sdiff(seq1, seq2) } 422 | assert_equal(correct_sdiff, sdiff) 423 | end 424 | 425 | def test_sdiff_n 426 | sdiff = nil 427 | seq1 = [] 428 | seq2 = %w(abcd efgh ijkl mnop) 429 | correct_sdiff = [ 430 | [ '+', [ 0, nil ], [ 0, 'abcd' ] ], 431 | [ '+', [ 0, nil ], [ 1, 'efgh' ] ], 432 | [ '+', [ 0, nil ], [ 2, 'ijkl' ] ], 433 | [ '+', [ 0, nil ], [ 3, 'mnop' ] ] 434 | ] 435 | correct_sdiff = __map_diffs(correct_sdiff) 436 | assert_nothing_raised { sdiff = Diff::LCS.sdiff(seq1, seq2) } 437 | assert_equal(correct_sdiff, sdiff) 438 | end 439 | 440 | def test_balanced_a 441 | seq1 = %w(a b c) 442 | seq2 = %w(a x c) 443 | callback = nil 444 | assert_nothing_raised { callback = __balanced_callback } 445 | assert_nothing_raised { Diff::LCS.traverse_balanced(seq1, seq2, callback) } 446 | assert_equal("M00 C11 M22 ", callback.result) 447 | end 448 | 449 | def test_balanced_b 450 | seq1 = %w(a b c) 451 | seq2 = %w(a x c) 452 | callback = nil 453 | assert_nothing_raised do 454 | callback = __balanced_callback 455 | class << callback 456 | undef change 457 | end 458 | end 459 | assert_nothing_raised { Diff::LCS.traverse_balanced(seq1, seq2, callback) } 460 | assert_equal("M00 DA11 DB21 M22 ", callback.result) 461 | end 462 | 463 | def test_balanced_c 464 | seq1 = %w(a x y c) 465 | seq2 = %w(a v w c) 466 | callback = nil 467 | assert_nothing_raised { callback = __balanced_callback } 468 | assert_nothing_raised { Diff::LCS.traverse_balanced(seq1, seq2, callback) } 469 | assert_equal("M00 C11 C22 M33 ", callback.result) 470 | end 471 | 472 | def test_balanced_d 473 | seq1 = %w(x y c) 474 | seq2 = %w(v w c) 475 | callback = nil 476 | assert_nothing_raised { callback = __balanced_callback } 477 | assert_nothing_raised { Diff::LCS.traverse_balanced(seq1, seq2, callback) } 478 | assert_equal("C00 C11 M22 ", callback.result) 479 | end 480 | 481 | def test_balanced_e 482 | seq1 = %w(a x y z) 483 | seq2 = %w(b v w) 484 | callback = nil 485 | assert_nothing_raised { callback = __balanced_callback } 486 | assert_nothing_raised { Diff::LCS.traverse_balanced(seq1, seq2, callback) } 487 | assert_equal("C00 C11 C22 DA33 ", callback.result) 488 | end 489 | 490 | def test_balanced_f 491 | seq1 = %w(a z) 492 | seq2 = %w(a) 493 | callback = nil 494 | assert_nothing_raised { callback = __balanced_callback } 495 | assert_nothing_raised { Diff::LCS.traverse_balanced(seq1, seq2, callback) } 496 | assert_equal("M00 DA11 ", callback.result) 497 | end 498 | 499 | def test_balanced_g 500 | seq1 = %w(z a) 501 | seq2 = %w(a) 502 | callback = nil 503 | assert_nothing_raised { callback = __balanced_callback } 504 | assert_nothing_raised { Diff::LCS.traverse_balanced(seq1, seq2, callback) } 505 | assert_equal("DA00 M10 ", callback.result) 506 | end 507 | 508 | def test_balanced_h 509 | seq1 = %w(a b c) 510 | seq2 = %w(x y z) 511 | callback = nil 512 | assert_nothing_raised { callback = __balanced_callback } 513 | assert_nothing_raised { Diff::LCS.traverse_balanced(seq1, seq2, callback) } 514 | assert_equal("C00 C11 C22 ", callback.result) 515 | end 516 | 517 | def test_balanced_i 518 | seq1 = %w(abcd efgh ijkl mnopqrstuvwxyz) 519 | seq2 = [] 520 | callback = nil 521 | assert_nothing_raised { callback = __balanced_callback } 522 | assert_nothing_raised { Diff::LCS.traverse_balanced(seq1, seq2, callback) } 523 | assert_equal("DA00 DA10 DA20 DA30 ", callback.result) 524 | end 525 | 526 | def test_balanced_j 527 | seq1 = [] 528 | seq2 = %w(abcd efgh ijkl mnopqrstuvwxyz) 529 | callback = nil 530 | assert_nothing_raised { callback = __balanced_callback } 531 | assert_nothing_raised { Diff::LCS.traverse_balanced(seq1, seq2, callback) } 532 | assert_equal("DB00 DB01 DB02 DB03 ", callback.result) 533 | end 534 | end 535 | 536 | class TestPatching < Test::Unit::TestCase 537 | include Diff::LCS::Tests 538 | 539 | def test_patch_diff 540 | ps = ms1 = ms2 = ms3 = nil 541 | assert_nothing_raised do 542 | ps = Diff::LCS.diff(@seq1, @seq2) 543 | ms1 = Diff::LCS.patch(@seq1, ps) 544 | ms2 = Diff::LCS.patch(@seq2, ps, :unpatch) 545 | ms3 = Diff::LCS.patch(@seq2, ps) 546 | end 547 | assert_equal(@seq2, ms1) 548 | assert_equal(@seq1, ms2) 549 | assert_equal(@seq1, ms3) 550 | assert_nothing_raised do 551 | ps = Diff::LCS.diff(@seq1, @seq2, Diff::LCS::ContextDiffCallbacks) 552 | ms1 = Diff::LCS.patch(@seq1, ps) 553 | ms2 = Diff::LCS.patch(@seq2, ps, :unpatch) 554 | ms2 = Diff::LCS.patch(@seq2, ps) 555 | end 556 | assert_equal(@seq2, ms1) 557 | assert_equal(@seq1, ms2) 558 | assert_equal(@seq1, ms3) 559 | assert_nothing_raised do 560 | ps = Diff::LCS.diff(@seq1, @seq2, Diff::LCS::SDiffCallbacks) 561 | ms1 = Diff::LCS.patch(@seq1, ps) 562 | ms2 = Diff::LCS.patch(@seq2, ps, :unpatch) 563 | ms3 = Diff::LCS.patch(@seq2, ps) 564 | end 565 | assert_equal(@seq2, ms1) 566 | assert_equal(@seq1, ms2) 567 | assert_equal(@seq1, ms3) 568 | end 569 | 570 | # Tests patch bug #891: 571 | # http://rubyforge.org/tracker/?func=detail&atid=407&aid=891&group_id=84 572 | def test_patch_bug891 573 | s1 = s2 = s3 = s4 = s5 = ps = nil 574 | assert_nothing_raised do 575 | s1 = %w{a b c d e f g h i j k } 576 | s2 = %w{a b c d D e f g h i j k } 577 | ps = Diff::LCS::diff(s1, s2) 578 | s3 = Diff::LCS.patch(s1, ps, :patch) 579 | ps = Diff::LCS::diff(s1, s2, Diff::LCS::ContextDiffCallbacks) 580 | s4 = Diff::LCS.patch(s1, ps, :patch) 581 | ps = Diff::LCS::diff(s1, s2, Diff::LCS::SDiffCallbacks) 582 | s5 = Diff::LCS.patch(s1, ps, :patch) 583 | end 584 | assert_equal(s2, s3) 585 | assert_equal(s2, s4) 586 | assert_equal(s2, s5) 587 | 588 | assert_nothing_raised do 589 | ps = Diff::LCS::sdiff(s1, s2) 590 | s3 = Diff::LCS.patch(s1, ps, :patch) 591 | ps = Diff::LCS::diff(s1, s2, Diff::LCS::ContextDiffCallbacks) 592 | s4 = Diff::LCS.patch(s1, ps, :patch) 593 | ps = Diff::LCS::diff(s1, s2, Diff::LCS::DiffCallbacks) 594 | s5 = Diff::LCS.patch(s1, ps, :patch) 595 | end 596 | assert_equal(s2, s3) 597 | assert_equal(s2, s4) 598 | assert_equal(s2, s5) 599 | end 600 | 601 | def test_patch_sdiff 602 | ps = ms1 = ms2 = ms3 = nil 603 | assert_nothing_raised do 604 | ps = Diff::LCS.sdiff(@seq1, @seq2) 605 | ms1 = Diff::LCS.patch(@seq1, ps) 606 | ms2 = Diff::LCS.patch(@seq2, ps, :unpatch) 607 | ms3 = Diff::LCS.patch(@seq2, ps) 608 | end 609 | assert_equal(@seq2, ms1) 610 | assert_equal(@seq1, ms2) 611 | assert_equal(@seq1, ms3) 612 | assert_nothing_raised do 613 | ps = Diff::LCS.sdiff(@seq1, @seq2, Diff::LCS::ContextDiffCallbacks) 614 | ms1 = Diff::LCS.patch(@seq1, ps) 615 | ms2 = Diff::LCS.patch(@seq2, ps, :unpatch) 616 | ms3 = Diff::LCS.patch(@seq2, ps) 617 | end 618 | assert_equal(@seq2, ms1) 619 | assert_equal(@seq1, ms2) 620 | assert_equal(@seq1, ms3) 621 | assert_nothing_raised do 622 | ps = Diff::LCS.sdiff(@seq1, @seq2, Diff::LCS::DiffCallbacks) 623 | ms1 = Diff::LCS.patch(@seq1, ps) 624 | ms2 = Diff::LCS.patch(@seq2, ps, :unpatch) 625 | ms3 = Diff::LCS.patch(@seq2, ps) 626 | end 627 | assert_equal(@seq2, ms1) 628 | assert_equal(@seq1, ms2) 629 | assert_equal(@seq1, ms3) 630 | end 631 | end 632 | -------------------------------------------------------------------------------- /lib/diff/lcs.rb: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2004 Austin Ziegler 2 | # Copyright (c) 2010 Gioele Barabucci 3 | # 4 | # This program is free software. It may be redistributed and/or modified 5 | # under the terms of the GPL version 2 (or later), the Perl Artistic 6 | # licence, or the Ruby licence. 7 | 8 | require 'diff/lcs/version' 9 | 10 | module Diff 11 | # Computes "intelligent" differences between two sequenced Enumerables. 12 | # This is an implementation of the McIlroy-Hunt "diff" algorithm for 13 | # Enumerable objects that include Diffable. 14 | # 15 | # Based on Mario I. Wolczko's Smalltalk version 16 | # (1.2, 1993) and Ned Konz's Perl version 17 | # (Algorithm::Diff). 18 | # 19 | # == Synopsis 20 | # require 'diff/lcs' 21 | # 22 | # seq1 = %w(a b c e h j l m n p) 23 | # seq2 = %w(b c d e f j k l m r s t) 24 | # 25 | # lcs = Diff::LCS.LCS(seq1, seq2) 26 | # diffs = Diff::LCS.diff(seq1, seq2) 27 | # sdiff = Diff::LCS.sdiff(seq1, seq2) 28 | # seq = Diff::LCS.traverse_sequences(seq1, seq2, callback_obj) 29 | # bal = Diff::LCS.traverse_balanced(seq1, seq2, callback_obj) 30 | # seq2 == Diff::LCS.patch(seq1, diffs) 31 | # seq2 == Diff::LCS.patch!(seq1, diffs) 32 | # seq1 == Diff::LCS.unpatch(seq2, diffs) 33 | # seq1 == Diff::LCS.unpatch!(seq2, diffs) 34 | # seq2 == Diff::LCS.patch(seq1, sdiff) 35 | # seq2 == Diff::LCS.patch!(seq1, sdiff) 36 | # seq1 == Diff::LCS.unpatch(seq2, sdiff) 37 | # seq1 == Diff::LCS.unpatch!(seq2, sdiff) 38 | # 39 | # Alternatively, objects can be extended with Diff::LCS: 40 | # 41 | # seq1.extend(Diff::LCS) 42 | # lcs = seq1.lcs(seq2) 43 | # diffs = seq1.diff(seq2) 44 | # sdiff = seq1.sdiff(seq2) 45 | # seq = seq1.traverse_sequences(seq2, callback_obj) 46 | # bal = seq1.traverse_balanced(seq2, callback_obj) 47 | # seq2 == seq1.patch(diffs) 48 | # seq2 == seq1.patch!(diffs) 49 | # seq1 == seq2.unpatch(diffs) 50 | # seq1 == seq2.unpatch!(diffs) 51 | # seq2 == seq1.patch(sdiff) 52 | # seq2 == seq1.patch!(sdiff) 53 | # seq1 == seq2.unpatch(sdiff) 54 | # seq1 == seq2.unpatch!(sdiff) 55 | # 56 | # Default extensions are provided for Array and String objects through 57 | # the use of 'diff/lcs/array' and 'diff/lcs/string'. 58 | # 59 | # == Introduction (by Mark-Jason Dominus) 60 | # 61 | # The following text is from the Perl documentation. The only 62 | # changes have been to make the text appear better in Rdoc. 63 | # 64 | # I once read an article written by the authors of +diff+; they said 65 | # that they hard worked very hard on the algorithm until they found the 66 | # right one. 67 | # 68 | # I think what they ended up using (and I hope someone will correct me, 69 | # because I am not very confident about this) was the `longest common 70 | # subsequence' method. In the LCS problem, you have two sequences of 71 | # items: 72 | # 73 | # a b c d f g h j q z 74 | # a b c d e f g i j k r x y z 75 | # 76 | # and you want to find the longest sequence of items that is present in 77 | # both original sequences in the same order. That is, you want to find a 78 | # new sequence *S* which can be obtained from the first sequence by 79 | # deleting some items, and from the second sequence by deleting other 80 | # items. You also want *S* to be as long as possible. In this case *S* 81 | # is: 82 | # 83 | # a b c d f g j z 84 | # 85 | # From there it's only a small step to get diff-like output: 86 | # 87 | # e h i k q r x y 88 | # + - + + - + + + 89 | # 90 | # This module solves the LCS problem. It also includes a canned function 91 | # to generate +diff+-like output. 92 | # 93 | # It might seem from the example above that the LCS of two sequences is 94 | # always pretty obvious, but that's not always the case, especially when 95 | # the two sequences have many repeated elements. For example, consider 96 | # 97 | # a x b y c z p d q 98 | # a b c a x b y c z 99 | # 100 | # A naive approach might start by matching up the +a+ and +b+ that 101 | # appear at the beginning of each sequence, like this: 102 | # 103 | # a x b y c z p d q 104 | # a b c a b y c z 105 | # 106 | # This finds the common subsequence +a b c z+. But actually, the LCS is 107 | # +a x b y c z+: 108 | # 109 | # a x b y c z p d q 110 | # a b c a x b y c z 111 | # 112 | # == Author 113 | # This version is by Austin Ziegler . 114 | # 115 | # It is based on the Perl Algorithm::Diff by Ned Konz 116 | # , copyright © 2000 - 2002 and the Smalltalk 117 | # diff version by Mario I. Wolczko , copyright © 118 | # 1993. Documentation includes work by Mark-Jason Dominus. 119 | # 120 | # == Licence 121 | # Copyright © 2004 Austin Ziegler 122 | # This program is free software; you can redistribute it and/or modify it 123 | # under the same terms as Ruby, or alternatively under the Perl Artistic 124 | # licence. 125 | # 126 | # == Credits 127 | # Much of the documentation is taken directly from the Perl 128 | # Algorithm::Diff implementation and was written originally by Mark-Jason 129 | # Dominus and later by Ned Konz. The basic Ruby 130 | # implementation was re-ported from the Smalltalk implementation, available 131 | # at ftp://st.cs.uiuc.edu/pub/Smalltalk/MANCHESTER/manchester/4.0/diff.st 132 | # 133 | # #sdiff and #traverse_balanced were written for the Perl version by Mike 134 | # Schilli . 135 | # 136 | # "The algorithm is described in A Fast Algorithm for Computing Longest 137 | # Common Subsequences, CACM, vol.20, no.5, pp.350-353, May 1977, with 138 | # a few minor improvements to improve the speed." 139 | module LCS 140 | end 141 | end 142 | 143 | require 'diff/lcs/callbacks' 144 | 145 | module Diff::LCS 146 | # Returns an Array containing the longest common subsequence(s) between 147 | # +self+ and +other+. See Diff::LCS#LCS. 148 | # 149 | # lcs = seq1.lcs(seq2) 150 | def lcs(other, &block) #:yields self[ii] if there are matched subsequences: 151 | Diff::LCS.LCS(self, other, &block) 152 | end 153 | 154 | # Returns the difference set between +self+ and +other+. See 155 | # Diff::LCS#diff. 156 | def diff(other, callbacks = nil, &block) 157 | Diff::LCS::diff(self, other, callbacks, &block) 158 | end 159 | 160 | # Returns the balanced ("side-by-side") difference set between +self+ and 161 | # +other+. See Diff::LCS#sdiff. 162 | def sdiff(other, callbacks = nil, &block) 163 | Diff::LCS::sdiff(self, other, callbacks, &block) 164 | end 165 | 166 | # Traverses the discovered longest common subsequences between +self+ and 167 | # +other+. See Diff::LCS#traverse_sequences. 168 | def traverse_sequences(other, callbacks = nil, &block) 169 | traverse_sequences(self, other, callbacks || Diff::LCS::YieldingCallbacks, 170 | &block) 171 | end 172 | 173 | # Traverses the discovered longest common subsequences between +self+ and 174 | # +other+ using the alternate, balanced algorithm. See 175 | # Diff::LCS#traverse_balanced. 176 | def traverse_balanced(other, callbacks = nil, &block) 177 | traverse_balanced(self, other, callbacks || Diff::LCS::YieldingCallbacks, 178 | &block) 179 | end 180 | 181 | # Attempts to patch a copy of +self+ with the provided +patchset+. See 182 | # Diff::LCS#patch. 183 | def patch(patchset) 184 | Diff::LCS::patch(self.dup, patchset) 185 | end 186 | 187 | # Attempts to unpatch a copy of +self+ with the provided +patchset+. 188 | # See Diff::LCS#patch. 189 | def unpatch(patchset) 190 | Diff::LCS::unpatch(self.dup, patchset) 191 | end 192 | 193 | # Attempts to patch +self+ with the provided +patchset+. See 194 | # Diff::LCS#patch!. Does no autodiscovery. 195 | def patch!(patchset) 196 | Diff::LCS::patch!(self, patchset) 197 | end 198 | 199 | # Attempts to unpatch +self+ with the provided +patchset+. See 200 | # Diff::LCS#unpatch. Does no autodiscovery. 201 | def unpatch!(patchset) 202 | Diff::LCS::unpatch!(self, patchset) 203 | end 204 | end 205 | 206 | module Diff::LCS 207 | class << self 208 | # Given two sequenced Enumerables, LCS returns an Array containing their 209 | # longest common subsequences. 210 | # 211 | # lcs = Diff::LCS.LCS(seq1, seq2) 212 | # 213 | # This array whose contents is such that: 214 | # 215 | # lcs.each_with_index do |ee, ii| 216 | # assert(ee.nil? || (seq1[ii] == seq2[ee])) 217 | # end 218 | # 219 | # If a block is provided, the matching subsequences will be yielded from 220 | # +seq1+ in turn and may be modified before they are placed into the 221 | # returned Array of subsequences. 222 | def LCS(seq1, seq2, &block) #:yields seq1[ii] for each matched: 223 | matches = Diff::LCS.__lcs(seq1, seq2) 224 | ret = [] 225 | matches.each_with_index do |ee, ii| 226 | unless matches[ii].nil? 227 | if block_given? 228 | ret << (yield seq1[ii]) 229 | else 230 | ret << seq1[ii] 231 | end 232 | end 233 | end 234 | ret 235 | end 236 | 237 | # Diff::LCS.diff computes the smallest set of additions and deletions 238 | # necessary to turn the first sequence into the second, and returns a 239 | # description of these changes. 240 | # 241 | # See Diff::LCS::DiffCallbacks for the default behaviour. An alternate 242 | # behaviour may be implemented with Diff::LCS::ContextDiffCallbacks. 243 | # If a Class argument is provided for +callbacks+, #diff will attempt 244 | # to initialise it. If the +callbacks+ object (possibly initialised) 245 | # responds to #finish, it will be called. 246 | def diff(seq1, seq2, callbacks = nil, &block) # :yields diff changes: 247 | callbacks ||= Diff::LCS::DiffCallbacks 248 | if callbacks.kind_of?(Class) 249 | cb = callbacks.new rescue callbacks 250 | callbacks = cb 251 | end 252 | traverse_sequences(seq1, seq2, callbacks) 253 | callbacks.finish if callbacks.respond_to?(:finish) 254 | 255 | if block_given? 256 | res = callbacks.diffs.map do |hunk| 257 | if hunk.kind_of?(Array) 258 | hunk = hunk.map { |hunk_block| yield hunk_block } 259 | else 260 | yield hunk 261 | end 262 | end 263 | res 264 | else 265 | callbacks.diffs 266 | end 267 | end 268 | 269 | # Diff::LCS.sdiff computes all necessary components to show two sequences 270 | # and their minimized differences side by side, just like the Unix 271 | # utility sdiff does: 272 | # 273 | # old < - 274 | # same same 275 | # before | after 276 | # - > new 277 | # 278 | # See Diff::LCS::SDiffCallbacks for the default behaviour. An alternate 279 | # behaviour may be implemented with Diff::LCS::ContextDiffCallbacks. If 280 | # a Class argument is provided for +callbacks+, #diff will attempt to 281 | # initialise it. If the +callbacks+ object (possibly initialised) 282 | # responds to #finish, it will be called. 283 | def sdiff(seq1, seq2, callbacks = nil, &block) #:yields diff changes: 284 | callbacks ||= Diff::LCS::SDiffCallbacks 285 | if callbacks.kind_of?(Class) 286 | cb = callbacks.new rescue callbacks 287 | callbacks = cb 288 | end 289 | traverse_balanced(seq1, seq2, callbacks) 290 | callbacks.finish if callbacks.respond_to?(:finish) 291 | 292 | if block_given? 293 | res = callbacks.diffs.map do |hunk| 294 | if hunk.kind_of?(Array) 295 | hunk = hunk.map { |hunk_block| yield hunk_block } 296 | else 297 | yield hunk 298 | end 299 | end 300 | res 301 | else 302 | callbacks.diffs 303 | end 304 | end 305 | 306 | # Diff::LCS.traverse_sequences is the most general facility provided by this 307 | # module; +diff+ and +LCS+ are implemented as calls to it. 308 | # 309 | # The arguments to #traverse_sequences are the two sequences to 310 | # traverse, and a callback object, like this: 311 | # 312 | # traverse_sequences(seq1, seq2, Diff::LCS::ContextDiffCallbacks.new) 313 | # 314 | # #diff is implemented with #traverse_sequences. 315 | # 316 | # == Callback Methods 317 | # Optional callback methods are emphasized. 318 | # 319 | # callbacks#match:: Called when +a+ and +b+ are pointing 320 | # to common elements in +A+ and +B+. 321 | # callbacks#discard_a:: Called when +a+ is pointing to an 322 | # element not in +B+. 323 | # callbacks#discard_b:: Called when +b+ is pointing to an 324 | # element not in +A+. 325 | # callbacks#finished_a:: Called when +a+ has reached the end of 326 | # sequence +A+. 327 | # callbacks#finished_b:: Called when +b+ has reached the end of 328 | # sequence +B+. 329 | # 330 | # == Algorithm 331 | # a---+ 332 | # v 333 | # A = a b c e h j l m n p 334 | # B = b c d e f j k l m r s t 335 | # ^ 336 | # b---+ 337 | # 338 | # If there are two arrows (+a+ and +b+) pointing to elements of 339 | # sequences +A+ and +B+, the arrows will initially point to the first 340 | # elements of their respective sequences. #traverse_sequences will 341 | # advance the arrows through the sequences one element at a time, 342 | # calling a method on the user-specified callback object before each 343 | # advance. It will advance the arrows in such a way that if there are 344 | # elements A[ii] and B[jj] which are both equal and 345 | # part of the longest common subsequence, there will be some moment 346 | # during the execution of #traverse_sequences when arrow +a+ is pointing 347 | # to A[ii] and arrow +b+ is pointing to B[jj]. When 348 | # this happens, #traverse_sequences will call callbacks#match 349 | # and then it will advance both arrows. 350 | # 351 | # Otherwise, one of the arrows is pointing to an element of its sequence 352 | # that is not part of the longest common subsequence. 353 | # #traverse_sequences will advance that arrow and will call 354 | # callbacks#discard_a or callbacks#discard_b, depending 355 | # on which arrow it advanced. If both arrows point to elements that are 356 | # not part of the longest common subsequence, then #traverse_sequences 357 | # will advance one of them and call the appropriate callback, but it is 358 | # not specified which it will call. 359 | # 360 | # The methods for callbacks#match, callbacks#discard_a, 361 | # and callbacks#discard_b are invoked with an event comprising 362 | # the action ("=", "+", or "-", respectively), the indicies +ii+ and 363 | # +jj+, and the elements A[ii] and B[jj]. Return 364 | # values are discarded by #traverse_sequences. 365 | # 366 | # === End of Sequences 367 | # If arrow +a+ reaches the end of its sequence before arrow +b+ does, 368 | # #traverse_sequence try to call callbacks#finished_a with the 369 | # last index and element of +A+ (A[-1]) and the current index 370 | # and element of +B+ (B[jj]). If callbacks#finished_a 371 | # does not exist, then callbacks#discard_b will be called on 372 | # each element of +B+ until the end of the sequence is reached (the call 373 | # will be done with A[-1] and B[jj] for each element). 374 | # 375 | # If +b+ reaches the end of +B+ before +a+ reaches the end of +A+, 376 | # callbacks#finished_b will be called with the current index 377 | # and element of +A+ (A[ii]) and the last index and element of 378 | # +B+ (A[-1]). Again, if callbacks#finished_b does not 379 | # exist on the callback object, then callbacks#discard_a will 380 | # be called on each element of +A+ until the end of the sequence is 381 | # reached (A[ii] and B[-1]). 382 | # 383 | # There is a chance that one additional callbacks#discard_a or 384 | # callbacks#discard_b will be called after the end of the 385 | # sequence is reached, if +a+ has not yet reached the end of +A+ or +b+ 386 | # has not yet reached the end of +B+. 387 | def traverse_sequences(seq1, seq2, callbacks = Diff::LCS::SequenceCallbacks, &block) #:yields change events: 388 | matches = Diff::LCS.__lcs(seq1, seq2) 389 | 390 | run_finished_a = run_finished_b = false 391 | string = seq1.kind_of?(String) 392 | 393 | a_size = seq1.size 394 | b_size = seq2.size 395 | ai = bj = 0 396 | 397 | (0 .. matches.size).each do |ii| 398 | b_line = matches[ii] 399 | 400 | ax = string ? seq1[ii, 1] : seq1[ii] 401 | bx = string ? seq2[bj, 1] : seq2[bj] 402 | 403 | if b_line.nil? 404 | unless ax.nil? 405 | event = Diff::LCS::ContextChange.new('-', ii, ax, bj, bx) 406 | event = yield event if block_given? 407 | callbacks.discard_a(event) 408 | end 409 | else 410 | loop do 411 | break unless bj < b_line 412 | bx = string ? seq2[bj, 1] : seq2[bj] 413 | event = Diff::LCS::ContextChange.new('+', ii, ax, bj, bx) 414 | event = yield event if block_given? 415 | callbacks.discard_b(event) 416 | bj += 1 417 | end 418 | bx = string ? seq2[bj, 1] : seq2[bj] 419 | event = Diff::LCS::ContextChange.new('=', ii, ax, bj, bx) 420 | event = yield event if block_given? 421 | callbacks.match(event) 422 | bj += 1 423 | end 424 | ai = ii 425 | end 426 | ai += 1 427 | 428 | # The last entry (if any) processed was a match. +ai+ and +bj+ point 429 | # just past the last matching lines in their sequences. 430 | while (ai < a_size) or (bj < b_size) 431 | # last A? 432 | if ai == a_size and bj < b_size 433 | if callbacks.respond_to?(:finished_a) and not run_finished_a 434 | ax = string ? seq1[-1, 1] : seq1[-1] 435 | bx = string ? seq2[bj, 1] : seq2[bj] 436 | event = Diff::LCS::ContextChange.new('>', (a_size - 1), ax, bj, bx) 437 | event = yield event if block_given? 438 | callbacks.finished_a(event) 439 | run_finished_a = true 440 | else 441 | ax = string ? seq1[ai, 1] : seq1[ai] 442 | loop do 443 | bx = string ? seq2[bj, 1] : seq2[bj] 444 | event = Diff::LCS::ContextChange.new('+', ai, ax, bj, bx) 445 | event = yield event if block_given? 446 | callbacks.discard_b(event) 447 | bj += 1 448 | break unless bj < b_size 449 | end 450 | end 451 | end 452 | 453 | # last B? 454 | if bj == b_size and ai < a_size 455 | if callbacks.respond_to?(:finished_b) and not run_finished_b 456 | ax = string ? seq1[ai, 1] : seq1[ai] 457 | bx = string ? seq2[-1, 1] : seq2[-1] 458 | event = Diff::LCS::ContextChange.new('<', ai, ax, (b_size - 1), bx) 459 | event = yield event if block_given? 460 | callbacks.finished_b(event) 461 | run_finished_b = true 462 | else 463 | bx = string ? seq2[bj, 1] : seq2[bj] 464 | loop do 465 | ax = string ? seq1[ai, 1] : seq1[ai] 466 | event = Diff::LCS::ContextChange.new('-', ai, ax, bj, bx) 467 | event = yield event if block_given? 468 | callbacks.discard_a(event) 469 | ai += 1 470 | break unless bj < b_size 471 | end 472 | end 473 | end 474 | 475 | if ai < a_size 476 | ax = string ? seq1[ai, 1] : seq1[ai] 477 | bx = string ? seq2[bj, 1] : seq2[bj] 478 | event = Diff::LCS::ContextChange.new('-', ai, ax, bj, bx) 479 | event = yield event if block_given? 480 | callbacks.discard_a(event) 481 | ai += 1 482 | end 483 | 484 | if bj < b_size 485 | ax = string ? seq1[ai, 1] : seq1[ai] 486 | bx = string ? seq2[bj, 1] : seq2[bj] 487 | event = Diff::LCS::ContextChange.new('+', ai, ax, bj, bx) 488 | event = yield event if block_given? 489 | callbacks.discard_b(event) 490 | bj += 1 491 | end 492 | end 493 | end 494 | 495 | # #traverse_balanced is an alternative to #traverse_sequences. It 496 | # uses a different algorithm to iterate through the entries in the 497 | # computed longest common subsequence. Instead of viewing the changes as 498 | # insertions or deletions from one of the sequences, #traverse_balanced 499 | # will report changes between the sequences. To represent a 500 | # 501 | # The arguments to #traverse_balanced are the two sequences to traverse 502 | # and a callback object, like this: 503 | # 504 | # traverse_balanced(seq1, seq2, Diff::LCS::ContextDiffCallbacks.new) 505 | # 506 | # #sdiff is implemented with #traverse_balanced. 507 | # 508 | # == Callback Methods 509 | # Optional callback methods are emphasized. 510 | # 511 | # callbacks#match:: Called when +a+ and +b+ are pointing 512 | # to common elements in +A+ and +B+. 513 | # callbacks#discard_a:: Called when +a+ is pointing to an 514 | # element not in +B+. 515 | # callbacks#discard_b:: Called when +b+ is pointing to an 516 | # element not in +A+. 517 | # callbacks#change:: Called when +a+ and +b+ are pointing 518 | # to the same relative position, but 519 | # A[a] and B[b] are 520 | # not the same; a change has 521 | # occurred. 522 | # 523 | # #traverse_balanced might be a bit slower than #traverse_sequences, 524 | # noticable only while processing huge amounts of data. 525 | # 526 | # The +sdiff+ function of this module is implemented as call to 527 | # #traverse_balanced. 528 | # 529 | # == Algorithm 530 | # a---+ 531 | # v 532 | # A = a b c e h j l m n p 533 | # B = b c d e f j k l m r s t 534 | # ^ 535 | # b---+ 536 | # 537 | # === Matches 538 | # If there are two arrows (+a+ and +b+) pointing to elements of 539 | # sequences +A+ and +B+, the arrows will initially point to the first 540 | # elements of their respective sequences. #traverse_sequences will 541 | # advance the arrows through the sequences one element at a time, 542 | # calling a method on the user-specified callback object before each 543 | # advance. It will advance the arrows in such a way that if there are 544 | # elements A[ii] and B[jj] which are both equal and 545 | # part of the longest common subsequence, there will be some moment 546 | # during the execution of #traverse_sequences when arrow +a+ is pointing 547 | # to A[ii] and arrow +b+ is pointing to B[jj]. When 548 | # this happens, #traverse_sequences will call callbacks#match 549 | # and then it will advance both arrows. 550 | # 551 | # === Discards 552 | # Otherwise, one of the arrows is pointing to an element of its sequence 553 | # that is not part of the longest common subsequence. 554 | # #traverse_sequences will advance that arrow and will call 555 | # callbacks#discard_a or callbacks#discard_b, 556 | # depending on which arrow it advanced. 557 | # 558 | # === Changes 559 | # If both +a+ and +b+ point to elements that are not part of the longest 560 | # common subsequence, then #traverse_sequences will try to call 561 | # callbacks#change and advance both arrows. If 562 | # callbacks#change is not implemented, then 563 | # callbacks#discard_a and callbacks#discard_b will be 564 | # called in turn. 565 | # 566 | # The methods for callbacks#match, callbacks#discard_a, 567 | # callbacks#discard_b, and callbacks#change are 568 | # invoked with an event comprising the action ("=", "+", "-", or "!", 569 | # respectively), the indicies +ii+ and +jj+, and the elements 570 | # A[ii] and B[jj]. Return values are discarded by 571 | # #traverse_balanced. 572 | # 573 | # === Context 574 | # Note that +ii+ and +jj+ may not be the same index position, even if 575 | # +a+ and +b+ are considered to be pointing to matching or changed 576 | # elements. 577 | def traverse_balanced(seq1, seq2, callbacks = Diff::LCS::BalancedCallbacks) 578 | matches = Diff::LCS.__lcs(seq1, seq2) 579 | a_size = seq1.size 580 | b_size = seq2.size 581 | ai = bj = mb = 0 582 | ma = -1 583 | string = seq1.kind_of?(String) 584 | 585 | # Process all the lines in the match vector. 586 | loop do 587 | # Find next match indices +ma+ and +mb+ 588 | loop do 589 | ma += 1 590 | break unless ma < matches.size and matches[ma].nil? 591 | end 592 | 593 | break if ma >= matches.size # end of matches? 594 | mb = matches[ma] 595 | 596 | # Change(seq2) 597 | while (ai < ma) or (bj < mb) 598 | ax = string ? seq1[ai, 1] : seq1[ai] 599 | bx = string ? seq2[bj, 1] : seq2[bj] 600 | 601 | case [(ai < ma), (bj < mb)] 602 | when [true, true] 603 | if callbacks.respond_to?(:change) 604 | event = Diff::LCS::ContextChange.new('!', ai, ax, bj, bx) 605 | event = yield event if block_given? 606 | callbacks.change(event) 607 | ai += 1 608 | bj += 1 609 | else 610 | event = Diff::LCS::ContextChange.new('-', ai, ax, bj, bx) 611 | event = yield event if block_given? 612 | callbacks.discard_a(event) 613 | ai += 1 614 | ax = string ? seq1[ai, 1] : seq1[ai] 615 | event = Diff::LCS::ContextChange.new('+', ai, ax, bj, bx) 616 | event = yield event if block_given? 617 | callbacks.discard_b(event) 618 | bj += 1 619 | end 620 | when [true, false] 621 | event = Diff::LCS::ContextChange.new('-', ai, ax, bj, bx) 622 | event = yield event if block_given? 623 | callbacks.discard_a(event) 624 | ai += 1 625 | when [false, true] 626 | event = Diff::LCS::ContextChange.new('+', ai, ax, bj, bx) 627 | event = yield event if block_given? 628 | callbacks.discard_b(event) 629 | bj += 1 630 | end 631 | end 632 | 633 | # Match 634 | ax = string ? seq1[ai, 1] : seq1[ai] 635 | bx = string ? seq2[bj, 1] : seq2[bj] 636 | event = Diff::LCS::ContextChange.new('=', ai, ax, bj, bx) 637 | event = yield event if block_given? 638 | callbacks.match(event) 639 | ai += 1 640 | bj += 1 641 | end 642 | 643 | while (ai < a_size) or (bj < b_size) 644 | ax = string ? seq1[ai, 1] : seq1[ai] 645 | bx = string ? seq2[bj, 1] : seq2[bj] 646 | 647 | case [(ai < a_size), (bj < b_size)] 648 | when [true, true] 649 | if callbacks.respond_to?(:change) 650 | event = Diff::LCS::ContextChange.new('!', ai, ax, bj, bx) 651 | event = yield event if block_given? 652 | callbacks.change(event) 653 | ai += 1 654 | bj += 1 655 | else 656 | event = Diff::LCS::ContextChange.new('-', ai, ax, bj, bx) 657 | event = yield event if block_given? 658 | callbacks.discard_a(event) 659 | ai += 1 660 | ax = string ? seq1[ai, 1] : seq1[ai] 661 | event = Diff::LCS::ContextChange.new('+', ai, ax, bj, bx) 662 | event = yield event if block_given? 663 | callbacks.discard_b(event) 664 | bj += 1 665 | end 666 | when [true, false] 667 | event = Diff::LCS::ContextChange.new('-', ai, ax, bj, bx) 668 | event = yield event if block_given? 669 | callbacks.discard_a(event) 670 | ai += 1 671 | when [false, true] 672 | event = Diff::LCS::ContextChange.new('+', ai, ax, bj, bx) 673 | event = yield event if block_given? 674 | callbacks.discard_b(event) 675 | bj += 1 676 | end 677 | end 678 | end 679 | 680 | PATCH_MAP = { #:nodoc: 681 | :patch => { '+' => '+', '-' => '-', '!' => '!', '=' => '=' }, 682 | :unpatch => { '+' => '-', '-' => '+', '!' => '!', '=' => '=' } 683 | } 684 | 685 | # Given a patchset, convert the current version to the new 686 | # version. If +direction+ is not specified (must be 687 | # :patch or :unpatch), then discovery of the 688 | # direction of the patch will be attempted. 689 | def patch(src, patchset, direction = nil) 690 | string = src.kind_of?(String) 691 | # Start with a new empty type of the source's class 692 | res = src.class.new 693 | 694 | # Normalize the patchset. 695 | patchset = __normalize_patchset(patchset) 696 | 697 | direction ||= Diff::LCS.__diff_direction(src, patchset) 698 | direction ||= :patch 699 | 700 | ai = bj = 0 701 | 702 | patchset.each do |change| 703 | # Both Change and ContextChange support #action 704 | action = PATCH_MAP[direction][change.action] 705 | 706 | case change 707 | when Diff::LCS::ContextChange 708 | case direction 709 | when :patch 710 | el = change.new_element 711 | op = change.old_position 712 | np = change.new_position 713 | when :unpatch 714 | el = change.old_element 715 | op = change.new_position 716 | np = change.old_position 717 | end 718 | 719 | case action 720 | when '-' # Remove details from the old string 721 | while ai < op 722 | res << (string ? src[ai, 1] : src[ai]) 723 | ai += 1 724 | bj += 1 725 | end 726 | ai += 1 727 | when '+' 728 | while bj < np 729 | res << (string ? src[ai, 1] : src[ai]) 730 | ai += 1 731 | bj += 1 732 | end 733 | 734 | res << el 735 | bj += 1 736 | when '=' 737 | # This only appears in sdiff output with the SDiff callback. 738 | # Therefore, we only need to worry about dealing with a single 739 | # element. 740 | res << el 741 | 742 | ai += 1 743 | bj += 1 744 | when '!' 745 | while ai < op 746 | res << (string ? src[ai, 1] : src[ai]) 747 | ai += 1 748 | bj += 1 749 | end 750 | 751 | bj += 1 752 | ai += 1 753 | 754 | res << el 755 | end 756 | when Diff::LCS::Change 757 | case action 758 | when '-' 759 | while ai < change.position 760 | res << (string ? src[ai, 1] : src[ai]) 761 | ai += 1 762 | bj += 1 763 | end 764 | ai += 1 765 | when '+' 766 | while bj < change.position 767 | res << (string ? src[ai, 1] : src[ai]) 768 | ai += 1 769 | bj += 1 770 | end 771 | 772 | bj += 1 773 | 774 | res << change.element 775 | end 776 | end 777 | end 778 | 779 | while ai < src.size 780 | res << (string ? src[ai, 1] : src[ai]) 781 | ai += 1 782 | bj += 1 783 | end 784 | 785 | res 786 | end 787 | 788 | # Given a set of patchset, convert the current version to the prior 789 | # version. Does no auto-discovery. 790 | def unpatch!(src, patchset) 791 | Diff::LCS.patch(src, patchset, :unpatch) 792 | end 793 | 794 | # Given a set of patchset, convert the current version to the next 795 | # version. Does no auto-discovery. 796 | def patch!(src, patchset) 797 | Diff::LCS.patch(src, patchset, :patch) 798 | end 799 | 800 | # private 801 | # Compute the longest common subsequence between the sequenced Enumerables 802 | # +a+ and +b+. The result is an array whose contents is such that 803 | # 804 | # result = Diff::LCS.__lcs(a, b) 805 | # result.each_with_index do |e, ii| 806 | # assert_equal(a[ii], b[e]) unless e.nil? 807 | # end 808 | def __lcs(a, b) 809 | a_start = b_start = 0 810 | a_finish = a.size - 1 811 | b_finish = b.size - 1 812 | vector = [] 813 | 814 | # Prune off any common elements at the beginning... 815 | while (a_start <= a_finish) and 816 | (b_start <= b_finish) and 817 | (a[a_start] == b[b_start]) 818 | vector[a_start] = b_start 819 | a_start += 1 820 | b_start += 1 821 | end 822 | 823 | # Now the end... 824 | while (a_start <= a_finish) and 825 | (b_start <= b_finish) and 826 | (a[a_finish] == b[b_finish]) 827 | vector[a_finish] = b_finish 828 | a_finish -= 1 829 | b_finish -= 1 830 | end 831 | 832 | # Now, compute the equivalence classes of positions of elements. 833 | b_matches = Diff::LCS.__position_hash(b, b_start .. b_finish) 834 | 835 | thresh = [] 836 | links = [] 837 | 838 | (a_start .. a_finish).each do |ii| 839 | ai = a.kind_of?(String) ? a[ii, 1] : a[ii] 840 | bm = b_matches[ai] 841 | kk = nil 842 | bm.reverse_each do |jj| 843 | if kk and (thresh[kk] > jj) and (thresh[kk - 1] < jj) 844 | thresh[kk] = jj 845 | else 846 | kk = Diff::LCS.__replace_next_larger(thresh, jj, kk) 847 | end 848 | links[kk] = [ (kk > 0) ? links[kk - 1] : nil, ii, jj ] unless kk.nil? 849 | end 850 | end 851 | 852 | unless thresh.empty? 853 | link = links[thresh.size - 1] 854 | while not link.nil? 855 | vector[link[1]] = link[2] 856 | link = link[0] 857 | end 858 | end 859 | 860 | vector 861 | end 862 | 863 | # Find the place at which +value+ would normally be inserted into the 864 | # Enumerable. If that place is already occupied by +value+, do nothing 865 | # and return +nil+. If the place does not exist (i.e., it is off the end 866 | # of the Enumerable), add it to the end. Otherwise, replace the element 867 | # at that point with +value+. It is assumed that the Enumerable's values 868 | # are numeric. 869 | # 870 | # This operation preserves the sort order. 871 | def __replace_next_larger(enum, value, last_index = nil) 872 | # Off the end? 873 | if enum.empty? or (value > enum[-1]) 874 | enum << value 875 | return enum.size - 1 876 | end 877 | 878 | # Binary search for the insertion point 879 | last_index ||= enum.size 880 | first_index = 0 881 | while (first_index <= last_index) 882 | ii = (first_index + last_index) >> 1 883 | 884 | found = enum[ii] 885 | 886 | if value == found 887 | return nil 888 | elsif value > found 889 | first_index = ii + 1 890 | else 891 | last_index = ii - 1 892 | end 893 | end 894 | 895 | # The insertion point is in first_index; overwrite the next larger 896 | # value. 897 | enum[first_index] = value 898 | return first_index 899 | end 900 | 901 | # If +vector+ maps the matching elements of another collection onto this 902 | # Enumerable, compute the inverse +vector+ that maps this Enumerable 903 | # onto the collection. (Currently unused.) 904 | def __inverse_vector(a, vector) 905 | inverse = a.dup 906 | (0 ... vector.size).each do |ii| 907 | inverse[vector[ii]] = ii unless vector[ii].nil? 908 | end 909 | inverse 910 | end 911 | 912 | # Returns a hash mapping each element of an Enumerable to the set of 913 | # positions it occupies in the Enumerable, optionally restricted to the 914 | # elements specified in the range of indexes specified by +interval+. 915 | def __position_hash(enum, interval = 0 .. -1) 916 | hash = Hash.new { |hh, kk| hh[kk] = [] } 917 | interval.each do |ii| 918 | kk = enum.kind_of?(String) ? enum[ii, 1] : enum[ii] 919 | hash[kk] << ii 920 | end 921 | hash 922 | end 923 | 924 | # Examine the patchset and the source to see in which direction the 925 | # patch should be applied. 926 | # 927 | # WARNING: By default, this examines the whole patch, so this could take 928 | # some time. This also works better with Diff::LCS::ContextChange or 929 | # Diff::LCS::Change as its source, as an array will cause the creation 930 | # of one of the above. 931 | def __diff_direction(src, patchset, limit = nil) 932 | count = left = left_miss = right = right_miss = 0 933 | string = src.kind_of?(String) 934 | 935 | patchset.each do |change| 936 | count += 1 937 | 938 | case change 939 | when Diff::LCS::Change 940 | # With a simplistic change, we can't tell the difference between 941 | # the left and right on '!' actions, so we ignore those. On '=' 942 | # actions, if there's a miss, we miss both left and right. 943 | element = string ? src[change.position, 1] : src[change.position] 944 | 945 | case change.action 946 | when '-' 947 | if element == change.element 948 | left += 1 949 | else 950 | left_miss += 1 951 | end 952 | when '+' 953 | if element == change.element 954 | right += 1 955 | else 956 | right_miss += 1 957 | end 958 | when '=' 959 | if element != change.element 960 | left_miss += 1 961 | right_miss += 1 962 | end 963 | end 964 | when Diff::LCS::ContextChange 965 | case change.action 966 | when '-' # Remove details from the old string 967 | element = string ? src[change.old_position, 1] : src[change.old_position] 968 | if element == change.old_element 969 | left += 1 970 | else 971 | left_miss += 1 972 | end 973 | when '+' 974 | element = string ? src[change.new_position, 1] : src[change.new_position] 975 | if element == change.new_element 976 | right += 1 977 | else 978 | right_miss += 1 979 | end 980 | when '=' 981 | le = string ? src[change.old_position, 1] : src[change.old_position] 982 | re = string ? src[change.new_position, 1] : src[change.new_position] 983 | 984 | left_miss += 1 if le != change.old_element 985 | right_miss += 1 if re != change.new_element 986 | when '!' 987 | element = string ? src[change.old_position, 1] : src[change.old_position] 988 | if element == change.old_element 989 | left += 1 990 | else 991 | element = string ? src[change.new_position, 1] : src[change.new_position] 992 | if element == change.new_element 993 | right += 1 994 | else 995 | left_miss += 1 996 | right_miss += 1 997 | end 998 | end 999 | end 1000 | end 1001 | 1002 | break if not limit.nil? and count > limit 1003 | end 1004 | 1005 | no_left = (left == 0) and (left_miss >= 0) 1006 | no_right = (right == 0) and (right_miss >= 0) 1007 | 1008 | case [no_left, no_right] 1009 | when [false, true] 1010 | return :patch 1011 | when [true, false] 1012 | return :unpatch 1013 | else 1014 | raise "The provided patchset does not appear to apply to the provided value as either source or destination value." 1015 | end 1016 | end 1017 | 1018 | # Normalize the patchset. A patchset is always a sequence of changes, but 1019 | # how those changes are represented may vary, depending on how they were 1020 | # generated. In all cases we support, we also support the array 1021 | # representation of the changes. The formats are: 1022 | # 1023 | # [ # patchset <- Diff::LCS.diff(a, b) 1024 | # [ # one or more hunks 1025 | # Diff::LCS::Change # one or more changes 1026 | # ] ] 1027 | # 1028 | # [ # patchset, equivalent to the above 1029 | # [ # one or more hunks 1030 | # [ action, line, value ] # one or more changes 1031 | # ] ] 1032 | # 1033 | # [ # patchset <- Diff::LCS.diff(a, b, Diff::LCS::ContextDiffCallbacks) 1034 | # # OR <- Diff::LCS.sdiff(a, b, Diff::LCS::ContextDiffCallbacks) 1035 | # [ # one or more hunks 1036 | # Diff::LCS::ContextChange # one or more changes 1037 | # ] ] 1038 | # 1039 | # [ # patchset, equivalent to the above 1040 | # [ # one or more hunks 1041 | # [ action, [ old line, old value ], [ new line, new value ] ] 1042 | # # one or more changes 1043 | # ] ] 1044 | # 1045 | # [ # patchset <- Diff::LCS.sdiff(a, b) 1046 | # # OR <- Diff::LCS.diff(a, b, Diff::LCS::SDiffCallbacks) 1047 | # Diff::LCS::ContextChange # one or more changes 1048 | # ] 1049 | # 1050 | # [ # patchset, equivalent to the above 1051 | # [ action, [ old line, old value ], [ new line, new value ] ] 1052 | # # one or more changes 1053 | # ] 1054 | # 1055 | # The result of this will be either of the following. 1056 | # 1057 | # [ # patchset 1058 | # Diff::LCS::ContextChange # one or more changes 1059 | # ] 1060 | # 1061 | # [ # patchset 1062 | # Diff::LCS::Change # one or more changes 1063 | # ] 1064 | # 1065 | # If either of the above is provided, it will be returned as such. 1066 | # 1067 | def __normalize_patchset(patchset) 1068 | patchset.map do |hunk| 1069 | case hunk 1070 | when Diff::LCS::ContextChange, Diff::LCS::Change 1071 | hunk 1072 | when Array 1073 | if (not hunk[0].kind_of?(Array)) and hunk[1].kind_of?(Array) and hunk[2].kind_of?(Array) 1074 | Diff::LCS::ContextChange.from_a(hunk) 1075 | else 1076 | hunk.map do |change| 1077 | case change 1078 | when Diff::LCS::ContextChange, Diff::LCS::Change 1079 | change 1080 | when Array 1081 | # change[1] will ONLY be an array in a ContextChange#to_a call. 1082 | # In Change#to_a, it represents the line (singular). 1083 | if change[1].kind_of?(Array) 1084 | Diff::LCS::ContextChange.from_a(change) 1085 | else 1086 | Diff::LCS::Change.from_a(change) 1087 | end 1088 | end 1089 | end 1090 | end 1091 | else 1092 | raise ArgumentError, "Cannot normalise a hunk of class #{hunk.class}." 1093 | end 1094 | end.flatten 1095 | end 1096 | end 1097 | end 1098 | --------------------------------------------------------------------------------