Skip to content

Commit 73bfa35

Browse files
authored
Fix hash diffing algorithm (#293)
Fixes #99. This rewrites the hash diffing algorithm to leverage LCS on the hash keys, maintaining the goal of preserving the order of the `actual` keys where possible. This causes a couple notable changes. Previously, changes to a shared key's value were sometimes rendered as separate deletes and inserts. These changes are now always visually colocated as a delete of the old value followed immediately by an insert of the new value. Additionally, keys at the beginning of `expected` that are not in `actual` were previously rendered as deletes at the end of the diff. These are now rendered at the beginning, reflecting the key ordering relative to `actual` more accurately.
1 parent d6d82e1 commit 73bfa35

File tree

5 files changed

+378
-175
lines changed

5 files changed

+378
-175
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,10 @@
66

77
- Add official Ruby 3.4 support. [#289](https://github.com/splitwise/super_diff/pull/289) by [@olleolleolle](https://github.com/olleolleolle)
88

9+
### Bug fixes
10+
11+
- Fix hash diffing algorithm. [#293](https://github.com/splitwise/super_diff/pull/293)
12+
913
### Other changes
1014

1115
- Fix bundler gem caching in CI. [#289](https://github.com/splitwise/super_diff/pull/289) by [@olleolleolle](https://github.com/olleolleolle)

lib/super_diff/basic/operation_tree_builders/hash.rb

Lines changed: 73 additions & 173 deletions
Original file line numberDiff line numberDiff line change
@@ -21,195 +21,95 @@ def build_operation_tree
2121
private
2222

2323
def unary_operations_using_variant_of_patience_algorithm
24-
operations = []
2524
aks = actual.keys
2625
eks = expected.keys
27-
previous_ei = nil
28-
ei = 0
29-
ai = 0
30-
31-
# When diffing a hash, we're more interested in the 'actual' version
32-
# than the 'expected' version, because that's the ultimate truth.
33-
# Therefore, the diff is presented from the perspective of the 'actual'
34-
# hash, and we start off by looping over it.
35-
while ai < aks.size
36-
ak = aks[ai]
37-
av = actual[ak]
38-
ev = expected[ak]
39-
# While we iterate over 'actual' in order, we jump all over
40-
# 'expected', trying to match up its keys with the keys in 'actual' as
41-
# much as possible.
42-
ei = eks.index(ak)
43-
44-
if should_add_noop_operation?(ak)
45-
# (If we're here, it probably means that the key we're pointing to
46-
# in the 'actual' and 'expected' hashes have the same value.)
47-
48-
if ei && previous_ei && (ei - previous_ei) > 1
49-
# If we've jumped from one operation in the 'expected' hash to
50-
# another operation later in 'expected' (due to the fact that the
51-
# 'expected' hash is in a different order than 'actual'), collect
52-
# any delete operations in between and add them to our operations
53-
# array as deletes before adding the noop. If we don't do this
54-
# now, then those deletes will disappear. (Again, we are mainly
55-
# iterating over 'actual', so this is the only way to catch all of
56-
# the keys in 'expected'.)
57-
(previous_ei + 1).upto(ei - 1) do |ei2|
58-
ek = eks[ei2]
59-
ev2 = expected[ek]
60-
av2 = actual[ek]
61-
62-
next unless
63-
(!actual.include?(ek) || ev2 != av2) &&
64-
operations.none? do |operation|
65-
%i[delete noop].include?(operation.name) &&
66-
operation.key == ek
67-
end
68-
69-
operations << Core::UnaryOperation.new(
70-
name: :delete,
71-
collection: expected,
72-
key: ek,
73-
value: ev2,
74-
index: ei2
75-
)
76-
end
77-
end
7826

27+
operations = []
28+
lcs_callbacks = LCSCallbacks.new(
29+
operations: operations,
30+
expected: expected,
31+
actual: actual,
32+
should_add_noop_operation: method(:should_add_noop_operation?)
33+
)
34+
Diff::LCS.traverse_sequences(eks, aks, lcs_callbacks)
35+
36+
operations
37+
end
38+
39+
class LCSCallbacks
40+
extend AttrExtras.mixin
41+
42+
pattr_initialize %i[operations! expected! actual! should_add_noop_operation!]
43+
public :operations
44+
45+
def discard_a(event)
46+
# This key is in `expected`, but may also be in `actual`.
47+
key = event.old_element
48+
49+
# We want the diff to match the key order of `actual` as much as
50+
# possible, so if this is also a key in `actual` that's just being
51+
# reordered, don't add any operations now. The change or noop will
52+
# be added later.
53+
return if actual.include?(key)
54+
55+
operations << Core::UnaryOperation.new(
56+
name: :delete,
57+
collection: expected,
58+
key: key,
59+
value: expected[key],
60+
index: event.old_position
61+
)
62+
end
63+
64+
def discard_b(event)
65+
# This key is in `actual`, but may also be in `expected`.
66+
67+
key = event.new_element
68+
handle_operation_on_key_in_actual(key, event)
69+
end
70+
71+
def match(event)
72+
# This key is part of the longest common subsequence.
73+
74+
key = event.old_element
75+
handle_operation_on_key_in_actual(key, event)
76+
end
77+
78+
private
79+
80+
def handle_operation_on_key_in_actual(key, event)
81+
if should_add_noop_operation.call(key)
7982
operations << Core::UnaryOperation.new(
8083
name: :noop,
8184
collection: actual,
82-
key: ak,
83-
value: av,
84-
index: ai
85+
key: key,
86+
value: actual[key],
87+
index: event.new_position
8588
)
8689
else
87-
# (If we're here, it probably means that the key in 'actual' isn't
88-
# present in 'expected' or the values don't match.)
89-
90-
if (operations.empty? || operations.last.name == :noop) &&
91-
(ai.zero? || eks.include?(aks[ai - 1]))
92-
93-
# If we go from a match in the last iteration to a missing or
94-
# extra key in this one, or we're at the first key in 'actual' and
95-
# it's missing or extra, look for deletes in the 'expected' hash
96-
# and add them to our list of operations before we add the
97-
# inserts. In most cases we will accomplish this by backtracking a
98-
# bit to the key in 'expected' that matched the key in 'actual' we
99-
# processed in the previous iteration (or just the first key in
100-
# 'expected' if this is the first key in 'actual'), and then
101-
# iterating from there through 'expected' until we reach the end
102-
# or we hit some other condition (see below).
103-
104-
start_index =
105-
if ai.positive?
106-
eks.index(aks[ai - 1]) + 1
107-
else
108-
0
109-
end
110-
111-
start_index.upto(eks.size - 1) do |ei2|
112-
ek = eks[ei2]
113-
ev = expected[ek]
114-
av2 = actual[ek]
115-
116-
if actual.include?(ek) && ev == av2
117-
# If the key in 'expected' we've landed on happens to be a
118-
# match in 'actual', then stop, because it's going to be
119-
# handled in some future iteration of the 'actual' loop.
120-
break
121-
elsif aks[ai + 1..].any? do |k| # rubocop:disable Lint/DuplicateBranch for clarity
122-
expected.include?(k) && expected[k] != actual[k]
123-
end
124-
125-
# While we backtracked a bit to iterate over 'expected', we
126-
# now have to look ahead. If we will end up encountering a
127-
# insert that matches this delete later, stop and go back to
128-
# iterating over 'actual'. This is because the delete we would
129-
# have added now will be added later when we encounter the
130-
# associated insert, so we don't want to add it twice.
131-
break
132-
else
133-
operations << Core::UnaryOperation.new(
134-
name: :delete,
135-
collection: expected,
136-
key: ek,
137-
value: ev,
138-
index: ei2
139-
)
140-
end
141-
142-
next unless ek == ak && ev != av
143-
144-
# If we're pointing to the same key in 'expected' as in
145-
# 'actual', but with different values, go ahead and add an
146-
# insert now to accompany the delete added above. That way
147-
# they appear together, which will be easier to read.
148-
operations << Core::UnaryOperation.new(
149-
name: :insert,
150-
collection: actual,
151-
key: ak,
152-
value: av,
153-
index: ai
154-
)
155-
end
156-
end
157-
158-
if expected.include?(ak) && ev != av &&
159-
operations.none? do |op|
160-
op.name == :delete && op.key == ak
161-
end
162-
163-
# If we're here, it means that we didn't encounter any delete
164-
# operations above for whatever reason and so we need to add a
165-
# delete to represent the fact that the value for this key has
166-
# changed.
90+
# If the key is present in both `actual` and `expected`
91+
# but the values don't match, create a `delete` operation
92+
# just before the `insert`.
93+
# (This condition will always be true for `match` events.)
94+
if expected.include?(key)
16795
operations << Core::UnaryOperation.new(
16896
name: :delete,
16997
collection: expected,
170-
key: ak,
171-
value: expected[ak],
172-
index: ei
98+
key: key,
99+
value: expected[key],
100+
index: event.old_position
173101
)
174102
end
175103

176-
if operations.none? { |op| op.name == :insert && op.key == ak }
177-
# If we're here, it means that we didn't encounter any insert
178-
# operations above. Since we already handled delete, the only
179-
# alternative is that this key must not exist in 'expected', so
180-
# we need to add an insert.
181-
operations << Core::UnaryOperation.new(
182-
name: :insert,
183-
collection: actual,
184-
key: ak,
185-
value: av,
186-
index: ai
187-
)
188-
end
104+
operations << Core::UnaryOperation.new(
105+
name: :insert,
106+
collection: actual,
107+
key: key,
108+
value: actual[key],
109+
index: event.new_position
110+
)
189111
end
190-
191-
ai += 1
192-
previous_ei = ei
193-
end
194-
195-
# The last thing to do is this: if there are keys in 'expected' that
196-
# aren't in 'actual', and they aren't associated with any inserts to
197-
# where they would have been added above, tack those deletes onto the
198-
# end of our operations array.
199-
(eks - aks - operations.map(&:key)).each do |ek|
200-
ei = eks.index(ek)
201-
ev = expected[ek]
202-
203-
operations << Core::UnaryOperation.new(
204-
name: :delete,
205-
collection: expected,
206-
key: ek,
207-
value: ev,
208-
index: ei
209-
)
210112
end
211-
212-
operations
213113
end
214114

215115
def should_add_noop_operation?(key)

spec/integration/rspec/include_matcher_spec.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -294,8 +294,8 @@
294294
plain_line ' {'
295295
plain_line ' number: 42,'
296296
plain_line %( city: "Burbank",)
297-
plain_line %( zip: "90210")
298297
expected_line %(- state: "CA")
298+
plain_line %( zip: "90210")
299299
plain_line ' }'
300300
end
301301
)

0 commit comments

Comments
 (0)