From 0dae38c385cd59068f31264aeb4960725b19a3e5 Mon Sep 17 00:00:00 2001 From: Benjamin Clayman Date: Wed, 6 Oct 2021 18:02:22 -0400 Subject: [PATCH] Proof of concept for improving ContainExactly matcher speed when elements obey transitivity This is a proof of concept approach for addressing issue #1161. The current implementation for ContainExactly runs in O(n!). In practice, it runs in O(n log n) when the elements are comparable and sorting result in a match. The crux of the problem is that some elements don't obey transitivity. As a result, knowing that sorting actual and expected doesn't result in a match *doesn't* guarantee that expected and actual don't match. This proof of concept provides a way for the user to indicate that the elements in a particular example's expected and actual obey transitivity. That looks like this: expect(a).to contain_exactly(*b).transitive And runs in O(n log n) time. More practically, this means that common use cases for contains_exactly will enjoy a massive speedup. Previously, users have examples where comparing arrays of 30 integers "never finishes." Using `.transitive` here with arrays of 10,000 integers runs in < 0.1s on my machine. --- .../matchers/built_in/contain_exactly.rb | 63 +++++++++++++- .../matchers/built_in/contain_exactly_spec.rb | 86 +++++++++++++++++++ 2 files changed, 147 insertions(+), 2 deletions(-) diff --git a/lib/rspec/matchers/built_in/contain_exactly.rb b/lib/rspec/matchers/built_in/contain_exactly.rb index 1bf7f9831..fe827f676 100644 --- a/lib/rspec/matchers/built_in/contain_exactly.rb +++ b/lib/rspec/matchers/built_in/contain_exactly.rb @@ -6,6 +6,19 @@ module BuiltIn # Provides the implementation for `contain_exactly` and `match_array`. # Not intended to be instantiated directly. class ContainExactly < BaseMatcher + def initialize(expected=nil) + super + @transitive = false + end + + # @api public + # Specifies that elements contained in actual and expected + # obey transitivity. This lets match run much faster. + def transitive + @transitive = true + self + end + # @api private # @return [String] def failure_message @@ -36,6 +49,7 @@ def description def generate_failure_message message = expected_collection_line message += actual_collection_line + @extra_items, @missing_items = fast_calculate_extra_missing if @transitive message += missing_elements_line unless missing_items.empty? message += extra_elements_line unless extra_items.empty? message @@ -72,7 +86,9 @@ def message_line(prefix, collection, surface_descriptions=false) def match(_expected, _actual) return false unless convert_actual_to_an_array - match_when_sorted? || (extra_items.empty? && missing_items.empty?) + matched_when_sorted = match_when_sorted? + return matched_when_sorted if matched_when_sorted || @transitive + (extra_items.empty? && missing_items.empty?) end # This cannot always work (e.g. when dealing with unsortable items, @@ -80,7 +96,8 @@ def match(_expected, _actual) # the slowness of the full matching algorithm, and in common cases this # works, so it's worth a try. def match_when_sorted? - values_match?(safe_sort(expected), safe_sort(actual)) + @sorted_expected, @sorted_actual = safe_sort(expected), safe_sort(actual) + values_match?(@sorted_expected, @sorted_actual) end def convert_actual_to_an_array @@ -96,6 +113,7 @@ def convert_actual_to_an_array def safe_sort(array) array.sort rescue Support::AllExceptionsExceptOnesWeMustNotRescue + raise "Invalid use of `.transitive` with unsortable array #{array}" if @transitive array end @@ -124,6 +142,47 @@ def extra_items end end + # We use this to determine extra and missing items between expected + # and actual arrays. This runs in O(n) time which is a big improvement + # over the O(n!) work incurred by PairingsMaximizer to evaluate all possible + # matchings between arrays + # rubocop:disable MethodLength + # rubocop:disable Metrics/AbcSize + def fast_calculate_extra_missing + extra, missing = [], [] + i, j = 0, 0 + + # Use 2-pointer approach to find elements in sorted_actual + # that aren't in sorted_expected and vice versa + while i < @sorted_actual.size && j < @sorted_expected.size + current_actual, current_expected = @sorted_actual[i], @sorted_expected[j] + + if current_actual < current_expected + extra << current_actual + i += 1 + elsif current_actual > current_expected + missing << current_expected + j += 1 + else + i += 1 + j += 1 + end + end + + while i < @sorted_actual.size + extra << current_actual + i += 1 + end + while j < @sorted_expected.size + missing << current_expected + j += 1 + end + + [extra, missing] + end + # rubocop:enable MethodLength + # rubocop:enable Metrics/AbcSize + def best_solution @best_solution ||= pairings_maximizer.find_best_solution end diff --git a/spec/rspec/matchers/built_in/contain_exactly_spec.rb b/spec/rspec/matchers/built_in/contain_exactly_spec.rb index ac1996ae8..815ad3d0b 100644 --- a/spec/rspec/matchers/built_in/contain_exactly_spec.rb +++ b/spec/rspec/matchers/built_in/contain_exactly_spec.rb @@ -95,6 +95,92 @@ def array.send; :sent; end end RSpec.describe "using contain_exactly with expect" do + # users have reported using contains_exactly with 50 elements + # never finishing! + context "with transitive enabled" do + require 'benchmark' + context "with actual and expected containing unsortable elements" do + it "raises" do + expect { + expect([be_positive, be_negative]).to contain_exactly(be_positive, be_negative).transitive + }.to raise_error(/Invalid use of/) + end + end + + context "with expected containing unsortable elements" do + it "raises" do + expect { + expect([1, -1]).to contain_exactly(be_positive, be_negative).transitive + }.to raise_error(/Invalid use of/) + end + end + + context "with actual and expected containing sortable elements" do + shared_examples "runs very fast" do + it do + time = Benchmark.realtime do + subject + end + # this is in seconds + expect(time).to be < 0.3 + end + end + + let(:a) { Array.new(10_000) { rand(10) } } + + context "with a positive expectation" do + subject { expect(a).to contain_exactly(*b).transitive } + + context "that is valid" do + let(:b) { a.shuffle } + + it "matches" do + subject + end + + include_examples "runs very fast" + end + + context "that is not valid" do + let(:b) { Array.new(10_000) { rand(10) } } + + it "fails quickly" do + time = Benchmark.realtime do + expect { subject }.to fail_with(/expected collection contained/) + end + expect(time).to be < 1 + end + + end + end + + context "with a negative expectation" do + subject { expect(a).not_to contain_exactly(*b).transitive } + + context "that is valid" do + let(:b) { Array.new(10_000) { rand(10) } } + + it "does not match" do + subject + end + + include_examples "runs very fast" + end + + context "that is not valid" do + let(:b) { a.shuffle } + + it "fails quickly" do + time = Benchmark.realtime do + expect { expect(a).not_to contain_exactly(*b).transitive }.to fail_with(/not to contain exactly/) + end + expect(time).to be < 1 + end + end + end + end + end + it "passes a valid positive expectation" do expect([1, 2]).to contain_exactly(2, 1) end