From 9bce3f54d58c8f9296001d58ad485c1569ee02ad Mon Sep 17 00:00:00 2001 From: Christian Sutter Date: Wed, 27 Nov 2024 14:37:25 +0000 Subject: [PATCH] Sanitise filter parameter values We occasionally (mostly through pentests) get values for filters that contain random odd Unicode characters. Google's filter logic has failing validations in those cases, causing an error to be raised on the request. This adds basic, allowlist based sanitisation for all incoming filter values. We know that the values come from a limited character set anyway (slugs and UUIDs), as they are not direct user input. - Add value parameter sanitisation for filters - Remove existing escape logic in `FilterExpressionHelpers` (as there should never be any quotes or backslashes in the values to begin with, and we will now have removed them anyway) --- .../query/filter_expression_helpers.rb | 7 +++--- .../discovery_engine/query/filters.rb | 7 ++++-- .../discovery_engine/query/filters_spec.rb | 24 ++++++++++++++----- 3 files changed, 26 insertions(+), 12 deletions(-) diff --git a/app/services/discovery_engine/query/filter_expression_helpers.rb b/app/services/discovery_engine/query/filter_expression_helpers.rb index e043b8e..c15ad73 100644 --- a/app/services/discovery_engine/query/filter_expression_helpers.rb +++ b/app/services/discovery_engine/query/filter_expression_helpers.rb @@ -6,7 +6,7 @@ module FilterExpressionHelpers # values in string_value_or_values def filter_any_string(string_or_array_field, string_value_or_values) Array(string_value_or_values) - .map { escape_and_quote(_1) } + .map { quote(_1) } .join(",") .then { "#{string_or_array_field}: ANY(#{_1})" } end @@ -69,9 +69,8 @@ def parenthesize(expression) "(#{expression})" end - def escape_and_quote(string_value) - escaped_string = string_value.gsub(/(["\\])/, '\\\\\1') - "\"#{escaped_string}\"" + def quote(string_value) + "\"#{string_value}\"" end end end diff --git a/app/services/discovery_engine/query/filters.rb b/app/services/discovery_engine/query/filters.rb index 972eb28..c98167a 100644 --- a/app/services/discovery_engine/query/filters.rb +++ b/app/services/discovery_engine/query/filters.rb @@ -1,6 +1,7 @@ module DiscoveryEngine::Query class Filters FILTER_PARAM_KEY_REGEX = /\A(filter_all|filter|reject)_(.+)\z/ + ACCEPTABLE_VALUE_REGEX = /\A[a-zA-Z0-9_:,\-\/]+\z/ FILTERABLE_STRING_FIELDS = %w[ content_purpose_supergroup @@ -70,9 +71,11 @@ def valid_filter_value?(value) # for the time being. This ensures that occasionally observed garbage parameters such as: # # filter_world_locations[\\\\]=all + # filter_something=blah%00blah # - # are ignored by checking that the value is either a string or an array of strings. - Array(value).all? { _1.is_a?(String) } + # are ignored by checking that the value is either a string or an array of strings, and + # matches an allowlist of permissible characters. + Array(value).all? { _1.is_a?(String) && _1.match?(ACCEPTABLE_VALUE_REGEX) } end end end diff --git a/spec/services/discovery_engine/query/filters_spec.rb b/spec/services/discovery_engine/query/filters_spec.rb index 9431fff..70be08c 100644 --- a/spec/services/discovery_engine/query/filters_spec.rb +++ b/spec/services/discovery_engine/query/filters_spec.rb @@ -32,6 +32,12 @@ it { is_expected.to be_nil } end + + context "with invalid characters in the filter value" do + let(:query_params) { { q: "garden centres", reject_link: "foo\u0000bar" } } + + it { is_expected.to be_nil } + end end context "with an 'any' string filter" do @@ -65,6 +71,12 @@ it { is_expected.to be_nil } end + context "with invalid characters in the filter value" do + let(:query_params) { { q: "garden centres", filter_link: "foo\u0000bar" } } + + it { is_expected.to be_nil } + end + context "with an unknown field" do let(:query_params) { { q: "garden centres", filter_foo: "bar" } } @@ -101,6 +113,12 @@ it { is_expected.to be_nil } end + context "with invalid characters in the filter value" do + let(:query_params) { { q: "garden centres", filter_all_link: "foo\u0000bar" } } + + it { is_expected.to be_nil } + end + context "with an unknown field" do let(:query_params) { { q: "garden centres", filter_all_foo: "bar" } } @@ -177,11 +195,5 @@ it { is_expected.to eq('(NOT link: ANY("/foo")) AND (content_purpose_supergroup: ANY("services")) AND ((part_of_taxonomy_tree: ANY("cafe-1234")) AND (part_of_taxonomy_tree: ANY("face-5678"))) AND (public_timestamp: IN(629510400,629596799))') } end - - context "with filters containing escapable characters" do - let(:query_params) { { q: "garden centres", filter_content_purpose_supergroup: "foo\"\\bar" } } - - it { is_expected.to eq('content_purpose_supergroup: ANY("foo\\"\\\\bar")') } - end end end