Skip to content

Commit

Permalink
Merge pull request #153 from alphagov/filter-timestamp
Browse files Browse the repository at this point in the history
Implement timestamp filtering
  • Loading branch information
csutter authored Dec 13, 2023
2 parents f983c62 + 09924a2 commit 0074f71
Show file tree
Hide file tree
Showing 3 changed files with 95 additions and 26 deletions.
32 changes: 23 additions & 9 deletions app/services/discovery_engine/query/filter_expression_helpers.rb
Original file line number Diff line number Diff line change
@@ -1,30 +1,44 @@
module DiscoveryEngine::Query
module FilterExpressionHelpers
TIMESTAMP_VALUE_REGEX = /\A(?:from:(?<from>\d{4}-\d{2}-\d{2}))?(?:,)?(?:to:(?<to>\d{4}-\d{2}-\d{2}))?\z/

# Creates a filter expression for documents where string_or_array_field contains any of the
# values in string_value_or_values.
#
# string_value_or_values may be a single value or an array of values; Array() normalises both
# forms. Every value is passed through escape_and_quote before being joined into the ANY(...) list.
#
# Returns a String of the form `field: ANY(value1,value2)`.
def filter_any_string(string_or_array_field, string_value_or_values)
  Array(string_value_or_values)
    .map { escape_and_quote(_1) }
    .join(",")
    .then { "#{string_or_array_field}: ANY(#{_1})" }
end

# Creates a filter expression for documents where array_field contains all of the values in
# string_value_or_values.
#
# Builds one single-value filter_any_string expression per value, then combines them with
# filter_conjunction so that every one of them has to match.
def filter_all_string(array_field, string_value_or_values)
  Array(string_value_or_values)
    .map { filter_any_string(array_field, _1) }
    .then { filter_conjunction(_1) }
end

# Creates a filter expression for documents where string_or_array_field does not contain any of
# the values in string_value_or_values.
#
# This is the filter_any_string expression for the same arguments, wrapped by filter_negate.
def filter_not_string(string_or_array_field, string_value_or_values)
  filter_any_string(string_or_array_field, string_value_or_values)
    .then { filter_negate(_1) }
end

# Creates a filter expression for documents where timestamp_field is between the dates in
# timestamp_value.
#
# timestamp_value is matched against TIMESTAMP_VALUE_REGEX, i.e. "from:YYYY-MM-DD",
# "to:YYYY-MM-DD", or both separated by a comma. The lower bound is the start of the from date
# and the upper bound is the end of the to date, both as integer epoch seconds; a missing bound
# is rendered as "*".
#
# Returns a String of the form `field: IN(from,to)`, or nil when timestamp_value is not a
# matchable string, contains neither bound, or names a date that does not exist.
def filter_timestamp(timestamp_field, timestamp_value)
  # Query parameters can arrive as arrays (or nil); those have no #match and cannot be a range
  return nil unless timestamp_value.respond_to?(:match)

  match = timestamp_value.match(TIMESTAMP_VALUE_REGEX)
  return nil unless match && (match[:from] || match[:to])

  from = match[:from] ? Date.parse(match[:from]).beginning_of_day.to_i : "*"
  to = match[:to] ? Date.parse(match[:to]).end_of_day.to_i : "*"

  "#{timestamp_field}: IN(#{from},#{to})"
rescue Date::Error
  # The regex only validates the digit layout, so impossible dates such as 2023-02-30 still reach
  # Date.parse; treat them like any other invalid value instead of letting the error escape
  nil
end

# Creates a filter expression from several expressions where all must be true
def conjunction(expression_or_expressions)
def filter_conjunction(expression_or_expressions)
expressions = Array(expression_or_expressions).compact_blank
return expressions.first if expressions.one?

Expand All @@ -36,7 +50,7 @@ def conjunction(expression_or_expressions)

private

# Negates a filter expression by prefixing it with NOT
def filter_negate(expression)
  "NOT #{expression}"
end

Expand Down
42 changes: 29 additions & 13 deletions app/services/discovery_engine/query/filters.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
module DiscoveryEngine::Query
class Filters
FILTERABLE_FIELDS = %i[content_purpose_supergroup link part_of_taxonomy_tree].freeze
FILTER_PARAM_KEY_REGEX = /\A(filter_all|filter|reject)_(.+)\z/

FILTERABLE_STRING_FIELDS = %w[content_purpose_supergroup link part_of_taxonomy_tree].freeze
FILTERABLE_TIMESTAMP_FIELDS = %w[public_timestamp].freeze

include FilterExpressionHelpers

Expand All @@ -9,24 +12,37 @@ def initialize(query_params)
end

# Builds the combined filter expression for the query parameters.
#
# Every key/value pair in query_params is handed to parse_param; pairs that do not yield an
# expression are dropped by compact_blank, and whatever remains is combined with
# filter_conjunction.
def filter_expression
  query_params
    .map { parse_param(_1, _2) }
    .compact_blank
    .then { filter_conjunction(_1) }
end

private

attr_reader :query_params

def query_params_of_type(type)
FILTERABLE_FIELDS
.filter_map { [_1, query_params["#{type}_#{_1}".to_sym]] }
.to_h
.compact_blank
# Turns a single query parameter into a filter expression, if it describes a supported filter.
#
# key is split by FILTER_PARAM_KEY_REGEX into a filter type ("filter", "filter_all" or "reject")
# and a field name. String fields are delegated to string_filter_expression and timestamp fields
# to filter_timestamp.
#
# Returns nil when the key is not a filter parameter, the value is blank, the field is not
# filterable, or the filter type is not supported for the field.
def parse_param(key, value)
  filter_type, filter_field = key.match(FILTER_PARAM_KEY_REGEX)&.captures
  return nil unless filter_type && value.present?

  case filter_field
  when *FILTERABLE_STRING_FIELDS
    string_filter_expression(filter_type, filter_field, value)
  when *FILTERABLE_TIMESTAMP_FIELDS
    # filter_timestamp can only express "within this range", so a reject_/filter_all_ parameter
    # must not be silently applied as if it were a plain filter_ parameter
    filter_timestamp(filter_field, value) if filter_type == "filter"
  end
end

# Builds the expression for a string field by dispatching on filter_type: "filter" matches any
# of the values, "filter_all" requires all of them, and "reject" excludes them. Any other
# filter_type yields nil.
def string_filter_expression(filter_type, filter_field, value)
  helper_name = {
    "filter" => :filter_any_string,
    "filter_all" => :filter_all_string,
    "reject" => :filter_not_string,
  }[filter_type]

  helper_name && send(helper_name, filter_field, value)
end
end
end
47 changes: 43 additions & 4 deletions spec/services/discovery_engine/query/filters_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
it { is_expected.to be_nil }
end

context "with a reject filter" do
context "with a reject string filter" do
context "with an empty parameter" do
let(:query_params) { { q: "garden centres", reject_link: "" } }

Expand All @@ -28,7 +28,7 @@
end
end

context "with an 'any' filter" do
context "with an 'any' string filter" do
context "with an empty parameter" do
let(:query_params) { { q: "garden centres", filter_content_purpose_supergroup: "" } }

Expand All @@ -52,7 +52,7 @@
end
end

context "with an 'all' filter" do
context "with an 'all' string filter" do
context "with an empty parameter" do
let(:query_params) { { q: "garden centres", filter_all_part_of_taxonomy_tree: "" } }

Expand All @@ -74,17 +74,56 @@
end
end

# Timestamp filter values take the form "from:YYYY-MM-DD", "to:YYYY-MM-DD", or both joined by a
# comma. The expected expressions use epoch seconds, with "*" standing in for a missing bound.
context "with a timestamp filter" do
context "with an empty parameter" do
let(:query_params) { { q: "garden centres", filter_public_timestamp: "" } }

it { is_expected.to be_nil }
end

context "with a from parameter" do
let(:query_params) { { q: "garden centres", filter_public_timestamp: "from:1989-12-13" } }

# 629510400 is 1989-12-13 00:00:00 UTC, i.e. the very start of the from date
it { is_expected.to eq("public_timestamp: IN(629510400,*)") }
end

context "with a to parameter" do
let(:query_params) { { q: "garden centres", filter_public_timestamp: "to:1989-12-13" } }

# 629596799 is 1989-12-13 23:59:59 UTC, i.e. the last second of the to date
it { is_expected.to eq("public_timestamp: IN(*,629596799)") }
end

context "with both from and to parameters" do
let(:query_params) { { q: "garden centres", filter_public_timestamp: "from:1989-12-13,to:1989-12-13" } }

# The same date on both sides covers exactly that one day, first second to last
it { is_expected.to eq("public_timestamp: IN(629510400,629596799)") }
end

context "with an invalid from parameter" do
# A bare year is not a full YYYY-MM-DD date, so no filter is produced
let(:query_params) { { q: "garden centres", filter_public_timestamp: "from:1989" } }

it { is_expected.to be_nil }
end

context "with an invalid to parameter" do
# A month and day without a year is not a full YYYY-MM-DD date either
let(:query_params) { { q: "garden centres", filter_public_timestamp: "to:12-13" } }

it { is_expected.to be_nil }
end
end

# Every supported filter kind at once: the individual expressions are each wrapped in
# parentheses and joined with AND, in the order the parameters were given.
context "with several filters specified" do
  let(:query_params) do
    {
      q: "garden centres",
      reject_link: "/foo",
      filter_content_purpose_supergroup: "services",
      filter_all_part_of_taxonomy_tree: %w[cafe-1234 face-5678],
      filter_public_timestamp: "from:1989-12-13,to:1989-12-13",
    }
  end

  it { is_expected.to eq('(NOT link: ANY("/foo")) AND (content_purpose_supergroup: ANY("services")) AND ((part_of_taxonomy_tree: ANY("cafe-1234")) AND (part_of_taxonomy_tree: ANY("face-5678"))) AND (public_timestamp: IN(629510400,629596799))') }
end

context "with filters containing escapable characters" do
Expand Down

0 comments on commit 0074f71

Please sign in to comment.