Skip to content

Commit

Permalink
Merge pull request #252 from alphagov/truncate-taxons
Browse files Browse the repository at this point in the history
Fix sync failures due to excessive taxons
  • Loading branch information
csutter authored Apr 8, 2024
2 parents 0f8c20b + cc590a2 commit f58a885
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 5 deletions.
13 changes: 9 additions & 4 deletions app/models/concerns/publishing_api/metadata.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,17 @@ module Metadata
# Taxons can be deeply nested, so we need to make sure we extract all of their content IDs all
# the way down.
TAXON_VALUES_JSON_PATHS = [
# Root and parent taxons
#
# Note: these come *before* the direct taxons on purpose, as the top two levels of taxons are
# facetable, and we set a limit on the maximum number of taxons to index. This way, they are
# less at risk of being truncated.
"$.expanded_links.taxons..links.root_taxon[*].content_id",
"$.expanded_links.taxons..links.parent_taxons[*].content_id",
# Direct taxons
"$.expanded_links.taxons[*].content_id",
# Parent taxons
"$.expanded_links.taxons..links.parent_taxons[*].content_id",
# Root taxon (note: that's still an array!)
"$.expanded_links.taxons..links.root_taxon[*].content_id",
].map { JsonPath.new(_1, use_symbols: true) }.freeze
# Maximum number of taxon content IDs kept when extracting the taxonomy tree: the extraction
# chain ends with `.first(MAX_TAXON_COUNT)` (after `compact` and `uniq`), so any IDs beyond this
# count are dropped.
MAX_TAXON_COUNT = 250

# Extracts a hash of structured metadata about this document.
def metadata
Expand Down Expand Up @@ -88,6 +92,7 @@ def part_of_taxonomy_tree
.flat_map { _1.on(document_hash) }
.compact
.uniq
.first(MAX_TAXON_COUNT)
end

def historic?
Expand Down
16 changes: 15 additions & 1 deletion spec/models/concerns/publishing_api/metadata_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,21 @@
}
end

it { is_expected.to match_array(%w[0000 1111 2222 3333 4444 5555]) }
it { is_expected.to eq(%w[0000 4444 5555 1111 2222 3333]) }
end

# Checks that the number of extracted taxons is capped when a document links to more taxons
# than the limit asserted below.
context "with an excessive number of taxon links" do
# Builds 260 direct taxons with distinct zero-padded content IDs ("0000" through "0259"),
# i.e. ten more than the 250 expected after truncation.
let(:document_hash) do
{
expanded_links: {
taxons: Array.new(260) { { content_id: sprintf("%04d", _1) } },
},
}
end

# NOTE(review): `extracted_part_of_taxonomy_tree` is defined outside this excerpt; presumably it
# is the taxon list extracted from `document_hash` - confirm against the surrounding spec.
it "is truncated to 250 taxons" do
expect(extracted_part_of_taxonomy_tree.count).to eq(250)
end
end

context "without taxon links" do
Expand Down

0 comments on commit f58a885

Please sign in to comment.