Skip to content

Commit

Permalink
Merge pull request #241 from alphagov/full-taxonomy-tree
Browse files Browse the repository at this point in the history
Implement support for deeply nested taxons
  • Loading branch information
csutter authored Mar 15, 2024
2 parents 7d449fe + 0c9ef4c commit 9cda934
Show file tree
Hide file tree
Showing 3 changed files with 83 additions and 15 deletions.
20 changes: 19 additions & 1 deletion app/models/concerns/publishing_api/metadata.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,17 @@ module Metadata
# paths in this list will have its `manual` field set accordingly.
IMPLICIT_MANUAL_PATHS = %w[/service-manual].freeze

# A document's taxons may sit several levels deep in the taxonomy, so content IDs are
# collected from every level of the expanded links rather than from the top level only.
TAXON_VALUES_JSON_PATHS = [
  "$.expanded_links.taxons[*].content_id", # taxons linked directly
  "$.expanded_links.taxons..links.parent_taxons[*].content_id", # parent taxons, at any depth
  "$.expanded_links.taxons..links.root_taxon[*].content_id", # root taxon (held in an array too)
].map { |expression| JsonPath.new(expression, use_symbols: true) }.freeze

# Extracts a hash of structured metadata about this document.
def metadata
{
Expand All @@ -20,7 +31,7 @@ def metadata
public_timestamp:,
document_type: document_hash[:document_type],
content_purpose_supergroup: document_hash[:content_purpose_supergroup],
part_of_taxonomy_tree: document_hash.dig(:links, :taxons) || [],
part_of_taxonomy_tree:,
# Vertex can only currently boost on numeric fields, not booleans
is_historic: historic? ? 1 : 0,
government_name:,
Expand Down Expand Up @@ -72,6 +83,13 @@ def public_timestamp
# rubocop:enable Rails/TimeZone
end

# Content IDs of the taxons found for this document.
#
# Every path in TAXON_VALUES_JSON_PATHS is evaluated against the document hash; the
# matches are merged into a single list, with nil entries and duplicates removed.
def part_of_taxonomy_tree
  matched_ids = TAXON_VALUES_JSON_PATHS.flat_map do |json_path|
    json_path.on(document_hash)
  end

  matched_ids.compact.uniq
end

def historic?
political = document_hash.dig(:details, :political) || false
government = document_hash.dig(:expanded_links, :government)&.first
Expand Down
41 changes: 30 additions & 11 deletions spec/integration/document_synchronization_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,10 @@
is_historic: 0,
government_name: "2015 Conservative government",
content_purpose_supergroup: "news_and_communications",
part_of_taxonomy_tree: %w[37d0fa26-abed-4c74-8835-b3b51ae1c8b2],
part_of_taxonomy_tree: an_array_matching(%w[
37d0fa26-abed-4c74-8835-b3b51ae1c8b2
f3bbdec2-0e62-4520-a7fd-6ffd5d36e03a
]),
organisations: %w[government-digital-service],
locale: "en",
debug: {
Expand Down Expand Up @@ -62,9 +65,12 @@
document_type: "travel_advice",
is_historic: 0,
content_purpose_supergroup: "guidance_and_regulation",
part_of_taxonomy_tree: %w[
8f78544f-a4ed-46b4-8163-889679d119b9 71cd9f51-f492-4c3f-91ca-5ad694c26592
],
part_of_taxonomy_tree: an_array_matching(%w[
91b8ef20-74e7-4552-880c-50e6d73c2ff9
862fdd81-0b52-41c1-9aa0-e208ac86b763
8f78544f-a4ed-46b4-8163-889679d119b9
71cd9f51-f492-4c3f-91ca-5ad694c26592
]),
organisations: %w[foreign-commonwealth-development-office],
locale: "en",
parts: [
Expand Down Expand Up @@ -122,11 +128,13 @@
is_historic: 1,
government_name: "2010 to 2015 Conservative and Liberal Democrat coalition government",
content_purpose_supergroup: "news_and_communications",
part_of_taxonomy_tree: %w[
06ad07f7-1e79-462f-a192-6b2c9d92089c
part_of_taxonomy_tree: an_array_matching(%w[
6e3f3cfb-142a-41f4-a03a-fea504cc1f79
ce9e9802-6138-4fe9-9f33-045ef213be29
f3bbdec2-0e62-4520-a7fd-6ffd5d36e03a
9597c30a-605a-4e36-8bc1-47e5cdae41b3
3dbeb4a3-33c0-4bda-bd21-b721b0f8736f
],
]),
organisations: %w[foreign-commonwealth-office],
locale: "en",
debug: {
Expand Down Expand Up @@ -274,10 +282,12 @@
public_timestamp: 1_372_436_926,
document_type: "worldwide_organisation",
is_historic: 0,
part_of_taxonomy_tree: %w[
part_of_taxonomy_tree: an_array_matching(%w[
862fdd81-0b52-41c1-9aa0-e208ac86b763
f1744c25-bbae-42d5-b0fa-452ccea8f802
91b8ef20-74e7-4552-880c-50e6d73c2ff9
ca97c97d-30c3-4c31-86d5-a84fb37f919a
],
]),
world_locations: %w[austria],
content_purpose_supergroup: "other",
locale: "en",
Expand Down Expand Up @@ -309,7 +319,12 @@
is_historic: 0,
government_name: "2010 to 2015 Conservative and Liberal Democrat coalition government",
content_purpose_supergroup: "research_and_statistics",
part_of_taxonomy_tree: %w[f3caf326-fe33-410f-b7f4-553f4011c81e],
part_of_taxonomy_tree: an_array_matching(%w[
f3bbdec2-0e62-4520-a7fd-6ffd5d36e03a
e48ab80a-de80-4e83-bf59-26316856a5f9
f3f4b5d3-49c4-487b-bd5b-be75f11ec8c5
f3caf326-fe33-410f-b7f4-553f4011c81e
]),
organisations: %w[cabinet-office efficiency-and-reform-group government-digital-service],
locale: "en",
debug: {
Expand Down Expand Up @@ -378,7 +393,11 @@
organisations: %w[foreign-commonwealth-development-office],
document_type: "speech",
is_historic: 0,
part_of_taxonomy_tree: %w[d6dba75a-42bd-4e1e-984c-2bddb6b41951],
part_of_taxonomy_tree: an_array_matching(%w[
f3bbdec2-0e62-4520-a7fd-6ffd5d36e03a
37d0fa26-abed-4c74-8835-b3b51ae1c8b2
d6dba75a-42bd-4e1e-984c-2bddb6b41951
]),
world_locations: %w[usa],
topical_events: %w[her-majesty-queen-elizabeth-ii],
content_purpose_supergroup: "news_and_communications",
Expand Down
37 changes: 34 additions & 3 deletions spec/models/concerns/publishing_api/metadata_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -139,10 +139,41 @@
describe "part_of_taxonomy_tree" do
subject(:extracted_part_of_taxonomy_tree) { extracted_metadata[:part_of_taxonomy_tree] }

context "with a set of taxon links" do
let(:document_hash) { { links: { taxons: %w[0000 ffff] } } }
context "with a set of taxon links and their details in expanded links" do
let(:document_hash) do
{
expanded_links: {
taxons: [
{ content_id: "0000" },
{
content_id: "1111",
links: {},
},
{
content_id: "2222",
links: { root_taxon: [{ content_id: "0000" }] },
},
{
content_id: "3333",
links: {
parent_taxons: [
{
content_id: "4444",
links: {
parent_taxons: [
{ content_id: "5555" },
],
},
},
],
},
},
],
},
}
end

it { is_expected.to eq(%w[0000 ffff]) }
it { is_expected.to match_array(%w[0000 1111 2222 3333 4444 5555]) }
end

context "without taxon links" do
Expand Down

0 comments on commit 9cda934

Please sign in to comment.