Skip to content

Commit

Permalink
Merge pull request #384 from alphagov/models
Browse files Browse the repository at this point in the history
Refactor Discovery Engine resource handling
  • Loading branch information
csutter authored Feb 7, 2025
2 parents 2da041c + 4d6e809 commit 9bc12ba
Show file tree
Hide file tree
Showing 24 changed files with 185 additions and 33 deletions.
4 changes: 1 addition & 3 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,7 @@ jobs:
RAILS_ENV: test
# All Google client library calls are mocked, but the application needs this set to boot
GOOGLE_CLOUD_PROJECT_ID: not-used
DISCOVERY_ENGINE_SERVING_CONFIG: not-used
DISCOVERY_ENGINE_DATASTORE: not-used
DISCOVERY_ENGINE_DATASTORE_BRANCH: not-used
DISCOVERY_ENGINE_DEFAULT_COLLECTION_NAME: not-used
# Redis running through govuk-infrastructure action
REDIS_URL: redis://localhost:6379
steps:
Expand Down
21 changes: 21 additions & 0 deletions app/models/branch.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Represents a branch on a Discovery Engine data store.
#
# Currently, every data store on Discovery Engine has exactly *one* branch (the default branch), and
# we are not able to make any changes to that resource. However, we still need to model it here
# because documents are children of the branch, not the datastore itself.
#
# see https://cloud.google.com/ruby/docs/reference/google-cloud-discovery_engine-v1/latest/Google-Cloud-DiscoveryEngine-V1-DocumentService-Client
# (there is no documentation specific to branches)
Branch = Data.define(:remote_resource_id) do
include DiscoveryEngineNameable

# The default branch automatically available on a data store
def self.default
new("default_branch")
end

def parent
# We only use a single data store in our architecture, so we can hardcode it here.
DataStore.default
end
end
31 changes: 31 additions & 0 deletions app/models/concerns/discovery_engine_nameable.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Enhances models with a `#name` method returning their fully qualified Google Cloud Platform
# resource name (like a path).
#
# For example, for a `Control`, this would be:
# projects/{project}/locations/{location}/collections/{collection_id}/engines/
# {engine_id}/controls/{control_id}
#
# Requires the including class to implement `#remote_resource_id`, and optionally `#parent` if the
# parent resource is not the default collection.
module DiscoveryEngineNameable
# The name (fully qualified path) of this Discovery Engine resource on GCP
def name
[parent_name, resource_path_fragment, remote_resource_id].join("/")
end

private

def resource_path_fragment
# For example: `DataStore` -> `dataStores`
self.class.name.downcase_first.pluralize
end

def parent_name
if respond_to?(:parent)
parent.name
else
# The default collection is the parent of all root-level resources
Rails.configuration.discovery_engine_default_collection_name
end
end
end
15 changes: 15 additions & 0 deletions app/models/data_store.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Represents a data store on Discovery Engine.
#
# A data store contains the indexed documents that can be searched through an engine.
#
# Our architecture currently only has a single data store, so we do not need the ability to manage
# data stores through Search Admin.
#
# see https://cloud.google.com/ruby/docs/reference/google-cloud-discovery_engine-v1/latest/Google-Cloud-DiscoveryEngine-V1-DataStore
DataStore = Data.define(:remote_resource_id) do
include DiscoveryEngineNameable

def self.default
new("govuk_content")
end
end
18 changes: 18 additions & 0 deletions app/models/engine.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Represents an engine on Discovery Engine.
#
# An engine (called "app" in the Google Cloud UI) is an abstraction over the data stores that
# contain our searchable documents, and is used for querying. It is the parent resource of several
# other resources such as controls and serving configs.
#
# Our architecture currently only has a single engine, so we do not need the ability to manage
# engines through Search Admin.
#
# see https://cloud.google.com/ruby/docs/reference/google-cloud-discovery_engine-v1/latest/Google-Cloud-DiscoveryEngine-V1-Engine
Engine = Data.define(:remote_resource_id) do
include DiscoveryEngineNameable

# The default engine created through Terraform in `govuk-infrastructure`
def self.default
new("govuk")
end
end
21 changes: 21 additions & 0 deletions app/models/serving_config.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Represents a serving config on Discovery Engine.
#
# A serving config is an endpoint on an engine that can be used for
# querying. Each serving config can have different configuration (in particular, different sets of
# active controls), which allows us to test out new configuration changes outside of the default
# serving config.
#
# see https://cloud.google.com/ruby/docs/reference/google-cloud-discovery_engine-v1beta/latest/Google-Cloud-DiscoveryEngine-V1beta-ServingConfig
ServingConfig = Data.define(:remote_resource_id) do
include DiscoveryEngineNameable

# The default serving config automatically available on an engine
def self.default
new("default_search")
end

def parent
# We only use a single engine in our architecture, so we can hardcode it here.
Engine.default
end
end
6 changes: 1 addition & 5 deletions app/services/discovery_engine/autocomplete/complete.rb
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,10 @@ def suggestions

def complete_query_request
{
data_store:,
data_store: DataStore.default.name,
query:,
query_model: QUERY_MODEL,
}
end

def data_store
Rails.configuration.discovery_engine_datastore
end
end
end
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def bucket_name
end

def parent
Rails.configuration.discovery_engine_datastore
DataStore.default.name
end
end
end
2 changes: 1 addition & 1 deletion app/services/discovery_engine/query/search.rb
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def query
end

def serving_config
Rails.configuration.discovery_engine_serving_config
ServingConfig.default.name
end

def page_size
Expand Down
2 changes: 1 addition & 1 deletion app/services/discovery_engine/sync/operation.rb
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def version_cache
end

def document_name
"#{Rails.configuration.discovery_engine_datastore_branch}/documents/#{content_id}"
[Branch.default.name, "documents", content_id].join("/")
end

def log(level, message)
Expand Down
2 changes: 1 addition & 1 deletion app/services/discovery_engine/user_events/import.rb
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def call
table_id:,
partition_date:,
},
parent: Rails.configuration.discovery_engine_datastore,
parent: DataStore.default.name,
)

logger.info("Waiting for import_user_events operation to finish remotely")
Expand Down
4 changes: 1 addition & 3 deletions config/application.rb
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,7 @@ class Application < Rails::Application
config.api_only = true

# Google Discovery Engine configuration
config.discovery_engine_serving_config = ENV.fetch("DISCOVERY_ENGINE_SERVING_CONFIG")
config.discovery_engine_datastore = ENV.fetch("DISCOVERY_ENGINE_DATASTORE")
config.discovery_engine_datastore_branch = ENV.fetch("DISCOVERY_ENGINE_DATASTORE_BRANCH")
config.discovery_engine_default_collection_name = ENV.fetch("DISCOVERY_ENGINE_DEFAULT_COLLECTION_NAME")
config.google_cloud_project_id = ENV.fetch("GOOGLE_CLOUD_PROJECT_ID")

# Document sync configuration
Expand Down
3 changes: 3 additions & 0 deletions config/environments/test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@

# Raise error when a before_action's only/except options reference missing actions.
config.action_controller.raise_on_missing_callback_actions = true

# Google Discovery Engine configuration
config.discovery_engine_default_collection_name = "[collection]"
end

# TODO: remove this workaround once GovukPrometheusExporter initialisation is fixed in govuk_app_config.
Expand Down
15 changes: 15 additions & 0 deletions spec/models/branch_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
RSpec.describe Branch do
subject(:branch) { described_class.new("my-branch") }

describe ".default" do
it "returns the default branch" do
expect(described_class.default).to eq(described_class.new("default_branch"))
end
end

describe "#name" do
it "returns the fully qualified name of the branch" do
expect(branch.name).to eq("[collection]/dataStores/govuk_content/branches/my-branch")
end
end
end
15 changes: 15 additions & 0 deletions spec/models/data_store_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
RSpec.describe DataStore do
subject(:data_store) { described_class.new("my-data-store") }

describe ".default" do
it "returns the default data store" do
expect(described_class.default).to eq(described_class.new("govuk_content"))
end
end

describe "#name" do
it "returns the fully qualified name of the data store" do
expect(data_store.name).to eq("[collection]/dataStores/my-data-store")
end
end
end
15 changes: 15 additions & 0 deletions spec/models/engine_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
RSpec.describe Engine do
subject(:engine) { described_class.new("my-engine") }

describe ".default" do
it "returns the default engine" do
expect(described_class.default).to eq(described_class.new("govuk"))
end
end

describe "#name" do
it "returns the fully qualified name of the engine" do
expect(engine.name).to eq("[collection]/engines/my-engine")
end
end
end
15 changes: 15 additions & 0 deletions spec/models/serving_config_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
RSpec.describe ServingConfig do
subject(:serving_config) { described_class.new("my-serving-config") }

describe ".default" do
it "returns the default serving config" do
expect(described_class.default).to eq(described_class.new("default_search"))
end
end

describe "#name" do
it "returns the fully qualified name of the serving config" do
expect(serving_config.name).to eq("[collection]/engines/govuk/servingConfigs/my-serving-config")
end
end
end
6 changes: 1 addition & 5 deletions spec/services/discovery_engine/autocomplete/complete_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,6 @@

let(:client) { double("completion service", complete_query:) }

before do
allow(Rails.configuration).to receive(:discovery_engine_datastore).and_return("/the/datastore")
end

describe "#completion_result" do
subject(:completion_result) { completion.completion_result }

Expand All @@ -22,7 +18,7 @@
completion_result

expect(client).to have_received(:complete_query).with(
data_store: "/the/datastore",
data_store: DataStore.default.name,
query:,
query_model: "user-event",
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@

before do
allow(Rails.configuration).to receive_messages(
discovery_engine_datastore: "data/store",
google_cloud_project_id: "my-fancy-project",
)
end
Expand All @@ -25,7 +24,7 @@
update_denylist.call

expect(client).to have_received(:purge_suggestion_deny_list_entries)
.with(parent: "data/store")
.with(parent: DataStore.default.name)
expect(purge_operation).to have_received(:wait_until_done!)
end

Expand All @@ -37,7 +36,7 @@
data_schema: "suggestion_deny_list",
input_uris: ["gs://my-fancy-project_vais_artifacts/denylist.jsonl"],
},
parent: "data/store",
parent: DataStore.default.name,
)
expect(import_operation).to have_received(:wait_until_done!)
end
Expand Down
6 changes: 2 additions & 4 deletions spec/services/discovery_engine/query/search_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@
end

before do
allow(Rails.configuration).to receive(:discovery_engine_serving_config)
.and_return("serving-config-path")
allow(DiscoveryEngine::Query::Filters).to receive(:new).and_return(filters)
end

Expand Down Expand Up @@ -52,7 +50,7 @@

it "calls the client with the expected parameters" do
expect(client).to have_received(:search).with(
serving_config: "serving-config-path",
serving_config: ServingConfig.default.name,
query: "garden centres",
offset: 0,
page_size: 10,
Expand All @@ -76,7 +74,7 @@

it "calls the client with the expected parameters" do
expect(client).to have_received(:search).with(
serving_config: "serving-config-path",
serving_config: ServingConfig.default.name,
query: "garden centres",
offset: 11,
page_size: 22,
Expand Down
2 changes: 1 addition & 1 deletion spec/services/discovery_engine/sync/delete_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

it "deletes the document" do
expect(client).to have_received(:delete_document)
.with(name: "branch/documents/some_content_id")
.with(name: "#{Branch.default.name}/documents/some_content_id")
end

it_behaves_like "a successful sync operation", "delete"
Expand Down
2 changes: 1 addition & 1 deletion spec/services/discovery_engine/sync/put_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
expect(client).to have_received(:update_document).with(
document: {
id: "some_content_id",
name: "branch/documents/some_content_id",
name: "#{Branch.default.name}/documents/some_content_id",
json_data: "{\"foo\":\"bar\",\"payload_version\":\"1\"}",
content: {
mime_type: "text/html",
Expand Down
1 change: 0 additions & 1 deletion spec/services/discovery_engine/sync/shared_examples.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
before do
allow(Kernel).to receive(:sleep).and_return(nil)
allow(Rails).to receive(:logger).and_return(logger)
allow(Rails.configuration).to receive(:discovery_engine_datastore_branch).and_return("branch")
allow(GovukError).to receive(:notify)

allow(Coordination::DocumentLock).to receive(:new).with("some_content_id").and_return(lock)
Expand Down
5 changes: 2 additions & 3 deletions spec/services/discovery_engine/user_events/import_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@

before do
allow(Rails.configuration).to receive_messages(
discovery_engine_datastore: "data/store",
google_cloud_project_id: "my-fancy-project",
)
end
Expand Down Expand Up @@ -55,7 +54,7 @@
table_id: "search-event",
partition_date: Google::Type::Date.new(year: 2000, month: 1, day: 1),
},
parent: "data/store",
parent: DataStore.default.name,
)
end
end
Expand All @@ -71,7 +70,7 @@
table_id: "search-intraday-event",
partition_date: Google::Type::Date.new(year: 1989, month: 12, day: 13),
},
parent: "data/store",
parent: DataStore.default.name,
)
end
end
Expand Down

0 comments on commit 9bc12ba

Please sign in to comment.