diff --git a/app/services/discovery_engine/autocomplete/update_denylist.rb b/app/services/discovery_engine/autocomplete/update_denylist.rb
new file mode 100644
index 0000000..f428943
--- /dev/null
+++ b/app/services/discovery_engine/autocomplete/update_denylist.rb
@@ -0,0 +1,67 @@
+module DiscoveryEngine::Autocomplete
+  # Replaces the contents of Discovery Engine's autocomplete denylist with the entries in a JSONL
+  # file held in a Google Cloud Storage bucket.
+  #
+  # Denylisted terms are removed from the suggestions returned by the autocomplete API. The API
+  # also supports an inline source, but the list may contain sensitive terms that we don't want to
+  # store in the codebase, so it is read from the bucket instead.
+  #
+  # An import does not remove entries that are already on the denylist, which is why the list is
+  # purged before every import.
+  #
+  # See https://cloud.google.com/generative-ai-app-builder/docs/configure-autocomplete#denylist
+  class UpdateDenylist
+    # Name of the object in the bucket that holds the denylist
+    FILENAME = "denylist.jsonl".freeze
+
+    # Schema of the data in the JSONL file (this is the only supported option)
+    DATA_SCHEMA = "suggestion_deny_list".freeze
+
+    def initialize(client: ::Google::Cloud::DiscoveryEngine.completion_service(version: :v1))
+      @client = client
+    end
+
+    # Purges the denylist and then re-imports it from the bucket. Raises if either operation
+    # reports an error, or if the import reports any failed entries.
+    def call
+      purge_existing_entries
+      import_entries_from_bucket
+    end
+
+  private
+
+    attr_reader :client
+
+    def purge_existing_entries
+      operation = client.purge_suggestion_deny_list_entries(parent:)
+      operation.wait_until_done!
+      raise operation.results.message if operation.error?
+
+      Rails.logger.info("Successfully purged autocomplete denylist")
+    end
+
+    def import_entries_from_bucket
+      operation = client.import_suggestion_deny_list_entries(gcs_source:, parent:)
+      operation.wait_until_done!
+      raise operation.results.message if operation.error?
+
+      failed_count = operation.results.failed_entries_count
+      raise "Failed to import #{failed_count} entries to autocomplete denylist" if failed_count.positive?
+
+      imported_count = operation.results.imported_entries_count
+      Rails.logger.info("Successfully imported #{imported_count} entries to autocomplete denylist")
+    end
+
+    def gcs_source
+      { data_schema: DATA_SCHEMA, input_uris: [denylist_uri] }
+    end
+
+    def denylist_uri
+      "gs://#{Rails.configuration.google_cloud_project_id}_vais_artifacts/#{FILENAME}"
+    end
+
+    def parent
+      Rails.configuration.discovery_engine_datastore
+    end
+  end
+end
diff --git a/lib/tasks/autocomplete.rake b/lib/tasks/autocomplete.rake
new file mode 100644
index 0000000..df0df96
--- /dev/null
+++ b/lib/tasks/autocomplete.rake
@@ -0,0 +1,7 @@
+namespace :autocomplete do
+  desc "Trigger a purge and re-import of the autocomplete denylist"
+  task update_denylist: :environment do
+    # No client is passed, so the service builds its default completion service client
+    DiscoveryEngine::Autocomplete::UpdateDenylist.new.call
+  end
+end
diff --git a/spec/services/discovery_engine/autocomplete/update_denylist_spec.rb b/spec/services/discovery_engine/autocomplete/update_denylist_spec.rb
new file mode 100644
index 0000000..8540a04
--- /dev/null
+++ b/spec/services/discovery_engine/autocomplete/update_denylist_spec.rb
@@ -0,0 +1,76 @@
+RSpec.describe DiscoveryEngine::Autocomplete::UpdateDenylist do
+  subject(:update_denylist) { described_class.new(client:) }
+
+  let(:client) do
+    instance_double(
+      ::Google::Cloud::DiscoveryEngine::V1::CompletionService::Client,
+      purge_suggestion_deny_list_entries: purge_operation,
+      import_suggestion_deny_list_entries: import_operation,
+    )
+  end
+
+  # Both operations finish without error unless a context below overrides that
+  let(:purge_operation) do
+    instance_double(Gapic::Operation, wait_until_done!: nil, error?: false, results: purge_results)
+  end
+  let(:purge_results) { double("results") }
+  let(:import_operation) do
+    instance_double(Gapic::Operation, wait_until_done!: nil, error?: false, results: import_results)
+  end
+  let(:import_results) { double("results", failed_entries_count: 0, imported_entries_count: 100) }
+
+  before do
+    allow(Rails.configuration).to receive(:discovery_engine_datastore).and_return("data/store")
+    allow(Rails.configuration).to receive(:google_cloud_project_id).and_return("my-fancy-project")
+  end
+
+  describe "#call" do
+    it "purges existing suggestion deny list entries" do
+      update_denylist.call
+
+      expect(client).to have_received(:purge_suggestion_deny_list_entries).with(parent: "data/store")
+      expect(purge_operation).to have_received(:wait_until_done!)
+    end
+
+    it "imports new suggestion deny list entries from GCS" do
+      expected_source = {
+        data_schema: "suggestion_deny_list",
+        input_uris: ["gs://my-fancy-project_vais_artifacts/denylist.jsonl"],
+      }
+
+      update_denylist.call
+
+      expect(client).to have_received(:import_suggestion_deny_list_entries)
+        .with(gcs_source: expected_source, parent: "data/store")
+      expect(import_operation).to have_received(:wait_until_done!)
+    end
+
+    context "when an error occurs during purge" do
+      let(:purge_results) { double("results", message: "Purge failed") }
+
+      before { allow(purge_operation).to receive(:error?).and_return(true) }
+
+      it "raises an error" do
+        expect { update_denylist.call }.to raise_error("Purge failed")
+      end
+    end
+
+    context "when an error occurs during import" do
+      let(:import_results) { double("results", message: "Import failed") }
+
+      before { allow(import_operation).to receive(:error?).and_return(true) }
+
+      it "raises an error" do
+        expect { update_denylist.call }.to raise_error("Import failed")
+      end
+    end
+
+    context "when there are failed entries during import" do
+      let(:import_results) { double("results", failed_entries_count: 2, imported_entries_count: 0) }
+
+      it "raises an error" do
+        expect { update_denylist.call }.to raise_error("Failed to import 2 entries to autocomplete denylist")
+      end
+    end
+  end
+end