Skip to content

Commit

Permalink
Merge pull request #355 from alphagov/autocomplete-denylist-import
Browse files Browse the repository at this point in the history
Add service and task to update autocomplete denylist
  • Loading branch information
csutter authored Nov 28, 2024
2 parents 144693b + 38e61d6 commit 1a7edf1
Show file tree
Hide file tree
Showing 3 changed files with 143 additions and 0 deletions.
60 changes: 60 additions & 0 deletions app/services/discovery_engine/autocomplete/update_denylist.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
module DiscoveryEngine::Autocomplete
  # Refreshes Discovery Engine's autocomplete denylist from a file in Google Cloud Storage
  #
  # The denylist lets us keep sensitive terms out of the suggestions returned by the autocomplete
  # API. Its entries live in a JSONL file in a Google Cloud Storage bucket rather than inline in
  # this codebase, because the terms themselves may be sensitive (at least until there is an
  # agreed organisational approach to private/public repo splits).
  #
  # An import on its own never removes entries that are already on the denylist, which is why the
  # list is purged first and then re-imported from scratch.
  #
  # See https://cloud.google.com/generative-ai-app-builder/docs/configure-autocomplete#denylist
  class UpdateDenylist
    # Object name of the denylist file inside the Google Cloud Storage bucket
    FILENAME = "denylist.jsonl".freeze

    # Schema identifier for the JSONL data (no other option is supported)
    DATA_SCHEMA = "suggestion_deny_list".freeze

    # client - completion service client used to issue the purge and import requests; defaults to
    #          a v1 Discovery Engine completion service client
    def initialize(client: ::Google::Cloud::DiscoveryEngine.completion_service(version: :v1))
      @client = client
    end

    # Purges the current denylist, then imports the entries from the file in the bucket.
    #
    # Raises a RuntimeError if either operation reports an error, or if the import reports any
    # failed entries. The import is not attempted if the purge fails.
    def call
      purge_existing_entries
      import_entries_from_gcs
    end

    private

    attr_reader :client

    # Removes every entry currently on the denylist, blocking until the operation completes
    def purge_existing_entries
      operation = client.purge_suggestion_deny_list_entries(parent:)
      operation.wait_until_done!
      raise operation.results.message if operation.error?

      Rails.logger.info("Successfully purged autocomplete denylist")
    end

    # Imports the denylist file from the bucket, blocking until the operation completes
    def import_entries_from_gcs
      operation = client.import_suggestion_deny_list_entries(gcs_source:, parent:)
      operation.wait_until_done!
      raise operation.results.message if operation.error?

      results = operation.results

      # The operation as a whole can succeed while individual entries are rejected
      failed_count = results.failed_entries_count
      raise "Failed to import #{failed_count} entries to autocomplete denylist" if failed_count.positive?

      Rails.logger.info("Successfully imported #{results.imported_entries_count} entries to autocomplete denylist")
    end

    # Describes where the import should read the denylist from and how to interpret it
    def gcs_source
      {
        data_schema: DATA_SCHEMA,
        input_uris: ["gs://#{bucket_name}/#{FILENAME}"],
      }
    end

    # Bucket name is derived from the configured Google Cloud project ID
    def bucket_name
      project_id = Rails.configuration.google_cloud_project_id
      "#{project_id}_vais_artifacts"
    end

    # The configured datastore is the parent resource for both denylist requests
    def parent
      Rails.configuration.discovery_engine_datastore
    end
  end
end
6 changes: 6 additions & 0 deletions lib/tasks/autocomplete.rake
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
namespace :autocomplete do
  desc "Trigger a purge and re-import of the autocomplete denylist"
  task update_denylist: :environment do
    # All of the work lives in the service object; this task only gives it a command line entry point
    service = DiscoveryEngine::Autocomplete::UpdateDenylist.new
    service.call
  end
end
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
RSpec.describe DiscoveryEngine::Autocomplete::UpdateDenylist do
  subject(:service) { described_class.new(client:) }

  # Contexts flip these flags to simulate an operation finishing in an error state
  let(:purge_errored) { false }
  let(:import_errored) { false }

  let(:purge_results) { double("results") }
  let(:import_results) { double("results", failed_entries_count: 0, imported_entries_count: 100) }

  let(:purge_operation) do
    instance_double(
      Gapic::Operation,
      wait_until_done!: nil,
      error?: purge_errored,
      results: purge_results,
    )
  end
  let(:import_operation) do
    instance_double(
      Gapic::Operation,
      wait_until_done!: nil,
      error?: import_errored,
      results: import_results,
    )
  end

  let(:client) do
    instance_double(
      ::Google::Cloud::DiscoveryEngine::V1::CompletionService::Client,
      purge_suggestion_deny_list_entries: purge_operation,
      import_suggestion_deny_list_entries: import_operation,
    )
  end

  before do
    allow(Rails.configuration).to receive(:discovery_engine_datastore).and_return("data/store")
    allow(Rails.configuration).to receive(:google_cloud_project_id).and_return("my-fancy-project")
  end

  describe "#call" do
    it "purges existing suggestion deny list entries" do
      service.call

      expect(client).to have_received(:purge_suggestion_deny_list_entries).with(parent: "data/store")
      expect(purge_operation).to have_received(:wait_until_done!)
    end

    it "imports new suggestion deny list entries from GCS" do
      expected_source = {
        data_schema: "suggestion_deny_list",
        input_uris: ["gs://my-fancy-project_vais_artifacts/denylist.jsonl"],
      }

      service.call

      expect(client).to have_received(:import_suggestion_deny_list_entries)
        .with(gcs_source: expected_source, parent: "data/store")
      expect(import_operation).to have_received(:wait_until_done!)
    end

    context "when an error occurs during purge" do
      let(:purge_errored) { true }
      let(:purge_results) { double("results", message: "Purge failed") }

      it "raises an error" do
        expect { service.call }.to raise_error("Purge failed")
      end
    end

    context "when an error occurs during import" do
      let(:import_errored) { true }
      let(:import_results) { double("results", message: "Import failed") }

      it "raises an error" do
        expect { service.call }.to raise_error("Import failed")
      end
    end

    context "when there are failed entries during import" do
      let(:import_results) { double("results", failed_entries_count: 2, imported_entries_count: 0) }

      it "raises an error" do
        expect { service.call }.to raise_error("Failed to import 2 entries to autocomplete denylist")
      end
    end
  end
end

0 comments on commit 1a7edf1

Please sign in to comment.