Skip to content

Commit

Permalink
Add service and task to update autocomplete denylist
Browse files Browse the repository at this point in the history
The data file for the denylist is stored in a GCP Cloud Storage bucket
rather than as configuration in the app itself, as the denylist may
contain sensitive language. The plan is to potentially move this into a
private gem in the future.

In the meantime, this adds a service class to handle the occasionally
required task of purging and re-importing the denylist, as well as a
Rake task to invoke it.
  • Loading branch information
csutter committed Nov 28, 2024
1 parent 3d69d7c commit 38e61d6
Show file tree
Hide file tree
Showing 3 changed files with 143 additions and 0 deletions.
60 changes: 60 additions & 0 deletions app/services/discovery_engine/autocomplete/update_denylist.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
module DiscoveryEngine::Autocomplete
  # Refreshes the autocomplete denylist held by Discovery Engine from a JSONL file kept in a
  # Google Cloud Storage bucket.
  #
  # Denylisted terms are removed from the suggestions returned by the autocomplete API. An inline
  # source is also supported, but the list may include sensitive terms that we don't want to keep
  # in the codebase (until there is an agreed organisational approach to private/public repo
  # splits), so the file is read from the bucket instead.
  #
  # An import on its own never removes entries that are already on the denylist, which is why the
  # existing list is purged first and then imported again in full.
  #
  # See https://cloud.google.com/generative-ai-app-builder/docs/configure-autocomplete#denylist
  class UpdateDenylist
    # Object name of the denylist file inside the Google Cloud Storage bucket
    FILENAME = "denylist.jsonl".freeze

    # Schema identifier for the JSONL data (no other value is supported)
    DATA_SCHEMA = "suggestion_deny_list".freeze

    # client: the completion service client used for the purge and import requests; when omitted,
    #   a v1 completion service client is built.
    def initialize(client: ::Google::Cloud::DiscoveryEngine.completion_service(version: :v1))
      @client = client
    end

    # Purges the current denylist, then imports the file from the bucket.
    #
    # Raises a RuntimeError when either operation reports an error (using the message exposed by
    # the operation's results), or when the import reports any failed entries.
    def call
      purge_existing_entries
      import_entries_from_bucket
    end

    private

    attr_reader :client

    # Requests the purge and waits for it, logging once it has completed without error.
    def purge_existing_entries
      wait_for(client.purge_suggestion_deny_list_entries(parent:))

      Rails.logger.info("Successfully purged autocomplete denylist")
    end

    # Requests the import and waits for it, then checks the reported entry counts.
    def import_entries_from_bucket
      operation = client.import_suggestion_deny_list_entries(gcs_source: denylist_source, parent:)
      outcome = wait_for(operation).results

      failed_count = outcome.failed_entries_count
      raise "Failed to import #{failed_count} entries to autocomplete denylist" if failed_count.positive?

      Rails.logger.info(
        "Successfully imported #{outcome.imported_entries_count} entries to autocomplete denylist",
      )
    end

    # Blocks until the operation is done and raises if it reports an error; returns the operation
    # so callers can read its results.
    def wait_for(operation)
      operation.wait_until_done!
      raise operation.results.message if operation.error?

      operation
    end

    # Describes where the denylist file lives and which schema it uses.
    def denylist_source
      {
        data_schema: DATA_SCHEMA,
        input_uris: ["gs://#{bucket_name}/#{FILENAME}"],
      }
    end

    # Bucket name is derived from the configured Google Cloud project ID.
    def bucket_name
      project_id = Rails.configuration.google_cloud_project_id
      "#{project_id}_vais_artifacts"
    end

    # The configured datastore is passed as the parent of both requests.
    def parent
      Rails.configuration.discovery_engine_datastore
    end
  end
end
6 changes: 6 additions & 0 deletions lib/tasks/autocomplete.rake
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
namespace :autocomplete do
  desc "Trigger a purge and re-import of the autocomplete denylist"
  task update_denylist: :environment do
    # No client is passed, so the service builds its own default client.
    denylist_updater = DiscoveryEngine::Autocomplete::UpdateDenylist.new
    denylist_updater.call
  end
end
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
RSpec.describe DiscoveryEngine::Autocomplete::UpdateDenylist do
  subject(:update_denylist) { described_class.new(client:) }

  # Verified double for the completion service client; both requests return a stubbed operation
  # so that no real API calls are made.
  let(:client) do
    instance_double(
      ::Google::Cloud::DiscoveryEngine::V1::CompletionService::Client,
      purge_suggestion_deny_list_entries: purge_operation,
      import_suggestion_deny_list_entries: import_operation,
    )
  end
  # By default both operations complete without error; individual contexts override this.
  let(:purge_operation) { instance_double(Gapic::Operation, wait_until_done!: nil, error?: false, results: purge_results) }
  let(:purge_results) { double("results") }
  let(:import_operation) { instance_double(Gapic::Operation, wait_until_done!: nil, error?: false, results: import_results) }
  let(:import_results) { double("results", failed_entries_count: 0, imported_entries_count: 100) }

  before do
    allow(Rails.configuration).to receive_messages(
      discovery_engine_datastore: "data/store",
      google_cloud_project_id: "my-fancy-project",
    )
  end

  describe "#call" do
    it "purges existing suggestion deny list entries" do
      update_denylist.call

      expect(client).to have_received(:purge_suggestion_deny_list_entries)
        .with(parent: "data/store")
      expect(purge_operation).to have_received(:wait_until_done!)
    end

    it "imports new suggestion deny list entries from GCS" do
      update_denylist.call

      expect(client).to have_received(:import_suggestion_deny_list_entries).with(
        gcs_source: {
          data_schema: "suggestion_deny_list",
          input_uris: ["gs://my-fancy-project_vais_artifacts/denylist.jsonl"],
        },
        parent: "data/store",
      )
      expect(import_operation).to have_received(:wait_until_done!)
    end

    # Guards against the two requests being swapped: purging after the import would leave the
    # denylist empty, yet the two examples above would still pass.
    it "requests the purge before the import" do
      update_denylist.call

      expect(client).to have_received(:purge_suggestion_deny_list_entries).ordered
      expect(client).to have_received(:import_suggestion_deny_list_entries).ordered
    end

    context "when an error occurs during purge" do
      let(:purge_results) { double("results", message: "Purge failed") }

      before do
        allow(purge_operation).to receive(:error?).and_return(true)
      end

      it "raises an error" do
        expect { update_denylist.call }.to raise_error("Purge failed")
      end

      # A failed purge must stop the run rather than carry on to the import.
      it "does not attempt the import" do
        expect { update_denylist.call }.to raise_error("Purge failed")

        expect(client).not_to have_received(:import_suggestion_deny_list_entries)
      end
    end

    context "when an error occurs during import" do
      let(:import_results) { double("results", message: "Import failed") }

      before do
        allow(import_operation).to receive(:error?).and_return(true)
      end

      it "raises an error" do
        expect { update_denylist.call }.to raise_error("Import failed")
      end

      # The purge has completed by this point, so the only success message that may be logged is
      # the one for the purge.
      it "does not log a successful import" do
        allow(Rails.logger).to receive(:info)

        expect { update_denylist.call }.to raise_error("Import failed")

        expect(Rails.logger).not_to have_received(:info).with(/Successfully imported/)
      end
    end

    context "when there are failed entries during import" do
      let(:import_results) { double("results", failed_entries_count: 2, imported_entries_count: 0) }

      it "raises an error" do
        expect { update_denylist.call }.to raise_error("Failed to import 2 entries to autocomplete denylist")
      end
    end
  end
end

0 comments on commit 38e61d6

Please sign in to comment.