Skip to content

Commit 38e61d6

Browse files
committed
Add service and task to update autocomplete denylist
The data file for the denylist is stored in a GCP Cloud Storage bucket rather than as configuration in the app itself, as the denylist may contain sensitive language. The plan is to potentially move this into a private gem in the future. In the meantime, this adds a service class to handle the occasionally required task of purging and re-importing the denylist, as well as a Rake task to invoke it.
1 parent 3d69d7c commit 38e61d6

File tree

3 files changed

+143
-0
lines changed

3 files changed

+143
-0
lines changed
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
module DiscoveryEngine::Autocomplete
  # Replaces the contents of Discovery Engine's autocomplete denylist with the entries held in a
  # JSONL file in a Google Cloud Storage bucket.
  #
  # The denylist lets us keep sensitive terms out of the suggestions returned by the autocomplete
  # API. An inline source is also supported, but the list may itself contain sensitive terms that
  # we don't want to keep in the codebase (until there is an agreed organisational approach to
  # private/public repo splits), so it is read from the bucket instead.
  #
  # Importing does not remove entries that are already on the denylist, which is why the list is
  # purged first and only then re-imported.
  #
  # See https://cloud.google.com/generative-ai-app-builder/docs/configure-autocomplete#denylist
  class UpdateDenylist
    # Name of the denylist file inside the Google Cloud Storage bucket
    FILENAME = "denylist.jsonl".freeze

    # Schema of the data in the JSONL file (the only supported option)
    DATA_SCHEMA = "suggestion_deny_list".freeze

    # client - completion service client used to issue the purge and import requests; defaults
    #          to a v1 Discovery Engine completion service client.
    def initialize(client: ::Google::Cloud::DiscoveryEngine.completion_service(version: :v1))
      @client = client
    end

    # Purges the current denylist, then imports the denylist file from the bucket, logging each
    # successful step.
    #
    # Raises a RuntimeError if either operation reports an error, or if the import reports any
    # failed entries.
    def call
      await_completion(client.purge_suggestion_deny_list_entries(parent:))
      Rails.logger.info("Successfully purged autocomplete denylist")

      import = await_completion(
        client.import_suggestion_deny_list_entries(gcs_source: denylist_source, parent:),
      )

      failed_count = import.results.failed_entries_count
      if failed_count.positive?
        raise "Failed to import #{failed_count} entries to autocomplete denylist"
      end

      imported_count = import.results.imported_entries_count
      Rails.logger.info("Successfully imported #{imported_count} entries to autocomplete denylist")
    end

  private

    attr_reader :client

    # Blocks until the given operation is done and hands it back, raising with the operation's
    # own message if it finished with an error.
    def await_completion(operation)
      operation.wait_until_done!
      raise operation.results.message if operation.error?

      operation
    end

    # Describes where the import should read the denylist from, and in which schema
    def denylist_source
      {
        data_schema: DATA_SCHEMA,
        input_uris: ["gs://#{bucket_name}/#{FILENAME}"],
      }
    end

    # Bucket name is derived from the configured Google Cloud project ID
    def bucket_name
      "#{Rails.configuration.google_cloud_project_id}_vais_artifacts"
    end

    # Resource that both requests are issued against (the configured datastore)
    def parent
      Rails.configuration.discovery_engine_datastore
    end
  end
end

lib/tasks/autocomplete.rake

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# Maintenance tasks for Discovery Engine autocomplete
namespace :autocomplete do
  desc "Trigger a purge and re-import of the autocomplete denylist"
  task update_denylist: %i[environment] do
    # The service is built with its default client and does the purge and the import itself
    denylist_updater = DiscoveryEngine::Autocomplete::UpdateDenylist.new
    denylist_updater.call
  end
end
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
# Specs for the autocomplete denylist updater. The completion service client and the operations
# it returns are doubles, and the Rails configuration values the service reads are stubbed.
RSpec.describe DiscoveryEngine::Autocomplete::UpdateDenylist do
  subject(:update_denylist) { described_class.new(client:) }

  let(:client) do
    instance_double(
      ::Google::Cloud::DiscoveryEngine::V1::CompletionService::Client,
      purge_suggestion_deny_list_entries: purge_operation,
      import_suggestion_deny_list_entries: import_operation,
    )
  end

  # Unless a context overrides them, both operations complete without an error
  let(:purge_operation) do
    instance_double(
      Gapic::Operation,
      wait_until_done!: nil,
      error?: false,
      results: purge_results,
    )
  end
  let(:purge_results) { double("results") }
  let(:import_operation) do
    instance_double(
      Gapic::Operation,
      wait_until_done!: nil,
      error?: false,
      results: import_results,
    )
  end
  let(:import_results) do
    double("results", failed_entries_count: 0, imported_entries_count: 100)
  end

  before do
    stubbed_config = {
      discovery_engine_datastore: "data/store",
      google_cloud_project_id: "my-fancy-project",
    }
    allow(Rails.configuration).to receive_messages(stubbed_config)
  end

  describe "#call" do
    it "purges existing suggestion deny list entries" do
      update_denylist.call

      expect(client).to have_received(:purge_suggestion_deny_list_entries).with(parent: "data/store")
      expect(purge_operation).to have_received(:wait_until_done!)
    end

    it "imports new suggestion deny list entries from GCS" do
      expected_source = {
        data_schema: "suggestion_deny_list",
        input_uris: ["gs://my-fancy-project_vais_artifacts/denylist.jsonl"],
      }

      update_denylist.call

      expect(client).to have_received(:import_suggestion_deny_list_entries)
        .with(gcs_source: expected_source, parent: "data/store")
      expect(import_operation).to have_received(:wait_until_done!)
    end

    context "when an error occurs during purge" do
      let(:purge_results) { double("results", message: "Purge failed") }

      before { allow(purge_operation).to receive_messages(error?: true) }

      it "raises an error" do
        expect { update_denylist.call }.to raise_error("Purge failed")
      end
    end

    context "when an error occurs during import" do
      let(:import_results) { double("results", message: "Import failed") }

      before { allow(import_operation).to receive_messages(error?: true) }

      it "raises an error" do
        expect { update_denylist.call }.to raise_error("Import failed")
      end
    end

    context "when there are failed entries during import" do
      let(:import_results) do
        double("results", failed_entries_count: 2, imported_entries_count: 0)
      end

      it "raises an error" do
        expect { update_denylist.call }
          .to raise_error("Failed to import 2 entries to autocomplete denylist")
      end
    end
  end
end

0 commit comments

Comments
 (0)