Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add service and task to update autocomplete denylist #355

Merged
merged 1 commit into from
Nov 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 60 additions & 0 deletions app/services/discovery_engine/autocomplete/update_denylist.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
module DiscoveryEngine::Autocomplete
  # Rebuilds Discovery Engine's autocomplete denylist from a JSONL file held in Google Cloud Storage.
  #
  # The denylist lets us suppress sensitive terms from the suggestions returned by the autocomplete
  # API. The API also supports an inline source, but the list may itself contain sensitive terms
  # that we don't want to keep in the codebase (until we have an agreed organisational approach to
  # private/public repo splits), so the entries are read from a bucket instead.
  #
  # An import does not remove entries that are already on the denylist, which is why the existing
  # list is purged before the import runs.
  #
  # See https://cloud.google.com/generative-ai-app-builder/docs/configure-autocomplete#denylist
  class UpdateDenylist
    # Name of the object in the Google Cloud Storage bucket that holds the denylist
    FILENAME = "denylist.jsonl".freeze

    # Schema of the data in the JSONL file (this is the only supported option)
    DATA_SCHEMA = "suggestion_deny_list".freeze

    # @param client completion service client; it must respond to
    #   +purge_suggestion_deny_list_entries+ and +import_suggestion_deny_list_entries+.
    #   Defaults to a v1 Discovery Engine completion service client.
    def initialize(client: ::Google::Cloud::DiscoveryEngine.completion_service(version: :v1))
      @client = client
    end

    # Purges the current denylist, then imports the entries from the bucket.
    #
    # @raise [RuntimeError] if either operation reports an error, or if the import reports any
    #   failed entries
    # @return the result of the final log call
    def call
      purge_existing_entries
      import_entries_from_bucket
    end

  private

    attr_reader :client

    # Empties the denylist and logs once the purge operation has finished without error.
    def purge_existing_entries
      complete!(client.purge_suggestion_deny_list_entries(parent:))

      Rails.logger.info("Successfully purged autocomplete denylist")
    end

    # Imports the denylist file from the bucket and logs how many entries were imported.
    # Raises if the operation errors or reports one or more failed entries.
    def import_entries_from_bucket
      gcs_source = { data_schema: DATA_SCHEMA, input_uris: [source_uri] }
      operation = complete!(client.import_suggestion_deny_list_entries(gcs_source:, parent:))

      failed = operation.results.failed_entries_count
      if failed.positive?
        raise "Failed to import #{failed} entries to autocomplete denylist"
      end

      imported = operation.results.imported_entries_count
      Rails.logger.info("Successfully imported #{imported} entries to autocomplete denylist")
    end

    # Blocks until the given long-running operation is done and raises with the message from its
    # results if it reports an error. Returns the operation so callers can read its results.
    def complete!(operation)
      operation.wait_until_done!
      raise operation.results.message if operation.error?

      operation
    end

    # gs:// URI of the denylist file
    def source_uri
      "gs://#{bucket_name}/#{FILENAME}"
    end

    # Bucket name, built from the configured Google Cloud project ID
    def bucket_name
      "#{Rails.configuration.google_cloud_project_id}_vais_artifacts"
    end

    # The configured datastore, passed as the parent of both denylist operations
    def parent
      Rails.configuration.discovery_engine_datastore
    end
  end
end
6 changes: 6 additions & 0 deletions lib/tasks/autocomplete.rake
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
namespace :autocomplete do
  desc "Trigger a purge and re-import of the autocomplete denylist"
  # Depends on :environment because the service reads Rails.configuration and Rails.logger.
  task update_denylist: %i[environment] do
    updater = DiscoveryEngine::Autocomplete::UpdateDenylist.new
    updater.call
  end
end
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
RSpec.describe DiscoveryEngine::Autocomplete::UpdateDenylist do
  subject(:service) { described_class.new(client:) }

  # Completion service client stub that hands back the two long-running operations below.
  let(:client) do
    instance_double(
      ::Google::Cloud::DiscoveryEngine::V1::CompletionService::Client,
      purge_suggestion_deny_list_entries: purge_operation,
      import_suggestion_deny_list_entries: import_operation,
    )
  end

  # Contexts override the *_errored and *_results lets to simulate failures.
  let(:purge_errored) { false }
  let(:purge_results) { double("results") }
  let(:purge_operation) do
    instance_double(
      Gapic::Operation,
      wait_until_done!: nil,
      error?: purge_errored,
      results: purge_results,
    )
  end

  let(:import_errored) { false }
  let(:import_results) { double("results", failed_entries_count: 0, imported_entries_count: 100) }
  let(:import_operation) do
    instance_double(
      Gapic::Operation,
      wait_until_done!: nil,
      error?: import_errored,
      results: import_results,
    )
  end

  before do
    allow(Rails.configuration).to receive(:discovery_engine_datastore).and_return("data/store")
    allow(Rails.configuration).to receive(:google_cloud_project_id).and_return("my-fancy-project")
  end

  describe "#call" do
    it "purges existing suggestion deny list entries" do
      service.call

      expect(client).to have_received(:purge_suggestion_deny_list_entries).with(parent: "data/store")
      expect(purge_operation).to have_received(:wait_until_done!)
    end

    it "imports new suggestion deny list entries from GCS" do
      service.call

      expected_source = {
        data_schema: "suggestion_deny_list",
        input_uris: ["gs://my-fancy-project_vais_artifacts/denylist.jsonl"],
      }
      expect(client).to have_received(:import_suggestion_deny_list_entries)
        .with(gcs_source: expected_source, parent: "data/store")
      expect(import_operation).to have_received(:wait_until_done!)
    end

    context "when an error occurs during purge" do
      let(:purge_errored) { true }
      let(:purge_results) { double("results", message: "Purge failed") }

      it "raises an error" do
        expect { service.call }.to raise_error("Purge failed")
      end
    end

    context "when an error occurs during import" do
      let(:import_errored) { true }
      let(:import_results) { double("results", message: "Import failed") }

      it "raises an error" do
        expect { service.call }.to raise_error("Import failed")
      end
    end

    context "when there are failed entries during import" do
      let(:import_results) { double("results", failed_entries_count: 2, imported_entries_count: 0) }

      it "raises an error" do
        expect { service.call }.to raise_error("Failed to import 2 entries to autocomplete denylist")
      end
    end
  end
end