Skip to content

Commit

Permalink
Refactor quality monitoring; new datasets
Browse files Browse the repository at this point in the history
- Move to using original CSV files to avoid need for YAML conversion
- Make judgement and running more generic to support multiple kinds of
  judgement and treat invariants as one of those
  • Loading branch information
csutter committed Jan 31, 2024
1 parent 59e4ab9 commit d64ebfb
Show file tree
Hide file tree
Showing 16 changed files with 1,511 additions and 397 deletions.
33 changes: 0 additions & 33 deletions app/services/quality_monitoring/check_result_invariants.rb

This file was deleted.

25 changes: 25 additions & 0 deletions app/services/quality_monitoring/dataset_loader.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
require "csv"

module QualityMonitoring
  # Reads a dataset CSV that has `query` and `link` header columns and groups
  # the links by the query they are listed against.
  class DatasetLoader
    # file_path - the path passed to the constructor
    # data      - Hash mapping each query to the Array of links listed for it,
    #             in file order. Looking up a query that is not in the file
    #             returns an empty, frozen Array.
    attr_reader :file_path, :data

    # Loads the whole file eagerly; any error raised by CSV.foreach (for
    # example for an unreadable path) propagates to the caller.
    def initialize(file_path)
      @file_path = file_path
      # A Hash default object is shared by every missing-key lookup. Freezing
      # it stops a caller from mutating it in place through `data`, which
      # would otherwise change the result of every later missing-key lookup.
      @data = Hash.new([].freeze)

      load_data
    end

    private

    # Appends each row's link to the list for that row's query.
    def load_data
      CSV.foreach(file_path, headers: true) do |row|
        # `+=` stores a fresh, unfrozen array under the key, so the shared
        # default itself is never modified.
        data[row["query"]] += [row["link"]]
      end
    end
  end
end
37 changes: 37 additions & 0 deletions app/services/quality_monitoring/judge.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
module QualityMonitoring
  # Compares the links returned for a search against the links that were
  # expected, and scores the overlap as recall or precision.
  class Judge
    attr_reader :result_links, :expected_links

    # Builds a Judge by running a search for `query` and collecting the link
    # of each result.
    #
    # query          - the search text, sent as the `q` parameter
    # expected_links - a link or list of links expected in the results
    # cutoff         - maximum number of expected links allowed (default 10)
    #
    # Raises ArgumentError if there are more expected links than `cutoff`, or
    # (via the constructor) if there are none.
    #
    # NOTE(review): `cutoff` only bounds the expected links here; nothing in
    # this method limits how many search results are judged - confirm that the
    # search itself returns no more than `cutoff` results.
    def self.for_query(query, expected_links, cutoff: 10)
      # Coerce first so that nil or a single link is handled the same way the
      # constructor handles it, instead of failing on `.count`.
      expected_links = Array(expected_links)
      if expected_links.count > cutoff
        raise ArgumentError, "cannot have more than cutoff (#{cutoff}) expected links"
      end

      query_params = { q: query }
      result_links = DiscoveryEngine::Query::Search.new(query_params).result_set.results.map(&:link)

      new(result_links, expected_links)
    end

    # result_links   - links that were actually returned (nil becomes [])
    # expected_links - links that should have been returned
    #
    # Raises ArgumentError if no expected links are given.
    def initialize(result_links, expected_links)
      @result_links = Array(result_links)
      @expected_links = Array(expected_links)

      # Check the coerced value: the raw argument shadows the reader here and
      # may be nil, which does not respond to `empty?`.
      raise ArgumentError, "at least one expected link is required" if @expected_links.empty?
    end

    # Fraction of the expected links that appear in the result links, as a
    # Float between 0.0 and 1.0.
    def recall
      expected_links.count { result_links.include?(_1) }.to_f / expected_links.count
    end

    # Fraction of the result links that appear in the expected links, as a
    # Float between 0.0 and 1.0. Returns 0.0 when there are no results, which
    # also avoids dividing by zero.
    def precision
      return 0.0 if result_links.empty?

      result_links.count { expected_links.include?(_1) }.to_f / result_links.count
    end
  end
end
12 changes: 0 additions & 12 deletions app/services/quality_monitoring/result_invariant.rb

This file was deleted.

4 changes: 0 additions & 4 deletions app/services/quality_monitoring/result_invariant_violation.rb

This file was deleted.

79 changes: 79 additions & 0 deletions app/services/quality_monitoring/runner.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
module QualityMonitoring
  # Judges every query in a dataset file, logs the mean score for the run, and
  # reports the queries whose score falls below a threshold.
  class Runner
    # Reported through GovukError when at least one query scores below
    # `report_query_below_score`.
    class FailuresEncountered < StandardError; end

    attr_reader :file, :type, :cutoff, :report_query_below_score, :judge_by

    # file                     - path to the dataset CSV; its basename without
    #                            ".csv" is used as the dataset name
    # type                     - label for the kind of dataset, used in log
    #                            and error messages
    # cutoff                   - passed through to Judge.for_query (default 10)
    # report_query_below_score - queries scoring below this are reported as
    #                            failures; nil (the default) reports none
    # judge_by                 - name of the Judge method that produces the
    #                            score (default :recall)
    def initialize(file, type, cutoff: 10, report_query_below_score: nil, judge_by: :recall)
      @file = Pathname.new(file)
      @type = type

      @cutoff = cutoff
      @report_query_below_score = report_query_below_score
      @judge_by = judge_by
    end

    # Scores every query in the dataset, logs the mean score, and logs and
    # reports any failures. An error raised while judging a single query is
    # reported through GovukError and does not stop the run.
    def run
      scores, failure_details = judge_queries

      # An empty dataset, or one where every query raised, leaves nothing to
      # average: 0 / 0.0 is NaN, which would otherwise be logged as the score.
      if scores.empty?
        Rails.logger.warn(
          sprintf(
            "[%s] No queries were scored for %s dataset %s",
            self.class.name,
            type,
            dataset_name,
          ),
        )
        return
      end

      log_mean_score(scores)
      # TODO: Send to Prometheus as `quality_monitoring_score{dataset: dataset_name, type: type}`

      report_failures(failure_details) if failure_details.any?
    end

    private

    # Judges each query and returns [scores, failure_details].
    def judge_queries
      scores = []
      failure_details = []

      data.each do |query, expected_links|
        judge = Judge.for_query(query, expected_links, cutoff:)
        score = judge.public_send(judge_by)

        scores << score
        next unless report_query_below_score && score < report_query_below_score

        failure_details << failure_detail(query, score, expected_links - judge.result_links)
      rescue StandardError => e
        # One failing query must not abort the rest of the dataset.
        GovukError.notify(e)
      end

      [scores, failure_details]
    end

    # Describes one query that scored below the threshold.
    # NOTE(review): the first missing link is emitted without a bullet while
    # later ones get one - confirm whether that is intended.
    def failure_detail(query, score, missing_links)
      <<~DETAIL
        '#{query}' #{judge_by}:#{score} is below #{report_query_below_score}, missing:
        #{missing_links.join("\n • ")}
      DETAIL
    end

    # Logs the mean of the given (non-empty) scores at info level.
    def log_mean_score(scores)
      mean_score = scores.sum / scores.size.to_f
      Rails.logger.info(
        sprintf(
          "[%s] Completed run for %s dataset %s with %s:%f",
          self.class.name,
          type,
          dataset_name,
          judge_by,
          mean_score,
        ),
      )
    end

    # Logs all failure details as one warning and reports a single
    # FailuresEncountered error carrying them as extra context.
    def report_failures(failure_details)
      Rails.logger.warn(
        sprintf(
          "[%s] %d failure(s) encountered for %s dataset %s\n%s",
          self.class.name,
          failure_details.size,
          type,
          dataset_name,
          failure_details.join("\n"),
        ),
      )

      err = FailuresEncountered.new(
        "Quality monitoring: #{failure_details.size} failures encountered " \
        "for #{type} dataset #{dataset_name}",
      )
      GovukError.notify(err, extra: { dataset_name:, type:, failure_details: })
    end

    def dataset_name
      file.basename(".csv").to_s
    end

    # Loaded on first use and memoised for the life of the runner.
    def data
      @data ||= DatasetLoader.new(file).data
    end
  end
end
3 changes: 0 additions & 3 deletions config/application.rb
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,5 @@ class Application < Rails::Application

# Query configuration
config.best_bets = config_for(:best_bets)

# Quality monitoring configuration
config.result_invariants = config_for(:result_invariants)
end
end
45 changes: 45 additions & 0 deletions config/quality_monitoring_datasets/invariants/self_assessment.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
query,link
self assessment,/log-in-file-self-assessment-tax-return
personal tax account,/personal-tax-account
self assessment,/self-assessment-tax-returns
sign in,/log-in-register-hmrc-online-services
hmrc,/log-in-register-hmrc-online-services
gateway,/log-in-register-hmrc-online-services
tax return,/self-assessment-tax-returns
log in,/log-in-register-hmrc-online-services
self assesment,/log-in-file-self-assessment-tax-return
utr,/find-utr-number
login,/log-in-register-hmrc-online-services
government gateway,/log-in-register-hmrc-online-services
tax,/log-in-register-hmrc-online-services
tax,/log-in-file-self-assessment-tax-return
sa100,/self-assessment-tax-return-forms
tax return,/log-in-file-self-assessment-tax-return
personal tax,/log-in-register-hmrc-online-services
self assessment,/pay-self-assessment-tax-bill
pta,/personal-tax-account
self assessment,/log-in-register-hmrc-online-services
utr number,/find-utr-number
self assesment,/self-assessment-tax-returns
hmrc sign in,/log-in-register-hmrc-online-services
tax refund,/claim-tax-refund
self assessment tax return,/log-in-file-self-assessment-tax-return
self,/log-in-file-self-assessment-tax-return
pay self assessment,/pay-self-assessment-tax-bill
personal tax,/personal-tax-account
tax rebate,/claim-tax-refund
register for self assessment,/register-for-self-assessment
sa1,/guidance/register-for-self-assessment-if-you-are-not-self-employed
pay tax,/pay-self-assessment-tax-bill
tax account,/log-in-register-hmrc-online-services
hmrc login,/log-in-register-hmrc-online-services
personal tax account,/log-in-register-hmrc-online-services
paying hmrc,/pay-tax-debit-credit-card
register for self assessment,/guidance/register-for-self-assessment-if-you-are-not-self-employed
contact hmrc,/government/organisations/hm-revenue-customs/contact/self-assessment
tell hmrc about underpaid tax from previous years,/guidance/tell-hmrc-about-underpaid-tax-from-previous-years
sa100,/government/publications/self-assessment-tax-return-sa100
hmrc services,/log-in-register-hmrc-online-services
64-8,/government/publications/tax-agents-and-advisers-authorising-your-agent-64-8
tax,/self-assessment-tax-returns
self assessment login,/log-in-file-self-assessment-tax-return
Loading

0 comments on commit d64ebfb

Please sign in to comment.