From 9dca95ad651407159f71ab655a8b645ed672c26b Mon Sep 17 00:00:00 2001 From: Matt Hinz Date: Thu, 12 Dec 2024 09:06:53 -0800 Subject: [PATCH] [HACKATHON] Initial crack at an IdV matcher (#11624) * Initial crack at an IdV matcher Matcher is a state machine that collects IDV "attempts" as they happen and tries to suss out interesting things about them. [skip changelog] * removed unused method --------- Co-authored-by: Douglas Price --- bin/summarize-user-events | 15 +- lib/event_summarizer/idv_matcher.rb | 436 ++++++++++++++++++ .../vendor_result_evaluators/aamva.rb | 129 ++++++ .../instant_verify.rb | 83 ++++ 4 files changed, 653 insertions(+), 10 deletions(-) create mode 100644 lib/event_summarizer/idv_matcher.rb create mode 100644 lib/event_summarizer/vendor_result_evaluators/aamva.rb create mode 100644 lib/event_summarizer/vendor_result_evaluators/instant_verify.rb diff --git a/bin/summarize-user-events b/bin/summarize-user-events index 7b780276bfe..0b4443cfb68 100755 --- a/bin/summarize-user-events +++ b/bin/summarize-user-events @@ -23,9 +23,10 @@ Dir[File.expand_path( end class SummarizeUserEvents - attr_reader :uuid, :from_date, :to_date + attr_reader :uuid, :from_date, :to_date, :zone def initialize(user_uuid: nil, start_time: nil, end_time: nil, zone: 'UTC') + @zone = zone Time.zone = zone @uuid = user_uuid @from_date = parse_time(start_time) || 1.week.ago @@ -41,11 +42,14 @@ class SummarizeUserEvents def matchers @matchers ||= [ EventSummarizer::ExampleMatcher.new, + EventSummarizer::IdvMatcher.new ] end def run find_cloudwatch_events do |event| + Time.zone ||= zone + event['@message'] = JSON.parse(event['@message']) if event['@message'].is_a?(String) matchers.each do |matcher| @@ -90,15 +94,6 @@ class SummarizeUserEvents QUERY end - def find_events(&block) - warn "$stdin.tty? = #{$stdin.tty?}" - if $stdin.tty? - cloudwatch_source(&block) - else - stdin_source(&block) - end - end - def cloudwatch_client @cloudwatch_client ||= Reporting::CloudwatchClient.new( num_threads: 5, diff --git a/lib/event_summarizer/idv_matcher.rb b/lib/event_summarizer/idv_matcher.rb new file mode 100644 index 00000000000..e547ffdf8be --- /dev/null +++ b/lib/event_summarizer/idv_matcher.rb @@ -0,0 +1,436 @@ +# frozen_string_literal: true + +require 'active_support' +require 'active_support/time' + +require 'event_summarizer/vendor_result_evaluators/aamva' +require 'event_summarizer/vendor_result_evaluators/instant_verify' + +module EventSummarizer + class IdvMatcher + IDV_WELCOME_SUBMITTED_EVENT = 'IdV: doc auth welcome submitted' + IDV_GPO_CODE_SUBMITTED_EVENT = 'IdV: enter verify by mail code submitted' + IDV_FINAL_RESOLUTION_EVENT = 'IdV: final resolution' + IDV_VERIFY_PROOFING_RESULTS_EVENT = 'IdV: doc auth verify proofing results' + IPP_ENROLLMENT_STATUS_UPDATED_EVENT = 'GetUspsProofingResultsJob: Enrollment status updated' + PROFILE_ENCRYPTION_INVALID_EVENT = 'Profile Encryption: Invalid' + RATE_LIMIT_REACHED_EVENT = 'Rate Limit Reached' + + EVENT_PROPERTIES = ['@message', 'properties', 'event_properties'].freeze + + VENDORS = { + 'lexisnexis:instant_verify' => { + id: :instant_verify, + name: 'Instant Verify', + evaluator_module: EventSummarizer::VendorResultEvaluators::InstantVerify, + }, + 'aamva:state_id' => { + id: :aamva, + name: 'AAMVA', + evaluator_module: EventSummarizer::VendorResultEvaluators::Aamva, + }, + }.freeze + + UNKNOWN_VENDOR = { + id: :unknown, + name: 'Unknown vendor', + }.freeze + + IdvAttempt = Data.define( + :started_at, + :significant_events, + ) do + def initialize(started_at:, significant_events: []) + super(started_at:, significant_events:) + end + + def gpo? + self.significant_events.any? { |e| e.type == :start_gpo } + end + + def ipp? + self.significant_events.any? { |e| e.type == :start_ipp } + end + + def successful? + self.significant_events.any? do |e| + e.type == :identity_verified + end + end + end.freeze + + SignificantIdvEvent = Data.define( + :timestamp, + :type, + :description, + ).freeze + + attr_reader :current_idv_attempt + attr_reader :idv_attempts + + # @return {Hash,nil} + def handle_cloudwatch_event(event) + @idv_attempts ||= [] + + case event['name'] + when IDV_WELCOME_SUBMITTED_EVENT + start_new_idv_attempt(event:) + + when IDV_FINAL_RESOLUTION_EVENT + for_current_idv_attempt(event:) do + handle_final_resolution_event(event:) + end + + when IDV_GPO_CODE_SUBMITTED_EVENT + for_current_idv_attempt(event:) do + handle_gpo_code_submission(event:) + end + + when IPP_ENROLLMENT_STATUS_UPDATED_EVENT + for_current_idv_attempt(event:) do + handle_ipp_enrollment_status_update(event:) + end + + when IDV_VERIFY_PROOFING_RESULTS_EVENT + for_current_idv_attempt(event:) do + handle_verify_proofing_results_event(event:) + end + + when PROFILE_ENCRYPTION_INVALID_EVENT + for_current_idv_attempt(event:) do + handle_profile_encryption_error(event:) + end + + when RATE_LIMIT_REACHED_EVENT + handle_rate_limit_reached(event:) + + else + warn event['name'] if ENV['LOG_UNHANDLED_EVENTS'] + end + end + + def finish + finish_current_idv_attempt + + self.idv_attempts.map { |a| summarize_idv_attempt(a) }.tap do + idv_attempts.clear + end + end + + private + + def add_significant_event( + timestamp:, + type:, + description: + ) + current_idv_attempt.significant_events << SignificantIdvEvent.new( + timestamp:, + type:, + description:, + ) + end + + def for_current_idv_attempt(event:, &block) + if !current_idv_attempt + warn <<~WARNING + Encountered #{event['name']} without seeing a '#{IDV_WELCOME_SUBMITTED_EVENT}' event first. + This could indicate you need to include earlier events in your request. + WARNING + return + end + + block.call(event) + end + + def finish_current_idv_attempt + idv_attempts << current_idv_attempt if current_idv_attempt + @current_idv_attempt = nil + end + + # @return {Hash,nil} + def handle_final_resolution_event(event:) + timestamp = Time.zone.parse(event['@timestamp']) + + gpo_pending = !!event.dig( + *EVENT_PROPERTIES, + 'gpo_verification_pending', + ) + + if gpo_pending + add_significant_event( + type: :start_gpo, + timestamp:, + description: 'User requested a letter to verfy by mail', + ) + end + + ipp_pending = !!event.dig( + *EVENT_PROPERTIES, + 'in_person_verification_pending', + ) + + if ipp_pending + add_significant_event( + type: :start_ipp, + timestamp:, + descirption: 'User entered the in-person proofing flow', + ) + end + + fraud_review_pending = !!event.dig( + *EVENT_PROPERTIES, + 'fraud_review_pending', + ) + + if fraud_review_pending + add_significant_event( + type: :flagged_for_fraud, + timestamp:, + description: 'User was flagged for fraud', + ) + end + + pending = + gpo_pending || + ipp_pending || + fraud_review_pending + + if !pending + add_significant_event( + type: :verified, + timestamp:, + description: 'User completed identity verification (remote unsupervised flow)', + ) + + finish_current_idv_attempt + end + end + + def handle_gpo_code_submission(event:) + timestamp = Time.zone.parse(event['@timestamp']) + success = event.dig(*EVENT_PROPERTIES, 'success') + + if !success + add_significant_event( + type: :gpo_code_failure, + timestamp:, + description: 'The user entered an invalid GPO code', + ) + return + end + + # User successfully entered GPO code. If nothing else is pending, + # then they are fully verified + + ipp_pending = !!event.dig( + *EVENT_PROPERTIES, + 'pending_in_person_enrollment', + ) + + fraud_review_pending = !!event.dig( + *EVENT_PROPERTIES, + 'fraud_check_failed', + ) + + fully_verified = !(ipp_pending || fraud_review_pending) + + description = ipp_pending ? + 'User successfully entered a GPO code, but is still pending in-person proofing' + : 'User successfully entered a GPO code' + + add_significant_event( + type: :gpo_code_success, + timestamp:, + description:, + ) + + if fully_verified + add_significant_event( + type: :verified, + timestamp:, + description: 'User completed identity verification', + ) + + finish_current_idv_attempt + end + end + + def handle_ipp_enrollment_status_update(event:) + timestamp = Time.zone.parse(event['@timestamp']) + passed = event.dig(*EVENT_PROPERTIES, 'passed') + tmx_status = event.dig(*EVENT_PROPERTIES, 'tmx_status') + + return if !passed + + add_significant_event( + type: :ipp_enrollment_complete, + timestamp:, + description: 'User visited the post office and completed IPP enrollment', + ) + + verified = tmx_status != 'review' && tmx_status != 'reject' + + if verified + current_idv_attempt.event << SignificantIdvEvent.new( + type: :verified, + timestamp:, + description: 'User is fully verified', + ) + end + end + + def handle_profile_encryption_error(event:) + caveats = [ + # TODO these need to check if GPO/IPP were still pending at time of the event + current_idv_attempt.gpo? ? 'The user will not be able to enter a GPO code' : nil, + current_idv_attempt.ipp? ? 'the user will not be able to verify in-person' : nil, + ].compact + + add_significant_event( + type: :password_reset, + timestamp: Time.zone.parse(event['@timestamp']), + description: [ + 'The user reset their password and did not provide their personal key.', + caveats.length > 0 ? + "The user will not be able to #{caveats.join(' or ')}" : + nil, + ].compact.join(' '), + ) + end + + def handle_rate_limit_reached(event:) + limiters = { + 'idv_doc_auth' => 'Doc Auth', + } + + limiter_type = event.dig(*EVENT_PROPERTIES, 'limiter_type') + + limit_name = limiters[limiter_type] + + return if limit_name.blank? + + timestamp = Time.zone.parse(event['@timestamp']) + + for_current_idv_attempt(event:) do + add_significant_event( + type: :rate_limited, + timestamp:, + description: "Rate limited for #{limit_name}", + ) + end + end + + def handle_verify_proofing_results_event(event:) + timestamp = Time.zone.parse(event['@timestamp']) + success = event.dig(*EVENT_PROPERTIES, 'success') + + if success + # We only really care about passing identity resolution if the + # user previously failed in this attempt + + prior_failures = current_idv_attempt.significant_events.count do |e| + e[:type] == :failed_identity_resolution + end + + if prior_failures > 0 + # TODO: What changed that made them be able to pass? + + add_significant_event( + timestamp:, + type: :passed_identity_resolution, + description: "User passed identity resolution after #{prior_failures + 1} tries", + ) + end + + return + end + + # Failing identity resolution is where it gets interesting + + prev_count = current_idv_attempt.significant_events.count + + add_events_for_failed_vendor_result( + event.dig( + *EVENT_PROPERTIES, 'proofing_results', 'context', 'stages', 'resolution' + ), + timestamp:, + ) + + add_events_for_failed_vendor_result( + event.dig( + *EVENT_PROPERTIES, 'proofing_results', 'context', 'stages', 'residential_address' + ), + timestamp:, + ) + + add_events_for_failed_vendor_result( + event.dig( + *EVENT_PROPERTIES, 'proofing_results', 'context', 'stages', 'state_id' + ), + timestamp:, + ) + + any_events_added = current_idv_attempt.significant_events.count > prev_count + + if !any_events_added + add_significant_event( + timestamp:, + type: :failed_identity_resolution, + description: 'User failed identity resolution (check logs for reason)', + ) + + end + end + + def add_events_for_failed_vendor_result(result, timestamp:) + return if result['success'] + + vendor = VENDORS[result['vendor_name']] || UNKNOWN_VENDOR + evaluator = vendor[:evaluator_module] + + if !evaluator.present? + add_significant_event( + type: :"#{vendor[:id]}_request_failed", + timestamp:, + description: "Request to #{vendor[:name]} failed.", + ) + return + end + + evaluation = evaluator.evaluate_result(result) + add_significant_event(**evaluation, timestamp:) if evaluation + end + + # @return {IdvAttempt,nil} The previous IdvAttempt (if any) + def start_new_idv_attempt(event:) + finish_current_idv_attempt if current_idv_attempt + + @current_idv_attempt = IdvAttempt.new( + started_at: Time.zone.parse(event['@timestamp']), + ) + end + + def summarize_idv_attempt(attempt) + type = :idv + title = 'Identity verification started' + attributes = attempt.significant_events.map do |e| + { + type: e.type, + description: e.description, + } + end + + if attempt.successful? + title = 'Identity verified' + end + + { + started_at: attempt.started_at, + title:, + type:, + attributes:, + } + end + end +end diff --git a/lib/event_summarizer/vendor_result_evaluators/aamva.rb b/lib/event_summarizer/vendor_result_evaluators/aamva.rb new file mode 100644 index 00000000000..2c1ac9ffd2f --- /dev/null +++ b/lib/event_summarizer/vendor_result_evaluators/aamva.rb @@ -0,0 +1,129 @@ +# frozen_string_literal: true + +module EventSummarizer + module VendorResultEvaluators + module Aamva + ID_TYPES = { + 'state_id_card' => 'non-driving ID card', + 'drivers_license' => 'drivers\' license', + }.freeze + + # TODO: Load these from the AAMVA proofer or put them somewhere common + + REQUIRED_VERIFICATION_ATTRIBUTES = %i[ + state_id_number + dob + last_name + first_name + ].freeze + + REQUIRED_IF_PRESENT_ATTRIBUTES = [:state_id_expiration].freeze + + # @param result {Hash} The result structure logged to Cloudwatch + # @return [Hash] A Hash with a type, timestamp, and description key. + def self.evaluate_result(result) + if result['success'] + return { + type: :aamva_success, + description: 'AAMVA call succeeded', + } + end + + if result['timed_out'] + return { + type: :aamva_timed_out, + description: 'AAMVA request timed out.', + } + end + + if result['mva_exception'] + state = result['state_id_jurisdiction'] + return { + type: :aamva_mva_exception, + # rubocop:disable Layout/LineLength + description: "AAMVA request failed because the MVA in #{state} failed to return a response.", + # rubocop:enable Layout/LineLength + } + end + + if result['exception'] + + description = 'AAMVA request resulted in an exception' + + m = /ExceptionText: (.+?),/.match(result['exception']) + if m.present? + description = "#{description} (#{m[1]})" + end + + return { + type: :aamva_exception, + description:, + } + end + + # The API call failed because of actual errors in the user's data. + # Try to come up with an explanation + + explanation = explain_errors(result) || 'Check logs for more info.' + + return { + type: :aamva_error, + description: "AAMVA request failed. #{explanation}", + } + end + + def self.explain_errors(result) + # The values in the errors object are arrays + attributes = {} + result['errors'].each do |key, values| + attributes[key] = values.first + end + + id_type = ID_TYPES[result['state_id_type']] || 'id card' + state = result['state_id_jurisdiction'] + + if mva_says_invalid_id_number?(attributes) + # rubocop:disable Layout/LineLength + return "The ID # from the user's #{id_type} was invalid according to the state of #{state}" + # rubocop:enable Layout/LineLength + end + + failed_attributes = relevant_failed_attributes(attributes) + + if !failed_attributes.empty? + plural = failed_attributes.length == 1 ? '' : 's' + + # rubocop:disable Layout/LineLength + "#{failed_attributes.length} attribute#{plural} failed to validate: #{failed_attributes.join(', ')}" + # rubocop:enable Layout/LineLength + end + end + + def self.mva_says_invalid_id_number?(attributes) + # When all attributes are marked "MISSING", except ID number, + # which is marked "UNVERIFIED", that indicates the MVA could not + # find the ID number to compare PII + + missing_count = attributes.count do |_attr, status| + status == 'MISSING' + end + + attributes['state_id_number'] == 'UNVERIFIED' && missing_count == attributes.count - 1 + end + + def self.relevant_failed_attributes(attributes) + failed_attributes = Set.new + + REQUIRED_VERIFICATION_ATTRIBUTES.each do |attr| + failed_attributes << attr if attributes[attr] != 'VERIFIED' + end + + REQUIRED_IF_PRESENT_ATTRIBUTES.each do |attr| + failed_attributes << attr if attributes[attr].present? && attributes[attr] != 'VERIFIED' + end + + failed_attributes + end + end + end +end diff --git a/lib/event_summarizer/vendor_result_evaluators/instant_verify.rb b/lib/event_summarizer/vendor_result_evaluators/instant_verify.rb new file mode 100644 index 00000000000..81dc58ba54f --- /dev/null +++ b/lib/event_summarizer/vendor_result_evaluators/instant_verify.rb @@ -0,0 +1,83 @@ +# frozen_string_literal: true + +module EventSummarizer + module VendorResultEvaluators + module InstantVerify + # @param result {Hash} The result structure logged to Cloudwatch + # @return [Hash] A Hash with a type and description keys. + def self.evaluate_result(result) + if result['success'] + return { + type: :instant_verify_success, + description: 'Instant Verify call succeeded', + } + end + + if result['timed_out'] + return { + type: :instant_verify_timed_out, + description: 'Instant Verify request timed out.', + } + end + + if result['exception'] + return { + type: :instant_verify_exception, + description: 'Instant Verify request resulted in an exception', + } + end + + # The API call failed because of actual errors in the user's data. + # Try to come up with an explanation + + explanation = explain_errors(result) || 'Check logs for more info.' + + return { + type: :instant_verify_error, + description: "Instant Verify request failed. #{explanation}", + } + end + + # Attempts to render a legible explanation of what went wrong in a + # LexisNexis Instant Verify request. + # @param result {Hash} The result structure logged to Cloudwatch + # @return {String} + def self.explain_errors(result) + # (The structure of the 'errors' key for Instant Verify is kind of weird) + + failed_items = [] + + result.dig('errors', 'InstantVerify')&.each do |iv_instance| + next if iv_instance['ProductStatus'] != 'fail' + iv_instance['Items'].each do |item| + if item['ItemStatus'] == 'fail' + failed_items << item + end + end + end + + if failed_items.empty? + return 'Check the full logs for more info.' + end + + checks = failed_items.map do |item| + name = item['ItemName'] + reason = item['ItemReason'] + reason_code = reason ? reason['Code'] : nil + + if reason_code + # TODO: Translate these reason codes to plain language + # TODO: Add suggestions for how the user could remedy + "#{name} (#{reason_code})" + else + name + end + end + + plural = checks.length == 1 ? '' : 's' + + "#{checks.length} check#{plural} failed: #{checks.join(", ")}" + end + end + end +end