Skip to content

Commit

Permalink
Merge pull request #140 from doctolib/TT-23249_increment_lock_timeout
Browse files Browse the repository at this point in the history
Tt 23249 increment lock_timeout gradually to reduce failing migrations
  • Loading branch information
CharlesDelannoy authored Apr 24, 2024
2 parents be9e6f8 + 43f730a commit 2fc2624
Show file tree
Hide file tree
Showing 4 changed files with 132 additions and 36 deletions.
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,10 @@ SafePgMigrations.config.safe_timeout = 5.seconds # Statement timeout used for al

SafePgMigrations.config.lock_timeout = nil # Lock timeout used for all DDL operations except from CREATE / DROP INDEX. If not set, safe_timeout will be used with a deduction of 1% to ensure that the lock timeout is raised in priority

SafePgMigrations.config.increase_lock_timeout_on_retry # Activate the lock timeout increase feature on retry if set to true. See max_lock_timeout_for_retry for more information.

SafePgMigrations.config.max_lock_timeout_for_retry = 1.second # Max lock timeout for the retries for all DDL operations except from CREATE / DROP INDEX. Each retry will increase the lock_timeout (if increase_lock_timeout_on_retry option is set to true) by (max_lock_timeout_for_retry - lock_timeout) / max_tries

SafePgMigrations.config.blocking_activity_logger_verbose = true # Outputs the raw blocking queries on timeout. When false, outputs information about the lock instead

SafePgMigrations.config.sensitive_logger = nil # When given, sensitive data will be sent to this logger instead of the standard output. Must implement method `info`.
Expand Down
46 changes: 31 additions & 15 deletions lib/safe-pg-migrations/configuration.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,50 +5,66 @@
module SafePgMigrations
class Configuration
attr_accessor(*%i[
backfill_batch_size
backfill_pause
blocking_activity_logger_margin
blocking_activity_logger_verbose
default_value_backfill_threshold
backfill_batch_size
backfill_pause
retry_delay
increase_lock_timeout_on_retry
max_tries
retry_delay
sensitive_logger
])
attr_reader :lock_timeout, :safe_timeout
attr_reader :lock_timeout, :safe_timeout, :max_lock_timeout_for_retry

def initialize
self.default_value_backfill_threshold = nil
self.safe_timeout = 5.seconds
self.lock_timeout = nil
self.blocking_activity_logger_margin = 1.second
self.blocking_activity_logger_verbose = true
self.backfill_batch_size = 100_000
self.backfill_pause = 0.5.second
self.retry_delay = 1.minute
self.blocking_activity_logger_margin = 1.second
self.blocking_activity_logger_verbose = true
self.default_value_backfill_threshold = nil
self.increase_lock_timeout_on_retry = false
self.lock_timeout = nil
self.max_lock_timeout_for_retry = 1.second
self.max_tries = 5
self.retry_delay = 1.minute
self.safe_timeout = 5.seconds
self.sensitive_logger = nil
end

def lock_timeout=(value)
raise 'Setting lock timeout to 0 disables the lock timeout and is dangerous' if value == 0.seconds

unless value.nil? || value < safe_timeout
raise ArgumentError, "Lock timeout (#{value}) cannot be greater than safe timeout (#{safe_timeout})"
unless value.nil? || (value < safe_timeout && value <= max_lock_timeout_for_retry)
raise ArgumentError, "Lock timeout (#{value}) cannot be greater than the safe timeout (#{safe_timeout}) or the
max lock timeout for retry (#{max_lock_timeout_for_retry})"
end

@lock_timeout = value
end

def safe_timeout=(value)
raise 'Setting safe timeout to 0 disables the safe timeout and is dangerous' unless value
unless value && value > 0.seconds
raise 'Setting safe timeout to 0 or nil disables the safe timeout and is dangerous'
end

unless lock_timeout.nil? || value > lock_timeout
raise ArgumentError, "Safe timeout (#{value}) cannot be less than lock timeout (#{lock_timeout})"
unless lock_timeout.nil? || (value > lock_timeout && value >= max_lock_timeout_for_retry)
raise ArgumentError, "Safe timeout (#{value}) cannot be lower than the lock timeout (#{lock_timeout}) or the
max lock timeout for retry (#{max_lock_timeout_for_retry})"
end

@safe_timeout = value
end

def max_lock_timeout_for_retry=(value)
unless lock_timeout.nil? || (value >= lock_timeout && value <= safe_timeout)
raise ArgumentError, "Max lock timeout for retry (#{value}) cannot be lower than the lock timeout
(#{lock_timeout}) and greater than the safe timeout (#{safe_timeout})"
end

@max_lock_timeout_for_retry = value
end

def pg_statement_timeout
pg_duration safe_timeout
end
Expand Down
34 changes: 30 additions & 4 deletions lib/safe-pg-migrations/plugins/statement_retrier.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,20 +13,46 @@ module StatementRetrier
private

def retry_if_lock_timeout
remaining_tries = SafePgMigrations.config.max_tries
initial_lock_timeout = SafePgMigrations.config.lock_timeout
number_of_retries = 0
begin
remaining_tries -= 1
number_of_retries += 1
yield
rescue ActiveRecord::LockWaitTimeout
raise if transaction_open? # Retrying is useless if we're inside a transaction.
raise unless remaining_tries > 0
# Retrying is useless if we're inside a transaction.
if transaction_open? || number_of_retries >= SafePgMigrations.config.max_tries
SafePgMigrations.config.lock_timeout = initial_lock_timeout
raise
end

retry_delay = SafePgMigrations.config.retry_delay
Helpers::Logger.say "Retrying in #{retry_delay} seconds...", sub_item: true

if SafePgMigrations.config.increase_lock_timeout_on_retry && !SafePgMigrations.config.lock_timeout.nil?
increase_lock_timeout
end

sleep retry_delay
Helpers::Logger.say 'Retrying now.', sub_item: true
retry
end
end

def increase_lock_timeout
Helpers::Logger.say " Increasing the lock timeout... Currently set to #{SafePgMigrations.config.lock_timeout}",
sub_item: true
SafePgMigrations.config.lock_timeout = (SafePgMigrations.config.lock_timeout + lock_timeout_step)
unless SafePgMigrations.config.lock_timeout < SafePgMigrations.config.max_lock_timeout_for_retry
SafePgMigrations.config.lock_timeout = SafePgMigrations.config.max_lock_timeout_for_retry
end
Helpers::Logger.say " Lock timeout is now set to #{SafePgMigrations.config.lock_timeout}", sub_item: true
end

def lock_timeout_step
max_lock_timeout_for_retry = SafePgMigrations.config.max_lock_timeout_for_retry
lock_timeout = SafePgMigrations.config.lock_timeout
max_tries = SafePgMigrations.config.max_tries
@lock_timeout_step ||= (max_lock_timeout_for_retry - lock_timeout) / (max_tries - 1)
end
end
end
84 changes: 67 additions & 17 deletions test/statement_retrier_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,25 +3,53 @@
require 'test_helper'

class StatementRetrierTest < Minitest::Test
def test_lock_timeout_increase_on_retry
SafePgMigrations.config.lock_timeout = 0.1.seconds
SafePgMigrations.config.increase_lock_timeout_on_retry = true

calls = calls_for_lock_timeout_migration

assert_equal [
' -> Retrying in 60 seconds...',
' -> Increasing the lock timeout... Currently set to 0.1',
' -> Lock timeout is now set to 0.325',
' -> Retrying now.',
' -> Retrying in 60 seconds...',
' -> Increasing the lock timeout... Currently set to 0.325',
' -> Lock timeout is now set to 0.55',
' -> Retrying now.',
' -> Retrying in 60 seconds...',
' -> Increasing the lock timeout... Currently set to 0.55',
' -> Lock timeout is now set to 0.775',
' -> Retrying now.',
' -> Retrying in 60 seconds...',
' -> Increasing the lock timeout... Currently set to 0.775',
' -> Lock timeout is now set to 1',
' -> Retrying now.',
], calls[1..].map(&:first)
end

def test_no_lock_timeout_increase_on_retry_if_disabled
SafePgMigrations.config.lock_timeout = 0.1.seconds
SafePgMigrations.config.increase_lock_timeout_on_retry = false

calls = calls_for_lock_timeout_migration

assert_equal [
' -> Retrying in 60 seconds...',
' -> Retrying now.',
' -> Retrying in 60 seconds...',
' -> Retrying now.',
' -> Retrying in 60 seconds...',
' -> Retrying now.',
' -> Retrying in 60 seconds...',
' -> Retrying now.',
], calls[1..].map(&:first)
end

def test_retry_if_lock_timeout
@migration =
Class.new(ActiveRecord::Migration::Current) do
def up
connection.send(:retry_if_lock_timeout) do
raise ActiveRecord::LockWaitTimeout, 'PG::LockNotAvailable: ERROR: canceling statement due to lock timeout'
end
end
end.new
calls = calls_for_lock_timeout_migration

@connection.expects(:sleep).times(4)
calls =
record_calls(@migration, :write) do
run_migration
flunk 'run_migration should raise'
rescue StandardError => e
assert_instance_of ActiveRecord::LockWaitTimeout, e.cause
assert_includes e.cause.message, 'canceling statement due to lock timeout'
end
assert_equal [
' -> Retrying in 60 seconds...',
' -> Retrying now.',
Expand All @@ -48,4 +76,26 @@ def test_statement_retry
' -> Retrying now.',
], calls[7..9]
end

private

def calls_for_lock_timeout_migration
@migration = Class.new(ActiveRecord::Migration::Current) do
def up
connection.send(:retry_if_lock_timeout) do
raise ActiveRecord::LockWaitTimeout, 'PG::LockNotAvailable: ERROR: canceling statement due to lock timeout'
end
end
end.new

@connection.expects(:sleep).times(4)

record_calls(@migration, :write) do
run_migration
flunk 'run_migration should raise'
rescue StandardError => e
assert_instance_of ActiveRecord::LockWaitTimeout, e.cause
assert_includes e.cause.message, 'canceling statement due to lock timeout'
end
end
end

0 comments on commit 2fc2624

Please sign in to comment.