|
-- Description:
-- This script identifies the last historical filing before the 'lear_tombstone' filing for each business and sets
-- backfill_cutoff_filing_id to that filing ID for businesses that have ALREADY been migrated.
--
-- Context:
-- During tombstone migration, historical filings from COLIN are migrated first, followed by a 'lear_tombstone'
-- marker filing. The backfill_cutoff_filing_id should point to the last historical filing before this marker
-- to establish a clear boundary between migrated historical data and future LEAR-native filings.
--
-- What exactly changes: businesses that have been migrated (have a 'lear_tombstone' filing) but whose
-- backfill_cutoff_filing_id is NULL.
| 11 | + |
-- Main query: backfill businesses.backfill_cutoff_filing_id for migrated businesses.
--
-- A business counts as migrated when it has a 'lear_tombstone' filing. Its cutoff
-- is the highest-id non-tombstone filing that precedes the tombstone. Only rows
-- whose cutoff is still NULL are written; a business with no filing before its
-- tombstone produces no candidate row and is left untouched.
WITH first_tombstones AS (
    -- Exactly one row per business. Joining every tombstone row directly would
    -- hand UPDATE ... FROM several candidate rows for a business that has more
    -- than one tombstone, and PostgreSQL would then apply an unpredictable one.
    SELECT
        f.business_id,
        MIN(f.id) AS tombstone_filing_id
    FROM filings AS f
    WHERE f.filing_type = 'lear_tombstone'
    GROUP BY f.business_id
),
last_historical_filings AS (
    -- The inner join drops businesses that have nothing before the tombstone,
    -- so last_historical_filing_id is never NULL here.
    SELECT
        ft.business_id,
        ft.tombstone_filing_id,
        MAX(f.id) AS last_historical_filing_id
    FROM first_tombstones AS ft
    INNER JOIN filings AS f
        ON f.business_id = ft.business_id
        AND f.id < ft.tombstone_filing_id
        -- Kept from the original filter; it also skips rows whose filing_type is NULL.
        AND f.filing_type != 'lear_tombstone'
    GROUP BY ft.business_id, ft.tombstone_filing_id
)
UPDATE businesses
SET backfill_cutoff_filing_id = lhf.last_historical_filing_id,
    last_modified = NOW()
FROM last_historical_filings AS lhf
WHERE businesses.id = lhf.business_id
    -- Guard on the target row itself so an existing cutoff is never overwritten.
    AND businesses.backfill_cutoff_filing_id IS NULL;
| 36 | + |
-- Second main query: mirror the cutoff onto the current businesses_version row.
--
-- The value is copied from businesses rather than recomputed from filings, so
-- the current version row (end_transaction_id IS NULL) cannot diverge from its
-- parent row. last_modified is copied as well, which keeps both rows on the same
-- timestamp even when the two statements run outside a single transaction.
-- NOTE(review): assumes the current version row is meant to mirror the
-- businesses row exactly, including last_modified; confirm against the
-- versioning setup.
WITH migrated_businesses AS (
    SELECT
        b.id AS business_id,
        b.backfill_cutoff_filing_id,
        b.last_modified
    FROM businesses AS b
    WHERE b.backfill_cutoff_filing_id IS NOT NULL
        -- Only migrated businesses (those with a lear_tombstone filing).
        AND EXISTS (
            SELECT 1
            FROM filings AS f
            WHERE f.business_id = b.id
                AND f.filing_type = 'lear_tombstone'
        )
)
UPDATE businesses_version
SET backfill_cutoff_filing_id = mb.backfill_cutoff_filing_id,
    last_modified = mb.last_modified
FROM migrated_businesses AS mb
WHERE businesses_version.id = mb.business_id
    AND businesses_version.end_transaction_id IS NULL
    -- Skip rows that already match, so re-running the script rewrites nothing.
    AND businesses_version.backfill_cutoff_filing_id IS DISTINCT FROM mb.backfill_cutoff_filing_id;
| 62 | + |
-- The following queries are for verification purposes only; they read data and change nothing.
| 64 | + |
-- Preview: migrated businesses that still have no cutoff, shown next to the
-- filing that would become their cutoff. Returns rows only while the cutoff is
-- still NULL.
SELECT
    biz.id,
    biz.identifier,
    biz.legal_name,
    biz.backfill_cutoff_filing_id AS current_cutoff_id,
    tomb.id AS tombstone_filing_id,
    prior.id AS last_historical_filing_id,
    prior.filing_type AS last_filing_type,
    prior.filing_date AS last_filing_date
FROM businesses AS biz
-- The inner join to the tombstone filing already limits the result to migrated
-- businesses, so no separate EXISTS check is needed.
INNER JOIN filings AS tomb
    ON tomb.business_id = biz.id
    AND tomb.filing_type = 'lear_tombstone'
-- The last filing before the tombstone: highest id below the tombstone's id.
INNER JOIN filings AS prior
    ON prior.business_id = biz.id
    AND prior.id = (
        SELECT MAX(earlier.id)
        FROM filings AS earlier
        WHERE earlier.business_id = biz.id
            AND earlier.id < tomb.id
            AND earlier.filing_type != 'lear_tombstone'
    )
WHERE biz.backfill_cutoff_filing_id IS NULL
ORDER BY biz.identifier;
| 98 | + |
-- How many migrated businesses still have a NULL cutoff.
-- This is an upper bound on what the backfill UPDATE changes: a business with
-- no filing before its tombstone is counted here but skipped by the UPDATE.
SELECT COUNT(*) AS businesses_to_update
FROM businesses AS biz
WHERE biz.backfill_cutoff_filing_id IS NULL
    AND biz.id IN (
        SELECT tomb.business_id
        FROM filings AS tomb
        WHERE tomb.filing_type = 'lear_tombstone'
    );
| 108 | + |
-- Post-update summary over every migrated business.
-- Note that updated_businesses is the total number of migrated businesses, not
-- only those changed by this script; the other two columns split that total by
-- whether a cutoff is set.
SELECT
    COUNT(*) AS updated_businesses,
    -- COUNT(column) counts only the non-NULL values.
    COUNT(biz.backfill_cutoff_filing_id) AS businesses_with_cutoff_id,
    COUNT(*) FILTER (WHERE biz.backfill_cutoff_filing_id IS NULL) AS businesses_without_cutoff_id
FROM businesses AS biz
WHERE biz.id IN (
    SELECT tomb.business_id
    FROM filings AS tomb
    WHERE tomb.filing_type = 'lear_tombstone'
);
| 120 | + |
-- Sample of 10 migrated businesses that have a cutoff, with details of the
-- cutoff filing and the id of the tombstone filing.
SELECT
    biz.id,
    biz.identifier,
    biz.legal_name,
    biz.backfill_cutoff_filing_id,
    cutoff.filing_type AS cutoff_filing_type,
    cutoff.filing_date AS cutoff_filing_date,
    tomb.id AS tombstone_filing_id
FROM businesses AS biz
INNER JOIN filings AS tomb
    ON tomb.business_id = biz.id
    AND tomb.filing_type = 'lear_tombstone'
-- LEFT JOIN keeps the business visible even if the cutoff id matches no filing.
LEFT JOIN filings AS cutoff
    ON cutoff.id = biz.backfill_cutoff_filing_id
WHERE biz.backfill_cutoff_filing_id IS NOT NULL
ORDER BY biz.identifier
LIMIT 10;
| 137 | + |
-- Edge-case check: migrated businesses whose only filings are tombstones.
-- There is no historical filing to point at, so backfill_cutoff_filing_id is
-- expected to stay NULL for them. Not expected to occur; listed just in case.
SELECT
    biz.id,
    biz.identifier,
    biz.legal_name,
    biz.backfill_cutoff_filing_id,
    COUNT(fil.id) AS total_filings,
    COUNT(*) FILTER (WHERE fil.filing_type = 'lear_tombstone') AS tombstone_filings
FROM businesses AS biz
LEFT JOIN filings AS fil
    ON fil.business_id = biz.id
WHERE biz.id IN (
    SELECT tomb.business_id
    FROM filings AS tomb
    WHERE tomb.filing_type = 'lear_tombstone'
)
GROUP BY
    biz.id,
    biz.identifier,
    biz.legal_name,
    biz.backfill_cutoff_filing_id
-- Keep a business only when every one of its filings is a tombstone.
HAVING COUNT(fil.id) = COUNT(*) FILTER (WHERE fil.filing_type = 'lear_tombstone')
ORDER BY biz.identifier;
0 commit comments