Skip to content

Commit 9085c2a

Browse files
authored
30319 - Implemented SQL queries to populate backfill cutoff filing id (bcgov#3763)
* 30319 - Implemented SQL queries to populate backfill cutoff filing id * added id to verification queries * added comment * updated query to update businesses version table
1 parent 7236c23 commit 9085c2a

1 file changed

Lines changed: 157 additions & 0 deletions

File tree

Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
-- Description:
2+
-- This script identifies the last historical filing before the 'lear_tombstone' filing for each business and sets
3+
-- backfill_cutoff_filing_id to that filing ID for businesses that have ALREADY been migrated.
4+
--
5+
-- Context:
6+
-- During tombstone migration, historical filings from COLIN are migrated first, followed by a 'lear_tombstone'
7+
-- marker filing. The backfill_cutoff_filing_id should point to the last historical filing before this marker
8+
-- to establish a clear boundary between migrated historical data and future LEAR-native filings.
9+
--
10+
-- What exactly is changing: businesses that have been migrated (i.e. have a 'lear_tombstone' filing) but whose backfill_cutoff_filing_id is still NULL.
11+
12+
-- Update backfill_cutoff_filing_id for migrated businesses from null to the last historical filing <-- Main Query
--
-- first_tombstones:
--   One row per business: the lowest-id 'lear_tombstone' filing, i.e. the migration boundary.
--   Using MIN(id) keeps the result at exactly one row per business even if a business somehow
--   has several tombstone filings; UPDATE ... FROM applies an arbitrary row when the target
--   matches more than one, so the join source must be unique per business.
-- last_historical_filings:
--   One row per business: the highest filing id below that boundary. Businesses with no filing
--   before their tombstone produce no row here and are therefore left untouched.
-- UPDATE:
--   Only rows whose cutoff is still NULL are written, so re-running the statement changes nothing.
--
-- NOTE(review): like the original, this relies on filing ids increasing in migration order
-- (historical filings get lower ids than the tombstone) -- confirm against the migration flow.
WITH first_tombstones AS (
    SELECT
        f.business_id,
        MIN(f.id) AS tombstone_filing_id
    FROM filings AS f
    WHERE f.filing_type = 'lear_tombstone'
    GROUP BY f.business_id
),
last_historical_filings AS (
    SELECT
        ft.business_id,
        MAX(f.id) AS last_historical_filing_id
    FROM first_tombstones AS ft
    INNER JOIN filings AS f
        ON f.business_id = ft.business_id
        AND f.id < ft.tombstone_filing_id
        AND f.filing_type <> 'lear_tombstone'
    GROUP BY ft.business_id
)
UPDATE businesses
SET backfill_cutoff_filing_id = lhf.last_historical_filing_id,
    last_modified = NOW()
FROM last_historical_filings AS lhf
WHERE businesses.id = lhf.business_id
    AND businesses.backfill_cutoff_filing_id IS NULL;
36+
37+
-- Update businesses_version table for current version records <-- 2nd Main Query
--
-- Run this AFTER the businesses table has been updated: it copies the cutoff that is now stored
-- on each business into that business's current version row (end_transaction_id IS NULL).
--
-- The value is copied from businesses rather than recomputed from filings, so the version row
-- can never disagree with the base row, and the join source is exactly one row per business.
-- Rows that already carry the same cutoff are skipped (IS DISTINCT FROM also treats a NULL
-- version value as "different"), which keeps the statement idempotent and avoids bumping
-- last_modified on rows that do not change.
--
-- Scope is limited to migrated businesses, i.e. those with at least one 'lear_tombstone' filing.
UPDATE businesses_version
SET backfill_cutoff_filing_id = b.backfill_cutoff_filing_id,
    last_modified = NOW()
FROM businesses AS b
WHERE businesses_version.id = b.id
    AND businesses_version.end_transaction_id IS NULL
    AND b.backfill_cutoff_filing_id IS NOT NULL
    AND businesses_version.backfill_cutoff_filing_id IS DISTINCT FROM b.backfill_cutoff_filing_id
    AND EXISTS (
        SELECT 1
        FROM filings AS f
        WHERE f.business_id = b.id
            AND f.filing_type = 'lear_tombstone'
    );
62+
63+
-- The following queries are for verification purposes only
64+
65+
-- Show businesses that will be affected
-- Run this BEFORE the UPDATE statements: it only lists businesses whose cutoff is still NULL.
-- Each row pairs a business with its tombstone filing and with the filing that would become
-- the cutoff (the highest-id non-tombstone filing below the tombstone).
SELECT
    biz.id,
    biz.identifier,
    biz.legal_name,
    biz.backfill_cutoff_filing_id AS current_cutoff_id,
    tomb.id AS tombstone_filing_id,
    prev.id AS last_historical_filing_id,
    prev.filing_type AS last_filing_type,
    prev.filing_date AS last_filing_date
FROM businesses AS biz
-- The inner join on the tombstone filing already restricts the result to migrated businesses.
INNER JOIN filings AS tomb
    ON tomb.business_id = biz.id
    AND tomb.filing_type = 'lear_tombstone'
-- The last filing before the tombstone; businesses without one drop out of the result.
INNER JOIN filings AS prev
    ON prev.business_id = biz.id
    AND prev.id = (
        SELECT MAX(older.id)
        FROM filings AS older
        WHERE older.business_id = biz.id
            AND older.id < tomb.id
            AND older.filing_type <> 'lear_tombstone'
    )
WHERE biz.backfill_cutoff_filing_id IS NULL
ORDER BY biz.identifier;
98+
99+
-- Count of businesses to be updated
-- Run this BEFORE the UPDATE statements. A business is counted only when it
--   * still has a NULL backfill_cutoff_filing_id, and
--   * has a 'lear_tombstone' filing with at least one non-tombstone filing below it,
-- which is the same condition under which the main UPDATE assigns a cutoff. Businesses that
-- have a tombstone but no earlier filing are skipped by the UPDATE and are therefore not counted.
SELECT COUNT(*) AS businesses_to_update
FROM businesses AS b
WHERE b.backfill_cutoff_filing_id IS NULL
    AND EXISTS (
        SELECT 1
        FROM filings AS f_tombstone
        INNER JOIN filings AS f_hist
            ON f_hist.business_id = f_tombstone.business_id
            AND f_hist.id < f_tombstone.id
            AND f_hist.filing_type <> 'lear_tombstone'
        WHERE f_tombstone.business_id = b.id
            AND f_tombstone.filing_type = 'lear_tombstone'
    );
108+
109+
-- Check results after updating
-- Summary over all migrated businesses (those with a 'lear_tombstone' filing):
--   updated_businesses            total number of migrated businesses
--   businesses_with_cutoff_id     how many of them now have a cutoff
--   businesses_without_cutoff_id  how many of them still have a NULL cutoff
SELECT
    COUNT(*) AS updated_businesses,
    -- COUNT(column) counts only non-NULL values.
    COUNT(biz.backfill_cutoff_filing_id) AS businesses_with_cutoff_id,
    COUNT(*) - COUNT(biz.backfill_cutoff_filing_id) AS businesses_without_cutoff_id
FROM businesses AS biz
WHERE biz.id IN (
    SELECT tomb.business_id
    FROM filings AS tomb
    WHERE tomb.filing_type = 'lear_tombstone'
);
120+
121+
-- Show 10 updated records
-- Sample of migrated businesses whose cutoff is set, with the type and date of the filing the
-- cutoff points at. The cutoff filing is LEFT JOINed so a dangling cutoff id would still show
-- up, with NULL filing columns.
SELECT
    biz.id,
    biz.identifier,
    biz.legal_name,
    biz.backfill_cutoff_filing_id,
    cutoff.filing_type AS cutoff_filing_type,
    cutoff.filing_date AS cutoff_filing_date,
    tomb.id AS tombstone_filing_id
FROM businesses AS biz
INNER JOIN filings AS tomb
    ON tomb.business_id = biz.id
    AND tomb.filing_type = 'lear_tombstone'
LEFT JOIN filings AS cutoff
    ON cutoff.id = biz.backfill_cutoff_filing_id
WHERE biz.backfill_cutoff_filing_id IS NOT NULL
ORDER BY biz.identifier
LIMIT 10;
137+
138+
-- Edge case check: Businesses with lear_tombstone but no historical filings
-- For these businesses backfill_cutoff_filing_id is expected to stay NULL.
-- Not expected to occur in practice; kept as a safety net.
--
-- A business is reported when every one of its filings is a 'lear_tombstone', i.e. the total
-- filing count equals the tombstone filing count.
SELECT
    biz.id,
    biz.identifier,
    biz.legal_name,
    biz.backfill_cutoff_filing_id,
    COUNT(fil.id) AS total_filings,
    COUNT(CASE WHEN fil.filing_type = 'lear_tombstone' THEN 1 END) AS tombstone_filings
FROM businesses AS biz
LEFT JOIN filings AS fil
    ON fil.business_id = biz.id
-- Only migrated businesses are considered.
WHERE biz.id IN (
    SELECT tomb.business_id
    FROM filings AS tomb
    WHERE tomb.filing_type = 'lear_tombstone'
)
GROUP BY
    biz.id,
    biz.identifier,
    biz.legal_name,
    biz.backfill_cutoff_filing_id
HAVING COUNT(fil.id) = COUNT(CASE WHEN fil.filing_type = 'lear_tombstone' THEN 1 END)
ORDER BY biz.identifier;

0 commit comments

Comments
 (0)