-
Notifications
You must be signed in to change notification settings - Fork 15
/
Copy pathcovid_plot_vacs.py
656 lines (599 loc) · 34.4 KB
/
covid_plot_vacs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
import os
import re
import matplotlib.cm
import numpy as np
import pandas as pd
import utils_thai
from covid_plot_utils import plot_area
from covid_plot_utils import source
from utils_pandas import cum2daily
from utils_pandas import fix_gaps
from utils_pandas import get_cycle
from utils_pandas import import_csv
from utils_pandas import normalise_to_total
from utils_pandas import perc_format
from utils_pandas import pred_vac
from utils_pandas import rearrange
from utils_pandas import topprov
from utils_scraping import any_in
from utils_scraping import logger
from utils_thai import AREA_LEGEND_SIMPLE
from utils_thai import DISTRICT_RANGE_SIMPLE
from utils_thai import FIRST_AREAS
from utils_thai import get_provinces
from utils_thai import thaipop
from utils_thai import trend_table
def save_vacs_plots(df: pd.DataFrame) -> None:
    """Plot the national vaccination charts and the "% of peak" trend charts.

    Every chart is rendered through ``plot_area``. The ``png_prefix`` values
    used are ``vac_manuf``, ``vac_groups_daily``, ``vac_groups``,
    ``vac_groups_goals_full``, ``vac_groups_goals_half``,
    ``vac_groups_goals_3``, ``vac_areas``, ``cases_peak`` and ``tests_peak``.

    :param df: combined national frame. It is sliced with a date label
        (``df['2021-02-28':]``), so a sorted date index is assumed. Columns
        read here include ``Vac Given {1,2,3}`` and their ``Cum`` variants,
        ``Vac Group ... Cum``, ``Vac Given Area ... Cum``, ``Cases``,
        ``Deaths``, ``ATK``, ``Pos XLS`` and ``Tests XLS``.
    """
    ####################
    # Vaccines
    ####################
    manuf = ["Sinovac", "AstraZeneca", "Sinopharm", "Pfizer", "Moderna"]
    man_cols = pd.DataFrame()
    for m in manuf:
        # Add up every column mentioning "Given <manufacturer>"
        man_cols[m] = df[[c for c in df.columns if f"Given {m}" in str(c)]].sum(axis=1)
    # Treat zeros as gaps, fill them, then difference the totals into per-row changes
    man_cols = man_cols.replace(0.0, np.nan).interpolate().diff().replace(0.0, np.nan)
    plot_area(df=man_cols,
              title='Covid Vaccinations by Manufacturer - Thailand',
              cols_subset=list(man_cols.columns),
              png_prefix='vac_manuf',
              periods_to_plot=["3", "all"],
              ma_days=7,
              kind='line', stacked=False, percent_fig=False,
              footnote_left=f'{source}Data Source: DDC Daily Vaccination Reports')

    def clean_vac_leg(label, first="1st Jab", second="2nd Jab"):
        """Turn a "Vac Group <name> <dose> Cum" style column name into a legend label.

        :param label: column name to convert.
        :param first: prefix used for dose 1 ("1" or "Only 1") columns.
        :param second: prefix used for dose 2 columns.
        :return: the legend text.
        """
        c = label
        c = re.sub(r"(?:Vac )?(?:Group )?(.*) (?:1|Only 1)(?: Cum)?", fr"{first} - \1", c)
        c = re.sub(r"(?:Vac )?(?:Group )?(.*) 2(?: Cum)?", fr"{second} - \1", c)
        c = re.sub(r"(?:Vac )?(?:Group )?(.*) 3(?: Cum)?", r"3rd Booster/Other \1", c)
        c = re.sub(r"(.*) (?:Only|Given)", r"\1", c)
        c = c.replace(
            'General Population', 'General Population (0-59)').replace(
            'Risk: Location', 'General Population (0-59)').replace(
            'Student', 'Students 12-17').replace(
            'Medical All', 'Medical Staff & Volunteers').replace(
            'Risk: Disease', 'Risk from 7 Diseases',).replace(
            'Risk: Pregnant', 'Pregnant',
        )
        return c

    groups = [c for c in df.columns if str(c).startswith('Vac Group')]
    df_vac_groups = df['2021-02-28':][groups]
    # Too many groups. Combine some for now
    # for dose in range(1, 4):
    #     df_vac_groups[f"Vac Group Risk: Location {dose} Cum"] = df_vac_groups[
    #         f"Vac Group Risk: Location {dose} Cum"].add(df_vac_groups[f'Vac Group Risk: Pregnant {dose} Cum'],
    #                                                     fill_value=0)
    #     df_vac_groups[f"Vac Group Medical Staff {dose} Cum"] = df_vac_groups[f"Vac Group Medical Staff {dose} Cum"].add(
    #         df_vac_groups[f'Vac Group Health Volunteer {dose} Cum'], fill_value=0)
    # groups = [c for c in groups if "Pregnant" not in c and "Volunteer" not in c and " 3 " not in c]
    df_vac_groups = df_vac_groups[groups]
    # go backwards to get rid of "dips". ie take later value as correct. e.g. 2021-06-21
    df_vac_groups = df_vac_groups.reindex(index=df_vac_groups.index[::-1])
    df_vac_groups = df_vac_groups.cummin()  # if later corrected down, take that number into past
    df_vac_groups = df_vac_groups.reindex(index=df_vac_groups.index[::-1])
    # We have some missing days so interpolate e.g. 2021-05-04
    df_vac_groups = df_vac_groups.interpolate(method="time", limit_area="inside")
    # TODO: should we use actual Given?
    df_vac_groups['Vac Given Cum'] = df[[f'Vac Given {d} Cum' for d in range(1, 4)]].sum(axis=1, skipna=False)
    df_vac_groups['Vac Given'] = df[[f'Vac Given {d}' for d in range(1, 4)]].sum(axis=1, skipna=False)
    df_vac_groups['Vac Given 1 Cum'] = df['Vac Given 1 Cum']
    df_vac_groups['Vac Given 2 Cum'] = df['Vac Given 2 Cum']
    df_vac_groups['Vac Given 3 Cum'] = df['Vac Given 3 Cum']

    # risk location is really general population and that now includes frontline and pregnant
    gen_groups = ['Risk: Pregnant', 'Other Frontline Staff', 'Risk: Location']
    for d in range(1, 4):
        merged_cols = [f'Vac Group {g} {d} Cum' for g in gen_groups]
        # min_count=1 keeps NaN where none of the merged groups has a value
        df_vac_groups[f'Vac Group General Population {d} Cum'] = df_vac_groups[merged_cols].sum(
            axis=1, skipna=True, min_count=1)
        df_vac_groups = df_vac_groups.drop(columns=merged_cols)
    groups = [c for c in df_vac_groups.columns if str(c).startswith('Vac Group')]

    df_vac_groups['Vac Imported Cum'] = df_vac_groups[[
        c for c in df_vac_groups.columns if "Vac Imported" in c]].sum(axis=1, skipna=False)

    # now convert to daily and interpolate and then normalise to real daily total.
    vac_daily = cum2daily(df_vac_groups)
    # bring in any daily figures we might have collected first
    vac_daily = df[['Vac Given', 'Vac Given 1', 'Vac Given 2', 'Vac Given 3']].combine_first(vac_daily)
    daily_cols = [c for c in vac_daily.columns if c.startswith(
        'Vac Group') and ' 3' not in c] + ['Vac Given 3']  # Keep for unknown
    # We have "Medical All" instead
    daily_cols = [c for c in daily_cols if not any_in(c, "Medical Staff", "Volunteer")]
    # interpolate to fill gaps and get some values for each group
    vac_daily[daily_cols] = vac_daily[daily_cols].interpolate(method="time", limit_area="inside")
    # now normalise the filled in days so they add to their real total
    vac_daily = vac_daily.pipe(normalise_to_total, daily_cols, 'Vac Given')

    # vac_daily['7d Runway Rate'] = (df['Vac Imported Cum'].fillna(method="ffill") - df_vac_groups['Vac Given Cum']) / 7
    days_to_target = (pd.Timestamp('2022-01-01') - vac_daily.index.to_series()).dt.days
    vac_daily['Target Rate 1'] = (50000000 - df_vac_groups['Vac Given 1 Cum']) / days_to_target
    vac_daily['Target Rate 2'] = (50000000 * 2 - df_vac_groups['Vac Given 2 Cum']) / days_to_target

    # daily_cols = rearrange(daily_cols, 2, 1, 4, 3, 10, 9, 8, 7, 6, 5)
    # Order the stack: 2nd doses, then 1st doses, then 3rd doses
    daily_cols = ([c for c in daily_cols if "2" in c]
                  + [c for c in daily_cols if "1" in c]
                  + [c for c in daily_cols if "3" in c])
    plot_area(df=vac_daily,
              title='Daily Covid Vaccinations by Priority Groups - Thailand',
              legends=[
                  # 'Doses per day needed to run out in a week',
                  # 'Rate for 70% 1st Jab in 2021',
                  # 'Rate for 70% 2nd Jab in 2021'
              ] + [clean_vac_leg(c) for c in daily_cols],  # bar puts the line first?
              legend_cols=2,
              png_prefix='vac_groups_daily', cols_subset=daily_cols,
              between=[
                  # '7d Runway Rate',
                  # 'Target Rate 1',
                  # 'Target Rate 2'
              ],
              periods_to_plot=["30d", "3"],  # too slow to do all
              ma_days=None,
              kind='bar', stacked=True, percent_fig=False,
              cmap=get_cycle('tab20', len(daily_cols) - 1, extras=["grey"], unpair=True),
              footnote_left=f'{source}Data Source: DDC Daily Vaccination Reports')

    # # Now turn daily back to cumulative since we now have estimates for every day without dips
    # vac_cum = vac_daily.cumsum().combine_first(vac_daily[daily_cols].fillna(0).cumsum())
    # vac_cum.columns = [f"{c} Cum" for c in vac_cum.columns]
    # # Not sure why but we end up with large cumulative than originally so normalise
    # for c in groups:
    #     vac_cum[c] = vac_cum[c] / vac_cum[groups].sum(axis=1) * df_vac_groups['Vac Given Cum']
    vac_cum = df_vac_groups

    # TODO: adjust allocated for double dose group
    # second_dose = [c for c in groups if "2 Cum" in c]
    # first_dose = [c for c in groups if "1 Cum" in c]
    # vac_cum['Available Vaccines Cum'] = df['Vac Imported Cum'].fillna(method="ffill") - vac_cum[second_dose].sum(axis=1)

    cols = []
    # We want people vaccinated not total doses
    for c in groups:
        # Match the dose-1 suffix exactly; a bare "1" test would also match
        # any group whose name merely contains the digit 1.
        if " 1 Cum" in c:
            dose2_col = c.replace(" 1 Cum", " 2 Cum")
            only1_col = c.replace(" 1 Cum", " Only 1 Cum")
            vac_cum[only1_col] = vac_cum[c].sub(vac_cum[dose2_col])
            cols.extend([dose2_col, only1_col])

    # cols_cum = rearrange(cols, 1, 2, 3, 4, 9, 10, 7, 8, )
    # cols_cum = cols_cum  # + ['Available Vaccines Cum']
    cols_cum = [c for c in cols if " 2 Cum" in c] + [c for c in cols if " 1 Cum" in c]
    # We have "Medical All" instead
    cols_cum = [c for c in cols_cum if not any_in(c, "Medical Staff", "Volunteer")]

    # TODO: get paired colour map and use do 5 + 5 pairs
    legends = [clean_vac_leg(c) for c in cols_cum]

    plot_area(df=vac_cum,
              title='Population Vaccinated against Covid by Priority Groups - Thailand',
              legends=legends,
              png_prefix='vac_groups', cols_subset=cols_cum,
              ma_days=None,
              kind='area', stacked=True, percent_fig=True,
              cmap=get_cycle('tab20', len(cols_cum), unpair=True),
              # between=['Available Vaccines Cum'],
              y_formatter=thaipop,
              footnote_left=f'{source}Data Source: DDC Daily Vaccination Reports')

    # Targets for groups
    # https://www.facebook.com/informationcovid19/photos/a.106455480972785/342985323986465/
    # 712,000 for medical staff
    # 1,900,000 for frontline staffs
    # 1,000,000 for village health volunteer
    # 5,350,000 for risk: disease
    # 12,500,000 for risk: over 60
    # 28,538,000 for general population

    # medical staff 712,000
    # village health volunteers 1,000,000
    # frontline workers 1,900,000
    # underlying diseases 6,347,125
    # general public 28,634,733 - 46,169,508
    # elderly over 60 10,906,142
    # pregnant 500,000
    # Students 12-17 4,500,000
    # Target total 50,000,000
    # Total was 73,833,176?
    goals = [
        ('Medical All', 1000000 + 712000),
        # ('Health Volunteer', 1000000),
        # ('Medical Staff', 712000),
        # ('Other Frontline Staff', 1900000),
        ('Over 60', 12704543),  # Was 10906142
        ('Risk: Disease', 6347125),
        ('General Population', 41621025),  # was 48569508
        # ('Risk: Pregnant', 500000),
        ('Student', 4500000),
        ('Kids', 5150082),
    ]

    def goal_col(group, dose, goal):
        """Name of the "% of goal" column for one group and dose."""
        return f'Vac Group {group} {dose} Cum % ({goal/1000000:.1f}M)'

    for d in [3, 2, 1]:
        for group, goal in goals:
            vac_cum[goal_col(group, d, goal)] = vac_cum[f'Vac Group {group} {d} Cum'] / goal * 100

    dose1 = vac_cum[[goal_col(group, 1, goal) for group, goal in goals]]
    dose2 = vac_cum[[goal_col(group, 2, goal) for group, goal in goals]]
    dose3 = vac_cum[[goal_col(group, 3, goal) for group, goal in goals]]
    pred1, pred2 = pred_vac(dose1, dose2, lag=40)
    _, pred3 = pred_vac(dose2, dose3, lag=150)
    # Cap each prediction at max(100, its own first value): no more than 100% unless already over
    pred1 = pred1.clip(upper=pred1.iloc[0].clip(100), axis=1)
    pred2 = pred2.clip(upper=pred2.iloc[0].clip(100), axis=1)
    # Was clipped against pred2's first row; the cap for the 3rd dose must come
    # from pred3 itself so that the column labels line up.
    pred3 = pred3.clip(upper=pred3.iloc[0].clip(100), axis=1)
    # vac_cum = vac_cum.combine_first(pred1).combine_first(pred2).combine_first(pred3)

    cols2 = [c for c in vac_cum.columns if " 2 Cum %" in c and "Vac Group " in c and "Pred" not in c]
    legends = [clean_vac_leg(c) for c in cols2]
    plot_area(df=vac_cum.combine_first(pred2),
              title='Vaccination by group - 2nd Dose - Thailand',
              legends=legends,
              png_prefix='vac_groups_goals_full', cols_subset=cols2,
              kind='line',
              actuals=list(pred2.columns),
              ma_days=None,
              stacked=False, percent_fig=False,
              y_formatter=perc_format,
              cmap=get_cycle('tab20', len(cols2) * 2, unpair=True, start=len(cols2)),
              footnote_left=f'{source}Data Source: DDC Daily Vaccination Reports',
              footnote='Assumes avg 40day gap between doses')

    cols2 = [c for c in vac_cum.columns if " 1 Cum %" in c and "Vac Group " in c and "Pred" not in c]
    # actuals = [c for c in vac_cum.columns if " 1 Pred" in c]
    legends = [clean_vac_leg(c) for c in cols2]
    plot_area(df=vac_cum.combine_first(pred1),
              title='Vaccination by group - 1st Dose - Thailand',
              legends=legends,
              png_prefix='vac_groups_goals_half', cols_subset=cols2,
              actuals=list(pred1.columns),
              ma_days=None,
              kind='line', stacked=False, percent_fig=False,
              y_formatter=perc_format,
              cmap=get_cycle('tab20', len(cols2) * 2, unpair=True, start=len(cols2)),  # TODO: seems to be getting wrong colors
              footnote_left=f'{source}Data Source: DDC Daily Vaccination Reports')

    cols2 = [c for c in vac_cum.columns if " 3 Cum %" in c and "Vac Group " in c and "Pred" not in c]
    legends = [clean_vac_leg(c) for c in cols2]
    plot_area(df=vac_cum.combine_first(pred3),
              title='Vaccination by group - 3rd Dose - Thailand',
              legends=legends,
              png_prefix='vac_groups_goals_3', cols_subset=cols2,
              kind='line',
              actuals=list(pred3.columns),
              ma_days=None,
              stacked=False, percent_fig=False,
              y_formatter=perc_format,
              cmap=get_cycle('tab20', len(cols2) * 2, unpair=True, start=len(cols2)),
              footnote_left=f'{source}Data Source: DDC Daily Vaccination Reports',
              footnote='Assumes avg 150d to booster')

    cols = rearrange([f'Vac Given Area {area} Cum' for area in DISTRICT_RANGE_SIMPLE], *FIRST_AREAS)
    df_vac_areas_s1 = df['2021-02-28':][cols].interpolate(limit_area="inside")
    plot_area(df=df_vac_areas_s1,
              title='Covid Vaccination Doses by Health District - Thailand',
              legends=AREA_LEGEND_SIMPLE,
              png_prefix='vac_areas', cols_subset=cols,
              ma_days=None,
              kind='area', stacked=True, percent_fig=False,
              cmap='tab20',
              footnote_left=f'{source}Data Sources: MOPH Covid-19 Dashboard\n  DDC Daily Vaccination Reports')

    # Do a % of peak chart for death vs cases
    cols = ['Cases', 'Deaths', 'ATK', ]
    peaks = df[cols] / df[cols].rolling(7, 3, center=True).mean().max(axis=0) * 100
    # pops['Vac Population'].sum() is 72034815.0
    thai_pop = 72034815.0
    peaks["Vaccinated"] = df['Vac Given 2 Cum'] / thai_pop * 100
    peaks["Boosted"] = df['Vac Given 3 Cum'] / thai_pop * 100
    peaks["Positive Rate"] = (df["Pos XLS"] / df["Tests XLS"] * 100)
    cols = [
        'Cases',
        # 'ATK',
        'Vaccinated',
        "Boosted",
        'Deaths',
        'Positive Rate',
    ]
    legend = [
        "Confirmed Cases (% of peak)",
        # "Reg. ATK - Probable Case (% of peak)",
        "Vaccinated - 2nd dose (% of Thai Pop.)",
        "Vaccinated - 3rd dose (% of Thai Pop.)",
        "Reported Covid Deaths (% of peak)",
        "PCR +ve per PCR Test (Positive Rate)",
    ]
    plot_area(df=peaks,
              title='Covid 19 Trends - Thailand',
              png_prefix='cases_peak', cols_subset=cols, legends=legend,
              ma_days=7,
              kind='line', stacked=False, percent_fig=False, clean_end=True,
              cmap='tab10',
              y_formatter=perc_format,
              footnote_left=f'{source}Data Source: MOPH Covid-19 Dashboard, CCSA Daily Briefing',
              footnote="% of peak (except vaccinated).\nVaccinated is % of population with 2 jabs.")

    # kind of dodgy since ATK is subset of positives but we don't know total ATK
    cols = [
        'Cases',
        'Tests XLS',
        'ATK',
    ]
    # Only roll over the plotted columns: rolling the whole frame is slow, can
    # trip over non-numeric columns and adds all-NaN columns to the result.
    peaks = df[cols] / df[cols].rolling(7).mean().max(axis=0) * 100
    legends = [
        'Cases from PCR Tests',
        'PCR Tests',
        'Home Isolation from Positive ATK Tests',
    ]
    plot_area(df=peaks,
              title='Tests as % of Peak - Thailand',
              legends=legends,
              png_prefix='tests_peak', cols_subset=cols,
              ma_days=7,
              kind='line', stacked=False, percent_fig=False, clean_end=True,
              cmap='tab20_r',
              y_formatter=perc_format,
              footnote='ATK: Covid-19 Rapid Antigen Self Test Kit\n'
              + 'PCR: Polymerase Chain Reaction',
              footnote_left=f'{source}Data Source: MOPH Covid-19 Dashboard, CCSA Daily Briefing')
def save_vacs_prov_plots(df, df_prov=None):
    """Plot the per-region and per-province vaccination charts.

    Every chart is rendered through ``plot_area``. The ``png_prefix`` values
    used are ``vac_region_{1,2,3}``, ``vac_region_60s_2``,
    ``vac_region_disease_2``, ``vac_region_daily_{1,2,3}``,
    ``vac_top5_doses_{1,2,3}`` and ``vac_low_doses_{1,2}``.

    :param df: not read by this function; kept so existing callers keep working.
    :param df_prov: frame indexed by Date and Province whose "Vac" columns are
        used to fill gaps in the "vaccinations" CSV. When ``None`` it is loaded
        with ``import_csv("cases_by_province", ['Date', 'Province'])``.
    """
    # Top 5 vaccine rollouts
    vac = import_csv("vaccinations", ['Date', 'Province'])
    vac = vac.groupby("Province", group_keys=False).apply(fix_gaps)
    # vac = vac.groupby("Province", group_keys=False)
    if df_prov is None:
        df_prov = import_csv("cases_by_province", ['Date', 'Province'])
    # df_prov = df_prov.groupby("Province", group_keys=False)
    vac_df_prov = df_prov[[c for c in df_prov.columns if "Vac" in c]]
    vac_df_prov = vac_df_prov.groupby("Province", group_keys=False).apply(fix_gaps)
    vac = vac.combine_first(vac_df_prov)

    # Let's trust the dashboard more but they could both be different
    # TODO: dash gives different higher values. Also glitches cause problems
    # vac = dash_prov.combine_first(vac)
    # vac = vac.combine_first(vac_dash[[f"Vac Given {d} Cum" for d in range(1, 4)]])
    # Add them all up
    vac = vac.combine_first(vac[[f"Vac Given {d} Cum" for d in range(1, 4)]].sum(
        axis=1, skipna=False).to_frame("Vac Given Cum"))
    vac = vac.join(get_provinces()[['Population', 'region']], on='Province')

    # Reset populations to the latest since they changed definitions over time
    # (the population columns are not present on all rows)
    for pop_col in ["Vac Population Risk: Disease", 'Vac Population Over 60s', 'Vac Population']:
        # The existing column is kept with a "1" suffix; pop_col now holds the latest value per province
        vac = vac.join(vac[pop_col].groupby("Province").last().to_frame(pop_col), lsuffix="1")
    vac["Vac Population2"] = vac["Vac Population"]

    # top5 = vac.pipe(topprov, lambda df: df['Vac Given Cum'] / df['Vac Population2'] * 100)
    # cols = top5.columns.to_list()
    # pred = pred_vac(top5)
    # plot_area(df=top5,
    #           title='Covid Vaccination Doses - Top Provinces - Thailand',
    #           png_prefix='vac_top5_doses', cols_subset=cols,
    #           ma_days=None,
    #           kind='line', stacked=False, percent_fig=False,
    #           cmap='tab10',
    #           actuals=pred,
    #           y_formatter=perc_format,
    #           footnote_left=f'{source}Data Sources: MOPH Covid-19 Dashboard\n  DDC Daily Vaccination Reports')

    # Get rid of dates where we don't have complete data
    # (77 is presumably the number of provinces expected per date)
    complete_dates = vac.groupby("Date").count()['Vac Population2'] == 77
    by_region = vac.reset_index("Province")[complete_dates].reset_index()
    pop_region = by_region.pivot_table('Vac Population2', 'Date', 'region', "sum").replace(0, np.nan)
    by_region_1 = by_region.pivot_table('Vac Given 1 Cum', 'Date', 'region', "sum").replace(0, np.nan) / pop_region * 100
    by_region_2 = by_region.pivot_table('Vac Given 2 Cum', 'Date', 'region', "sum").replace(0, np.nan) / pop_region * 100
    by_region_3 = by_region.pivot_table('Vac Given 3 Cum', 'Date', 'region', "sum").replace(0, np.nan) / pop_region * 100
    # if we miss some provinces we get dips
    by_region_1 = by_region_1.cummax()
    by_region_2 = by_region_2.cummax()
    by_region_3 = by_region_3.cummax()

    pred_1, pred_2 = pred_vac(by_region_1, by_region_2)
    # Cap predictions at max(90, first predicted value): no more than 90% unless already over
    pred_2 = pred_2.clip(upper=pred_2.iloc[0].clip(90), axis=1)
    pred_1 = pred_1.clip(upper=pred_1.iloc[0].clip(90), axis=1)

    plot_area(df=by_region_2.combine_first(pred_2),
              title='Vaccinated - 2nd Dose - by Region - Thailand',
              png_prefix='vac_region_2', cols_subset=utils_thai.REG_COLS, legends=utils_thai.REG_LEG,
              ma_days=7,
              kind='line', stacked=False, percent_fig=False, mini_map=True,
              cmap=utils_thai.REG_COLOURS,
              actuals=list(pred_2.columns),
              table=trend_table(vac['Vac Given 2 Cum'] / vac['Vac Population2'] * 100, sensitivity=30, style="rank_up"),
              y_formatter=perc_format,
              footnote='Table of % vaccinated and 7 day trend in change in rank',
              footnote_left=f'{source}Data Sources: DDC Daily Vaccination Reports',
              )

    plot_area(df=by_region_1.combine_first(pred_1),
              title='Vaccinated - 1st Dose - by Region - Thailand',
              png_prefix='vac_region_1', cols_subset=utils_thai.REG_COLS, legends=utils_thai.REG_LEG,
              ma_days=7,
              kind='line', stacked=False, percent_fig=False, mini_map=True,
              cmap=utils_thai.REG_COLOURS,
              actuals=list(pred_1.columns),
              table=trend_table(vac['Vac Given 1 Cum'] / vac['Vac Population2'] * 100, sensitivity=30, style="rank_up"),
              y_formatter=perc_format,
              footnote='Table of % vaccinated and 7 day trend in change in rank',
              footnote_left=f'{source}Data Sources: DDC Daily Vaccination Reports',
              )

    _, pred_3 = pred_vac(by_region_2, by_region_3, ahead=90, lag=150)
    # Was clipped against pred_2's first row; the booster prediction is capped
    # by its own starting point, like the 1st/2nd dose predictions above.
    pred_3 = pred_3.clip(upper=pred_3.iloc[0].clip(90), axis=1)
    plot_area(df=by_region_3.combine_first(pred_3),
              title='Vaccinated - 3rd Dose - by Region - Thailand',
              png_prefix='vac_region_3', cols_subset=utils_thai.REG_COLS, legends=utils_thai.REG_LEG,
              ma_days=7,
              kind='line', stacked=False, percent_fig=False, mini_map=True,
              cmap=utils_thai.REG_COLOURS,
              actuals=list(pred_3.columns),
              table=trend_table(vac['Vac Given 3 Cum'] / vac['Vac Population2'] * 100, sensitivity=30, style="rank_up"),
              y_formatter=perc_format,
              footnote='Assumes 5 month booster avg. Table shows rank change',
              footnote_left=f'{source}Data Sources: DDC Daily Vaccination Reports',
              )

    # for over 60s (only the 2nd dose is plotted; the 1st dose feeds the prediction)
    pop_region = by_region.pivot_table("Vac Population Over 60s", 'Date', 'region', "sum").replace(0, np.nan)
    by_region_1 = by_region.pivot_table('Vac Group Over 60 1 Cum', 'Date', 'region',
                                        "sum").replace(0, np.nan) / pop_region * 100
    by_region_2 = by_region.pivot_table('Vac Group Over 60 2 Cum', 'Date', 'region',
                                        "sum").replace(0, np.nan) / pop_region * 100
    _, pred_2 = pred_vac(by_region_1, by_region_2)
    pred_2 = pred_2.clip(upper=pred_2.iloc[0].clip(100), axis=1)  # no more than 100% unless already over

    plot_area(df=by_region_2.combine_first(pred_2),
              title='Vaccinated Over 60s - 2nd Dose - by Region - Thailand',
              png_prefix='vac_region_60s_2', cols_subset=utils_thai.REG_COLS, legends=utils_thai.REG_LEG,
              ma_days=7,
              kind='line', stacked=False, percent_fig=False, mini_map=True,
              cmap=utils_thai.REG_COLOURS,
              actuals=list(pred_2.columns),
              table=trend_table(vac['Vac Group Over 60 2 Cum'] / vac["Vac Population Over 60s"]
                                * 100, sensitivity=30, style="rank_up"),
              y_formatter=perc_format,
              footnote='Table of % vaccinated and 7 day trend in change in rank',
              footnote_left=f'{source}Data Sources: DDC Daily Vaccination Reports',
              )

    # for risk disease (only the 2nd dose is plotted; the 1st dose feeds the prediction)
    pop_region = by_region.pivot_table("Vac Population Risk: Disease", 'Date', 'region', "sum").replace(0, np.nan)
    by_region_1 = by_region.pivot_table('Vac Group Risk: Disease 1 Cum', 'Date', 'region',
                                        "sum").replace(0, np.nan) / pop_region * 100
    by_region_2 = by_region.pivot_table('Vac Group Risk: Disease 2 Cum', 'Date', 'region',
                                        "sum").replace(0, np.nan) / pop_region * 100
    _, pred_2 = pred_vac(by_region_1, by_region_2)
    pred_2 = pred_2.clip(upper=pred_2.iloc[0].clip(100), axis=1)  # no more than 100% unless already over

    plot_area(df=by_region_2.combine_first(pred_2),
              title='Vaccinated Risk of 7 Diseases - 2nd Dose - by Region - Thailand',
              png_prefix='vac_region_disease_2', cols_subset=utils_thai.REG_COLS, legends=utils_thai.REG_LEG,
              ma_days=7,
              kind='line', stacked=False, percent_fig=False, mini_map=True,
              cmap=utils_thai.REG_COLOURS,
              actuals=list(pred_2.columns),
              table=trend_table(vac['Vac Group Risk: Disease 2 Cum'] / vac["Vac Population Risk: Disease"]
                                * 100, sensitivity=30, style="rank_up"),
              y_formatter=perc_format,
              footnote='Table of % vaccinated and 7 day trend in change in rank',
              footnote_left=f'{source}Data Sources: DDC Daily Vaccination Reports',
              )

    # We only have partial data
    vac_prov_daily = cum2daily(vac)
    # vac_prov_daily = vac_prov_daily.join(get_provinces()[['Population', 'region']], on='Province')
    # vac_prov_daily = vac_prov_daily.join(pops, rsuffix="2")

    # by_region = vac_prov_daily.reset_index()
    # Again keep only the dates where every province reported
    complete_dates = vac_prov_daily.groupby("Date").count()['Vac Population'] == 77
    by_region = vac_prov_daily.reset_index("Province")[complete_dates].reset_index()
    pop_region = by_region.pivot_table("Vac Population", 'Date', 'region', "sum").replace(0, np.nan)
    by_region_1 = by_region.pivot_table('Vac Given 1', 'Date', 'region', "sum").replace(0, np.nan)
    by_region_2 = by_region.pivot_table('Vac Given 2', 'Date', 'region', "sum").replace(0, np.nan)
    by_region_3 = by_region.pivot_table('Vac Given 3', 'Date', 'region', "sum").replace(0, np.nan)

    plot_area(df=by_region_3 / pop_region * 100000,
              title='Vaccinations/100k - 3rd Dose - by Region - Thailand',
              png_prefix='vac_region_daily_3', cols_subset=utils_thai.REG_COLS, legends=utils_thai.REG_LEG,
              ma_days=21,
              kind='line', stacked=False, percent_fig=False, mini_map=True,
              cmap=utils_thai.REG_COLOURS,
              # table = trend_table(vac_prov_daily['Vac Given 2'], sensitivity=10, style="green_up"),
              footnote='Table of latest Vaccinations and 7 day trend per 100k',
              footnote_left=f'{source}Data Sources: DDC Daily Vaccination Reports',
              )

    plot_area(df=by_region_2 / pop_region * 100000,
              title='Vaccinations/100k - 2nd Dose - by Region - Thailand',
              png_prefix='vac_region_daily_2', cols_subset=utils_thai.REG_COLS, legends=utils_thai.REG_LEG,
              ma_days=21,
              kind='line', stacked=False, percent_fig=False, mini_map=True,
              cmap=utils_thai.REG_COLOURS,
              # table = trend_table(vac_prov_daily['Vac Given 2'], sensitivity=10, style="green_up"),
              footnote='Table of latest Vaccinations and 7 day trend per 100k',
              footnote_left=f'{source}Data Sources: DDC Daily Vaccination Reports',
              )

    plot_area(df=by_region_1 / pop_region * 100000,
              title='Vaccinations/100k - 1st Dose - by Region - Thailand',
              png_prefix='vac_region_daily_1', cols_subset=utils_thai.REG_COLS, legends=utils_thai.REG_LEG,
              ma_days=21,
              kind='line', stacked=False, percent_fig=False, mini_map=True,
              cmap=utils_thai.REG_COLOURS,
              # table = trend_table(vac_prov_daily['Vac Given 1'], sensitivity=10, style="green_up"),
              footnote='Table of latest Vaccinations and 7 day trend per 100k',
              footnote_left=f'{source}Data Sources: DDC Daily Vaccination Reports',
              )

    # TODO: to make this work have to fix negative values
    # plot_area(df=by_region,
    #           title='Covid Deaths - by Region - Thailand',
    #           png_prefix='vac_region_daily_stacked', cols_subset=utils_thai.REG_COLS, legends=utils_thai.REG_LEG,
    #           ma_days=14,
    #           kind='area', stacked=True, percent_fig=True,
    #           cmap=utils_thai.REG_COLOURS,
    #           footnote_left=f'{source}Data Source: MOPH Covid-19 Dashboard')

    vac = vac.replace(0, np.nan)
    top5 = vac.pipe(topprov, lambda df: df['Vac Given 1 Cum'] / df['Vac Population2'] * 100)
    pred = pred_vac(top5)
    pred = pred.clip(upper=pred.iloc[0].clip(100), axis=1)  # no more than 100% unless already over
    cols = top5.columns.to_list()
    plot_area(df=top5.combine_first(pred),
              title='Covid Vaccinations 1st Dose - Top Provinces - Thailand',
              png_prefix='vac_top5_doses_1', cols_subset=cols,
              ma_days=None,
              actuals=list(pred.columns),
              kind='line', stacked=False, percent_fig=False,
              cmap='tab10',
              y_formatter=perc_format,
              footnote_left=f'{source}Data Sources: MOPH Covid-19 Dashboard\n  DDC Daily Vaccination Reports',
              footnote='Percentage include ages 0-18')

    top5 = vac.pipe(topprov, lambda df: df['Vac Given 2 Cum'] / df['Vac Population2'] * 100)
    # since top5 might be different need to recalculate
    top5_dose1 = vac.pipe(
        topprov,
        lambda df: df['Vac Given 2 Cum'] / df['Vac Population2'] * 100,
        lambda df: df['Vac Given 1 Cum'] / df['Vac Population2'] * 100,
    )
    _, pred = pred_vac(top5_dose1, top5)
    pred = pred.clip(upper=pred.iloc[0].clip(100), axis=1)  # no more than 100% unless already over
    cols = top5.columns.to_list()
    plot_area(df=top5.combine_first(pred),
              title='Covid Vaccinations 2nd Dose - Top Provinces - Thailand',
              png_prefix='vac_top5_doses_2', cols_subset=cols,
              actuals=list(pred.columns),
              ma_days=None,
              kind='line', stacked=False, percent_fig=False,
              cmap='tab10',
              y_formatter=perc_format,
              footnote_left=f'{source}Data Sources: MOPH Covid-19 Dashboard\n  DDC Daily Vaccination Reports',
              footnote="Percentage include ages 0-18")

    top5 = vac.pipe(topprov, lambda df: df['Vac Given 3 Cum'] / df['Vac Population2'] * 100)
    # since top5 might be different need to recalculate
    top5_dose2 = vac.pipe(
        topprov,
        lambda df: df['Vac Given 3 Cum'] / df['Vac Population2'] * 100,
        lambda df: df['Vac Given 2 Cum'] / df['Vac Population2'] * 100,
    )
    _, pred = pred_vac(top5_dose2, top5)
    pred = pred.clip(upper=pred.iloc[0].clip(100), axis=1)  # no more than 100% unless already over
    cols = top5.columns.to_list()
    plot_area(df=top5.combine_first(pred),
              title='Covid Vaccinations 3rd Dose - Top Provinces - Thailand',
              png_prefix='vac_top5_doses_3', cols_subset=cols,
              actuals=list(pred.columns),
              ma_days=None,
              kind='line', stacked=False, percent_fig=False,
              cmap='tab10',
              y_formatter=perc_format,
              footnote_left=f'{source}Data Sources: MOPH Covid-19 Dashboard\n  DDC Daily Vaccination Reports',
              footnote="Percentage include ages 0-18")

    # Lowest provinces: rank on the negated percentage, but plot the real one
    top5 = vac.pipe(topprov, lambda df: -df['Vac Given 1 Cum'] / df['Vac Population2'] * 100,
                    lambda df: df['Vac Given 1 Cum'] / df['Vac Population2'] * 100,
                    other_name=None, num=7)
    cols = top5.columns.to_list()
    pred = pred_vac(top5)
    pred = pred.clip(upper=pred.iloc[0].clip(100), axis=1)  # no more than 100% unless already over
    plot_area(df=top5.combine_first(pred),
              title='Covid Vaccination 1st Dose - Lowest Provinces - Thailand',
              png_prefix='vac_low_doses_1', cols_subset=cols,
              actuals=list(pred.columns),
              ma_days=None,
              kind='line', stacked=False, percent_fig=False,
              cmap='tab10',
              y_formatter=perc_format,
              footnote_left=f'{source}Data Sources: MOPH Covid-19 Dashboard\n  DDC Daily Vaccination Reports',
              footnote='Percentage include ages 0-18')

    top5 = vac.pipe(topprov, lambda df: -df['Vac Given 2 Cum'] / df['Vac Population2'] * 100,
                    lambda df: df['Vac Given 2 Cum'] / df['Vac Population2'] * 100,
                    other_name=None, num=7)
    cols = top5.columns.to_list()
    top5_dose1 = vac.pipe(topprov, lambda df: -df['Vac Given 2 Cum'] / df['Vac Population2'] * 100,
                          lambda df: df['Vac Given 1 Cum'] / df['Vac Population2'] * 100,
                          other_name=None, num=7)
    _, pred = pred_vac(top5_dose1, top5)
    pred = pred.clip(upper=pred.iloc[0].clip(100), axis=1)  # no more than 100% unless already over
    plot_area(df=top5.combine_first(pred),
              title='Covid Vaccinations 2nd Dose - Lowest Provinces - Thailand',
              png_prefix='vac_low_doses_2', cols_subset=cols,
              actuals=list(pred.columns),
              ma_days=None,
              kind='line', stacked=False, percent_fig=False,
              cmap='tab10',
              y_formatter=perc_format,
              footnote_left=f'{source}Data Sources: MOPH Covid-19 Dashboard\n  DDC Daily Vaccination Reports',
              footnote='Percentage include ages 0-18')
if __name__ == "__main__":
    # Script entry point: load the cached CSV exports, merge them into one
    # frame and regenerate every vaccination chart.
    by_date = {"index": ["Date"], "date_cols": ["Date"]}
    df = import_csv("combined", **by_date)
    briefings = import_csv("cases_briefings", **by_date)
    dash = import_csv("moph_dashboard", ["Date"], False, dir="inputs/json")  # so we cache it
    dash_weekly = import_csv("moph_dash_weekly", ["Date"], False, dir="inputs/json")  # so we cache it

    # have vac in briefings and dashboard
    # Earlier sources win; each later one only fills the gaps that remain.
    merged = briefings
    for fallback in (dash, dash_weekly, df):
        merged = merged.combine_first(fallback)
    df = merged

    vac = import_csv("vac_timeline", ['Date'])
    df = df.combine_first(vac)

    # Set after the data is loaded, immediately before plotting
    os.environ.update({"MAX_DAYS": '0', 'USE_CACHE_DATA': 'True'})

    save_vacs_prov_plots(df)
    save_vacs_plots(df)