-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathsars_ani_visualization.Rmd
766 lines (645 loc) · 40 KB
/
sars_ani_visualization.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
---
title: 'SARS-ANI: A Global Open Access Dataset of Reported SARS-CoV-2 Events in Animals
- Visualization'
author: 'SARS-ANI Team'
date: ''
output:
pdf_document: default
---
```{r setup, include=FALSE}
# Prepare the R environment: install packages
knitr::opts_chunk$set(echo = TRUE, fig.align = 'center')
# Define requested packages
my_packages <- c("dplyr", "ggplot2", "viridis", "ggalluvial","ggfittext","randomcoloR", "reshape2", "rworldmap", "stringr", "webr" , "knitr", "kableExtra","ggfittext" , "visNetwork", "scales", "treemap")
# Extract not installed packages
not_installed <- my_packages[!(my_packages %in% installed.packages()[ , "Package"])]
# Install not installed packages
if(length(not_installed)) install.packages(not_installed)
devtools::install_github("sctyner/geomnet")
# Load libraries
library(dplyr)
library(ggplot2)
library(viridis)
library(ggalluvial)
library(ggfittext)
library(randomcoloR)
library(reshape2)
library(rworldmap)
library(stringr)
require(webr)
library(knitr)
library(kableExtra)
library(ggfittext)
library(visNetwork)
library(geomnet)
library(scales)
library(treemap)
```
## Background
The severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2) most probably emerged from an animal source and subsequently spilled over to the human population in 2019, in China. Despite its zoonotic origin, the current COVID-19 pandemic is being sustained through human-to-human transmission. <br>
Animal infections with SARS-CoV-2 have been reported by several countries. A wide range of animal species have proven to be susceptible to SARS-CoV-2 either via natural and/or experimental infection. <br>
Collecting and sharing data on reported SARS-CoV-2 natural infections in animals is of critical importance to assess their epidemiological significance for animal and human health, as well as their implications for biodiversity and conservation. <br>
```{r import, echo=FALSE, message=FALSE, warning=FALSE}
# Import the data
sars_df <- read.csv("sars_ani_data.csv", encoding="UTF-8")
# Create a column "Date" where Date = date_confirmed and Date = date_reported if date_confirmed is missing and Date = date_published if the two others are missing.
# Remove trailing and leading white spaces
sars_df <- sars_df %>%
mutate (Date = if_else(
date_confirmed %in% c( "NS", NA,""), date_reported , date_confirmed)) %>%
mutate (Date = if_else(
Date %in% c( "NS", NA,""), date_published, Date)) %>%
mutate(across(where(is.character), str_trim))
# Transform the column "Date" into format Date that will be recognized in R
sars_df$Date <- as.Date(sars_df$Date , format = "%Y-%m-%d")
# Total number of unique reports included to extract the data
reports <- c(sars_df$archive_event_number, sars_df$secondary_source_ID)
unique_rep <- length(unique(reports))
```
## Overview of the Dataset
We consider as an **event** when one single case or several epidemiologically related cases were identified by the presence of viral RNA (proof of infection) and/or antibodies (proof of exposure) in an animal. Epidemiologically related cases include e.g. animals belonging to the same farm, captive animals housed together, pets belonging to the same household, or animals sampled within the same (generally transversal) study, featuring similar event and patient attributes, i.e. they underwent the same laboratory test(s) and showed the same results (including variant), exhibited the same symptoms and disease outcome, and were confirmed, reported (when applicable), and published on the same date (e.g. when pets of the same species sharing the same household showed different symptoms, they are reported as two distinct events). Events include follow-up history reports of outbreaks (e.g. follow-up on the clinical status of the animal, variant identification after case confirmation). <br>
The SARS-ANI Dataset presents `r nrow(sars_df)` records of SARS-CoV-2 events in wild, farmed, domestic, and captive animals, which occurred between `r min(sars_df$Date)` and `r max(sars_df$Date)` and have been documented in `r length(unique(reports))` ProMED-mail and WAHIS reports. <br>
Natural infections with SARS-CoV-2 have been reported in `r length (unique(sars_df$host_sci_spec_res[!is.na(sars_df$host_sci_spec_res)]))` taxonomically-resolved animal species, belonging to `r length(unique(sars_df$family))` families in `r length(unique(sars_df$country_name))` countries worldwide (covering `r length(unique(sars_df$subnational_administration))` subnational administrative areas).<br>
The SARS-ANI dashboard, providing interactive visualizations of the dynamic version of the SARS-ANI Dataset, is available at: https://vis.csh.ac.at/sars-ani <br>
\newpage
## Remarks
* The number of reported SARS-CoV-2 events in animals in each country depends on the reporting strategy of the country to the World Organisation for Animal Health (WOAH), the intensity of the research and surveillance strategy in the different animal species (e.g. whether pets from infected households are systematically investigated or not), the media coverage on the diagnosed cases, and the uptake of the reported event by the ProMED-mail team. <br>
* Number of cases and deaths in mink were inconsistently reported, therefore data on SARS-CoV-2 events in mink are partial and does not allow accurate estimation of the impact of SARS-CoV-2 in the mink population. <br>
\newpage
## How many SARS-CoV-2 events in animals have been published ?
The figure includes follow-up history reports of outbreaks. Date of publication is used.
```{r number_reports, fig.height=5, fig.width=5, echo=FALSE, message=FALSE, warning=FALSE}
# Cumulative number of SARS-Cov-2 events in animals since the beginning of the COVID-19 pandemic. Date of publication is used.
# Note: this does not represent number of outbreaks but number of events, i.e. follow-up history reports are included
nb_report_day <- sars_df %>%
mutate (date_published = as.Date (date_published , format = "%Y-%m-%d")) %>%
group_by(date_published) %>%
dplyr::summarize(n()) %>%
arrange(date_published) %>%
dplyr::rename(number_reports = "n()") %>%
mutate(cum_sum=cumsum(number_reports))
# Make the curve of the cumulative number of events
plot_day_cum <- ggplot(nb_report_day , aes(x=date_published, y=cum_sum, group = 1)) +
geom_line()+
geom_point()+ # remove this line if you want to remove the dots on the plot line (or add "#" in front of this line)
scale_x_date(limits= as.Date(c(min(nb_report_day$date_published)-7, max(nb_report_day$date_published)+7)), date_breaks = "3 week", date_labels = "%Y-%m-%d", expand = c(0, 0))+
xlab("Date of publication") +
ylab("Cumulative number of events")+
theme_bw()+
theme(axis.text.x = element_text(angle=90,vjust = 0.5, hjust=.50))+
theme (axis.title.x = element_text(vjust= -1),
axis.title.y = element_text(vjust= 3),
text = element_text(size = 10))
plot_day_cum
```
\newpage
## Where did SARS-CoV-2 events in animals occur?
### For each country, how many species have been reported as infected with or exposed to SARS-CoV-2?
```{r map_species, fig.height=10, fig.width=21, fig.fullwidth=TRUE, echo=FALSE, message=FALSE, warning=FALSE}
# Get the world map
world_map <- map_data("world")
# Check that countries names are similar between the two datasets (world map and SARS-ANI data)
countries_world <- sort(unique(world_map$region))
countries_sars <- sort(unique(sars_df$country_name))
intersect <- intersect(countries_world,countries_sars )
#length(intersect) == length(countries_sars) # if FALSE some country names have to be correct
# Find values to correct
#setdiff(countries_sars, countries_world)
# Format the data to plot
# Correct countries names to match the world map data
sars_to_plot_host <- sars_df %>%
dplyr::mutate(country_name = replace(country_name, country_name == "United States", "USA")) %>%
dplyr::mutate(country_name = replace(country_name, country_name == "United Kingdom", "UK")) %>%
dplyr::mutate(country_name = replace(country_name, country_name == "Republic of Korea", "South Korea")) %>%
dplyr::mutate(country_name = replace(country_name, country_name == "Russian Federation", "Russia")) %>%
dplyr::group_by(country_name) %>%
dplyr::mutate(unique_species = n_distinct(host_sci_spec_res)) %>%
select(country_name,unique_species ) %>%
unique()
# Add species number to world map
sars.to.map <- left_join( world_map, sars_to_plot_host, by = c("region"="country_name"))
# Plot the map of the number of species with SARS-CoV-2 reported cases per countries
sars.to.map %>%
filter (region != "Antarctica") %>%
ggplot(aes(long, lat, group = group))+
geom_polygon(aes(fill = unique_species), color = "black") +
scale_fill_viridis_c(name = "Number of unique (sub)species", option = 'D', trans = "log2", breaks = trans_breaks("log2", function(x) 2^x),na.value="grey85") +
theme(axis.text.x = element_blank(),
axis.text.y = element_blank(), axis.ticks.x = element_blank(),
axis.ticks.y = element_blank(), axis.title = element_blank(),
plot.margin = unit(0 * c(-1.5, -1.5, -1.5, -1.5), "lines"))+
xlab ("Longitude") +
ylab ("Latitude") +
theme_bw()+
theme(legend.position="bottom") +
guides(fill = guide_colourbar(barwidth = 20, barheight = 2)) +
theme(text = element_text(size = 20))
```
```{r table_countries_spec, echo=FALSE, message=FALSE, warning=FALSE}
# Print the table of the number of affected species per country
colnames(sars_to_plot_host) <- c("Country", "Number of species")
kable(sars_to_plot_host , booktabs = T,
caption = "Number of unique animal species with reported natural SARS-CoV-2 infection or exposure per country.")
```
\newpage
### For each country, how many outbreaks of SARS-CoV-2 have been reported in animals?
Outbreak: Occurrence of one or more cases in an epidemiological unit. <br>
*Outbreaks exclude follow-up history reports of SARS-CoV-2 events* <br>
*When two or more SARS-CoV-2 events are epidemiologically related by the link "living together", they are counted as one single outbreak.* <br>
```{r map_outbreaks, fig.height=10, fig.width=21, fig.fullwidth=TRUE, echo=FALSE, message=FALSE, warning=FALSE}
# Format the data to be plotted (keep outbreaks only, aggregate per country)
sars_to_plot_outbreaks <- sars_df %>%
dplyr::mutate(country_name = replace(country_name, country_name == "United States", "USA")) %>%
dplyr::mutate(country_name = replace(country_name, country_name == "United Kingdom", "UK")) %>%
dplyr::mutate(country_name = replace(country_name, country_name == "Republic of Korea", "South Korea")) %>%
dplyr::mutate(country_name = replace(country_name, country_name == "Russian Federation", "Russia")) %>%
filter (!related_to_other_entries %in% c("updated by", "living together")) %>% # follow-up history reports are removed, only the last update is kept (so that the number of cases or outbreaks -depending on the question- is only counted once) AND for epidemiologically related events in which animals are living together (they describe one single outbreak), only one event is included.
group_by(country_name) %>%
tally()
# Add outbreak number to world map
sars.to.map_2 <- left_join( world_map, sars_to_plot_outbreaks, by = c("region"="country_name"))
sars.to.map_2 %>%
filter (region != "Antarctica") %>% # crop the map
ggplot(aes(long, lat, group = group))+
geom_polygon(aes(fill = n), color = "black") +
theme(axis.text.x = element_blank(),
axis.text.y = element_blank(), axis.ticks.x = element_blank(),
axis.ticks.y = element_blank(), axis.title = element_blank(),
plot.margin = unit(0 * c(-1.5, -1.5, -1.5, -1.5), "lines"))+
xlab ("Longitude") +
ylab ("Latitude") +
theme_bw()+
theme(legend.position="bottom") +
scale_fill_viridis_c(name = "Number of reported outbreaks", option = 'D', trans = "log2", breaks = trans_breaks("log2", function(x) 2^x),na.value="grey85") +
guides(fill = guide_colourbar(barwidth = 20, barheight = 2)) +
theme(text = element_text(size = 20))
# Export Figure 2
# Font size needs to be adapted for export in eps or tiff format
Fig2 <- sars.to.map_2 %>%
filter (region != "Antarctica") %>% # crop the map
ggplot(aes(long, lat, group = group))+
geom_polygon(aes(fill = n), color = "black") +
theme(axis.text.x = element_blank(),
axis.text.y = element_blank(), axis.ticks.x = element_blank(),
axis.ticks.y = element_blank(), axis.title = element_blank(),
plot.margin = unit(0 * c(-1.5, -1.5, -1.5, -1.5), "lines"))+
xlab ("Longitude") +
ylab ("Latitude") +
theme_bw()+
theme(legend.position="bottom") +
scale_fill_viridis_c(name = "Number of reported outbreaks", option = 'D', trans = "log2", breaks = trans_breaks("log2", function(x) 2^x),na.value="grey85") +
guides(fill = guide_colourbar(barwidth = 8, barheight = 1)) +
theme(text = element_text(size = 12))
ggsave("Fig2.eps", Fig2 , width = 18, height = 12, units="cm", dpi = 600)
ggsave("Fig2.tiff", Fig2 , width = 18, height = 12, units="cm", dpi = 600)
```
```{r table_countries_outb, echo=FALSE, message=FALSE, warning=FALSE}
sars_to_plot_outbreaks_2 <- sars_to_plot_outbreaks %>%
arrange(-n)
colnames(sars_to_plot_outbreaks_2) <- c("Country", "Number of outbreaks")
kable(sars_to_plot_outbreaks_2, booktabs = T,
caption = "Number of reported SARS-CoV-2 outbreaks per country.")
```
\newpage
## How many SARS-CoV-2 cases (infections and/or exposures) have been reported per animal species?
### Family, scientific name, common name of the animal hosts mentioned in the dataset, and reported number of SARS-CoV-2 cases. <br>
**Table 3 above** <br>
*NCBI-resolved common and scientific names are displayed*.<br>
*Scientific name for the species "hamster" is inconsistently reported and is therefore given the value "NS" when not specified.* <br>
*Number of cases is reported inconsistently in mink (data on the number of cases in mink is missing most of the time). Therefore, the number of diagnosed cases is largely under-estimated in mink. This is also true for deer, but to a lesser extent.* <br>
```{r species_families, echo=FALSE, message=FALSE, warning=FALSE}
table_species <- sars_df %>%
filter (related_to_other_entries != "updated by") %>% # follow-up history reports are removed, only the last update is kept (so that the number of cases or outbreaks -depending on the question- is only counted once)
dplyr::mutate (number_cases = replace(number_cases, number_cases == "NS", 1)) %>% # change NS to 1 so that we count 1 case for each missing value. I put 1 so that the species appears on the graph.
dplyr::mutate(number_cases = as.numeric(as.character(number_cases))) %>%
dplyr::group_by(family, host_sci_res, host_sci_spec_res, host_com_res) %>%
dplyr::summarise(number_cases_tot= sum(number_cases)) %>%
dplyr::mutate(host_com_res = replace(host_com_res,
family == "Cricetidae" & is.na(host_com_res) == TRUE, "hamster (unspecified)")) %>% # replace missing value for hamster%>%
dplyr::arrange(-number_cases_tot)
```
The dataset reports `r sum(table_species$number_cases_tot)` cases in total **(we counted one case for any missing value on the number of cases)**.
```{r species_families_table, echo=FALSE, message=FALSE, warning=FALSE}
# Table of the data
colnames(table_species) <- c("Family", "Lowest taxonomy", "Species", "Common name", "Number cases")
kable(table_species, format = "latex", booktabs = T,
caption = "Number of reported SARS-CoV-2 cases (infections and/or exposures) per animal species.") %>%
kable_styling(latex_options = "scale_down")
```
\newpage
```{r species_families_pie, fig.height=15, fig.width=15, echo=FALSE, message=FALSE, warning=FALSE}
colnames(table_species) <- c("Family", "Lowest_taxonomy", "Species", "Common_name", "Number_cases")
treemap(table_species,
index=c("Family","Common_name"),
vSize="Number_cases",
type="index",
title="",
fontsize.labels=c(25,16),
fontcolor.labels=c("white","yellow"),
fontface.labels=c(2,1),
bg.labels=c("transparent"),
align.labels=list(
c("center", "center"),
c("right", "bottom")),
overlap.labels=0.5,
inflate.labels=F)
```
Note: For each species, we counted one case for any missing value on the number of cases.
\newpage
## How many SARS-CoV2 cases (infections or exposures) have been reported per animal host and country?
The number of cases in mink is inconsistently (or even not at all) reported. Therefore, the number of cases shown here largely under-estimate the true number of cases in this species.
```{r species_countries_scientific, fig.height=14, fig.width=18, echo=FALSE, message=FALSE, warning=FALSE}
# Use a heat map to visualize the data
sars_df %>%
filter (related_to_other_entries != "updated by") %>% # follow-up history reports are removed, only the last update is kept (so that the number of cases or outbreaks -depending on the question- is only counted once)
dplyr::mutate(host_sci_res = replace(host_sci_res,
host_com_orig == "hamster" & is.na(host_sci_res) == TRUE, "hamster (unspecified)")) %>% # replace missing value for hamster
mutate (number_cases = replace(number_cases, number_cases == "NS", NA)) %>% # change NS to NA (mostly data on mink --> could be replaced by average size of mink farm for each country?)
dplyr::mutate(number_cases = as.numeric(as.character(number_cases))) %>%
dplyr::group_by(country_name, host_sci_res) %>%
dplyr::summarize (number_cases_tot = sum(number_cases)) %>%
ggplot(aes(country_name, host_sci_res, fill= number_cases_tot)) +
geom_tile(color = "white") +
coord_fixed() +
xlab("") +
ylab ("Animal host")+
theme(axis.text.x = element_text(angle=90, hjust=1, vjust = 0.5)) +
theme (axis.title.x = element_text(vjust= -1),
axis.title.y = element_text(vjust= 3))+
scale_fill_viridis_c(option = 'D', trans = "log2", breaks = trans_breaks("log2", function(x) 2^x), name = "Number of cases", na.value = "firebrick4") + # NA values (was NS in the original dataset) are colored in firebrick
guides(fill = guide_colourbar(barwidth = 2, barheight = 10)) +
theme(text = element_text(size = 24))
```
Red color = Value non available.
*Scientific name of some hamsters was not specified, therefore scientific name could not be retrieved from provided information*.
\newpage
## How many SARS-CoV-2 outbreaks have been reported per animal host and country?
Outbreak: Occurrence of one or more cases in an epidemiological unit. <br>
*Outbreaks exclude follow-up history reports of SARS-CoV-2 events* <br>
*When two or more SARS-CoV-2 events are epidemiologically related by the link "living together", they are counted as one single outbreak.* <br>
The following figure displays animal host names at species level.<br>
```{r outbreaks, fig.height=14, fig.width=18,echo=FALSE, message=FALSE, warning=FALSE}
sars_df %>%
filter (!related_to_other_entries %in% c("updated by", "living together")) %>% # follow-up history reports are removed, only the last update is kept (so that the number of cases or outbreaks -depending on the question- is only counted once) AND for epidemiologically related events in which animals are living together (they describe one single outbreak), only one event is included.
dplyr::mutate(host_sci_spec_res = replace(host_sci_spec_res,
host_com_orig == "hamster" & is.na(host_sci_spec_res) == TRUE, "hamster (unspecified)")) %>% # replace missing value for hamster
dplyr::group_by(country_name, host_sci_spec_res) %>%
tally() %>% # Count the number of outbreaks
ggplot(aes(country_name, host_sci_spec_res, fill= n)) +
geom_tile(color = "white") +
coord_fixed() +
xlab("") +
ylab ("Animal host (species level)")+
theme(axis.text.x = element_text(angle=90, hjust=1, vjust = 0.5),
axis.title.x = element_text(vjust= -1),
axis.title.y = element_text(vjust= 3))+
scale_fill_viridis_c(option = 'D', trans = "log2", breaks = trans_breaks("log2", function(x) 2^x), name = "Number of outbreaks", na.value = "firebrick4")+
guides(fill = guide_colourbar(barwidth = 2, barheight = 10)) +
theme(text = element_text(size = 24))
```
*Scientific name of some hamsters was not specified, therefore scientific name could not be retrieved from provided information*.
\newpage
## How many animals have died directly or indirectly from SARS-CoV-2?
The figure below includes deaths that are directly related to an infection as well as deaths related to culling when implemented as control measure. <br>
**Note:** The number of deaths in mink is inconsistently reported and does not allow an accurate estimation of the true number of deaths.<br>
In the figure below, the colloquial name of the animal host is displayed.<br>
```{r deaths, fig.height=14, fig.width=18, echo=FALSE, message=FALSE, warning=FALSE}
sars_df %>%
filter (related_to_other_entries != "updated by") %>% # follow-up history reports are removed, only the last update is kept (so that the number of cases or outbreaks -depending on the question- is only counted once)
filter (!(related_to_other_entries == "living together" & control_measures %in% c("culling","selective culling"))) %>% # we remove animals that lived together and were culled because the number of deaths then appear several time in the data, i.e. for each event related to this specific farm.
filter (outcome != "death not related to Sars-CoV-2") %>% # remove animals which death is probably not related to covid
mutate (number_deaths = replace(number_deaths, number_deaths == "NS", NA)) %>%# change NS to NA (mostly data on mink --> could be replaced by average size of mink farm for each country?)
dplyr::mutate(number_deaths = as.numeric(as.character(number_deaths))) %>%
dplyr::group_by(country_name, host_colloq) %>%
dplyr::summarise (number_deaths_tot= sum(number_deaths)) %>%
ggplot(aes(country_name, host_colloq, fill= number_deaths_tot)) +
geom_tile(color = "white") +
coord_fixed()+
xlab("") +
ylab ("Animal host")+
theme(axis.text.x = element_text(angle=90, hjust=1, vjust = 0.5),
axis.title.x = element_text(vjust= -1),
axis.title.y = element_text(vjust= 3))+
scale_fill_viridis_c(option = 'D', trans = "sqrt", breaks = trans_breaks("sqrt", function(x) x^2), name = "Number of deaths", na.value = "firebrick4") + # NA values (was NS in the data) are colored in firebrick
guides(fill = guide_colourbar(barwidth = 2, barheight = 10)) +
theme(text = element_text(size = 24))
```
Red color = Value non available.
\newpage
## What is the case fatality rate of SARS-CoV-2 in the different animal species, per country?
The case fatality rate (CFR) per species and country is obtained by dividing the total number of reported deaths in one species by the total number of reported cases for this species in the country. Animals culled as part of a control strategy are excluded (not all were diagnosed as infected). Similarly, mink are not included here because data on case and death numbers are partial. The CFR depends strongly on testing and does not give information on the infection fatality rate (IFR, number of deaths divided by the total number of infected individuals) or mortality rate (MR, number of deaths divided by the total at-risk population).<br>
In the figure below, the colloquial name of the animal host is displayed.<br>
```{r fatality, fig.height=14, fig.width=18,echo=FALSE, message=FALSE, warning=FALSE}
# To visualize the case fatality rate (proportion of individuals who died from SARS-CoV-2 among all individuals diagnosed with the disease), we plot (number of reported deaths / number of reported cases) on the heat map
# We removed culled animals because they did not all die from SARS-CoV-2
sars_df %>%
filter (related_to_other_entries != "updated by") %>% # follow-up history reports are removed, only the last update is kept (so that the number of cases or outbreaks -depending on the question- is only counted once)
filter (!control_measures %in% c("culling", "selective culling")) %>% # culled animals excluded
filter (outcome != "death not related to Sars-CoV-2") %>% # remove animals which death is probably not related to covid
filter (host_colloq != "American mink") %>% # mink are removed because data on cases and deaths are not accurate enough and most of them were culled
dplyr::mutate (number_cases = replace(number_cases, number_cases == "NS", NA)) %>% # change NS to NA (mostly data on mink --> could be replaced by average size of mink farm for each country?)
dplyr::mutate(number_cases = as.numeric(as.character(number_cases))) %>%
dplyr::mutate (number_deaths = replace(number_deaths, number_deaths == "NS", NA)) %>%
dplyr::mutate(number_deaths = as.numeric(as.character(number_deaths))) %>%
dplyr::group_by(country_name, host_colloq ) %>%
dplyr::summarise (fatality_rate= sum(number_deaths, na.rm = T)/sum(number_cases, na.rm = T)) %>%
ggplot(aes(country_name, host_colloq , fill= fatality_rate)) +
geom_tile(color = "white") +
coord_fixed()+
xlab("") +
ylab ("Animal host")+
theme(axis.text.x = element_text(angle=90, hjust=1, vjust = 0.5),
axis.title.x = element_text(vjust= -1),
axis.title.y = element_text(vjust= 3))+
scale_fill_viridis_c(option = 'D', trans = "sqrt", breaks = trans_breaks("sqrt", function(x) x ^ 2), name = "Case fatality rate", na.value = "firebrick4") +
guides(fill = guide_colourbar(barwidth = 2, barheight = 10)) +
theme(text = element_text(size = 24))
# Export Fig. 3
Fig3 <- sars_df %>%
filter (related_to_other_entries != "updated by") %>% # follow-up history reports are removed, only the last update is kept (so that the number of cases or outbreaks -depending on the question- is only counted once)
filter (!control_measures %in% c("culling", "selective culling")) %>% # culled animals excluded
filter (outcome != "death not related to Sars-CoV-2") %>% # remove animals which death is probably not related to covid
filter (host_colloq != "American mink") %>% # mink are removed because data on cases and deaths are not accurate enough and most of them were culled
dplyr::mutate (number_cases = replace(number_cases, number_cases == "NS", NA)) %>% # change NS to NA (mostly data on mink --> could be replaced by average size of mink farm for each country?)
dplyr::mutate(number_cases = as.numeric(as.character(number_cases))) %>%
dplyr::mutate (number_deaths = replace(number_deaths, number_deaths == "NS", NA)) %>%
dplyr::mutate(number_deaths = as.numeric(as.character(number_deaths))) %>%
dplyr::group_by(country_name, host_colloq ) %>%
dplyr::summarise (fatality_rate= sum(number_deaths, na.rm = T)/sum(number_cases, na.rm = T)) %>%
ggplot(aes(country_name, host_colloq , fill= fatality_rate)) +
geom_tile(color = "white") +
coord_fixed()+
xlab("") +
ylab ("Animal host") +
theme(axis.text.x = element_text(angle=90, hjust=1, vjust = 0.3),
axis.title.x = element_text(vjust= -1),
axis.title.y = element_text(vjust= 3),
text = element_text(size = 9)) +
scale_fill_viridis_c(option = 'D', trans = "sqrt", breaks = trans_breaks("sqrt", function(x) x ^ 2), name = "Case fatality rate", na.value = "firebrick4") +
guides(fill = guide_colourbar(barwidth = 1, barheight = 6))
ggsave("Fig3.eps", Fig3 , width = 18, height = 14, units="cm", dpi = 600)
ggsave("Fig3.tiff", Fig3 , width = 18, height = 14, units="cm", dpi = 600)
```
Red color = Fatality rate could not be computed because number of cases and/or deaths is missing.
\newpage
## Which laboratory methods are used to detect infection with or exposure to SARS-CoV-2 in animals?
```{r tests, fig.height=14, fig.width=18, echo=FALSE, message=FALSE, warning=FALSE}
# Type of tests used to detect SARS-CoV-2 infection in animals
test_df <- sars_df %>%
select (host_colloq, test, test_2, test_3)
# Convert to long format
test_df_melt <- melt (test_df, id.vars=c("host_colloq"))
# remove NA value as well as records where test is not specified ("NS")
test_df_melt <- test_df_melt %>%
filter(!is.na(value)) %>%
filter (value != "NS")
# Heat map of the type of test used per species
test_df_melt %>%
ggplot(aes(host_colloq, value)) +
geom_tile(color = "white", fill = "black") +
coord_fixed()+
xlab("") +
ylab ("")+
theme(axis.text.x = element_text(angle=90, hjust=1),
axis.title.x = element_text(vjust= -1),
axis.title.y = element_text(vjust= 3),
text = element_text(size = 24))
```
\newpage
## Why were animals tested for SARS-CoV-2?
Note that only positive animals are reported in the dataset (investigations that led to negative results are not -or rarely- reported to the authorities or media).
```{r reasons_testing, fig.height=14, fig.width=20,echo=FALSE, message=FALSE, warning=FALSE}
# Reasons for testing
reasons_df <- sars_df %>%
mutate(reason_for_testing = replace(reason_for_testing , reason_for_testing == "NS", "not specified")) %>%
group_by(host_colloq,reason_for_testing) %>%
tally()
reasons_df %>%
ggplot(aes(host_colloq, reason_for_testing, fill = n)) +
geom_tile(color = "white") +
coord_fixed()+
xlab("") +
ylab ("")+
theme(axis.text.x = element_text(angle=90, hjust=1),
axis.title.x = element_text(vjust= -1),
axis.title.y = element_text(vjust= 3),
text = element_text(size = 22))+
scale_fill_viridis_c(option = 'D', trans = "log2", breaks = trans_breaks("log2", function(x) 2^x), name = "Number of events", na.value = "firebrick4") +
guides(fill = guide_colourbar(barwidth = 2, barheight = 10))
# Export Fig. 5
Fig5 <- reasons_df %>%
ggplot(aes(host_colloq, reason_for_testing, fill = n)) +
geom_tile(color = "white") +
coord_fixed()+
xlab("") +
ylab ("")+
theme(axis.text.x = element_text(angle=90, hjust=1, vjust = 0.2),
axis.title.x = element_text(vjust= -1),
axis.title.y = element_text(vjust= 3),
text = element_text(size = 9)) +
scale_fill_viridis_c(option = 'D', trans = "log2", breaks = trans_breaks("log2", function(x) 2^x), name = "Number of events", na.value = "firebrick4")+
guides(fill = guide_colourbar(barwidth = 1, barheight = 4))
ggsave("Fig5.eps", Fig5 , width = 18, height = 14, units="cm", dpi = 600)
ggsave("Fig5.tiff", Fig5 , width = 18, height = 14, units="cm", dpi = 600)
```
\newpage
## What are the reported potential sources of SARS-CoV-2 infection or exposure in animals?
Several reports referred to "an undetected human case" as the most likely source of infection. Those were given the value "NS" in the dataset because the information was considered not specific enough. The source of infection was reported as "human” only when the report mentioned a contact with a confirmed human case as the most likely source of infection.
```{r sources, fig.height=10, fig.width=10, echo=FALSE, message=FALSE, warning=FALSE}
# Suspected sources of infection
sars_df %>%
filter (related_to_other_entries != "updated by") %>% # follow-up history reports are removed, only the last update is kept (so that the number of cases or outbreaks -depending on the question- is only counted once)
group_by(source_of_infection) %>%
tally() %>%
mutate(perc = n*100/sum(n)) %>%
ggplot(aes(x=source_of_infection, y=perc, fill=source_of_infection)) +
geom_bar(stat="identity") +
xlab("") +
ylab ("Percentage") +
scale_fill_brewer(palette = "Dark2") +
theme_bw() +
theme(legend.position="none") +
theme(text = element_text(size = 24))
```
NS = Not specified.
\newpage
## Which SARS-CoV-2 variants have been identified in the different animal hosts?
The figures describe the number (and percentages) of **events** (one event may include one or more than one case).
```{r variants_perc, fig.height=15, fig.width=15, echo=FALSE, message=FALSE, warning=FALSE}
## Percentage of reported variants among reported events
variants_df <- sars_df %>%
filter (!is.na(variant)) %>%
filter (variant != "NS")
perc.variants <- nrow(variants_df)/nrow(sars_df)
```
Variants involved in SARS-CoV-2 events in animal were reported in `r round(perc.variants,1)`% of the events (`r nrow(variants_df)`/`r nrow(sars_df)`).
```{r variants, fig.height=15, fig.width=15, echo=FALSE, message=FALSE, warning=FALSE, message=FALSE,}
PieDonut(variants_df,aes(pies=host_colloq ,donuts=variant), selected=2,labelposition=1,title="", r0=0.7,
showPieName=FALSE, pieLabelSize = 5, donutLabelSize = 5, showRatioThreshold = 0.01)
```
```{r variants_Sankey, fig.height=20, fig.width=25, echo=FALSE, message=FALSE, warning=FALSE}
# Another visualization of the variants per animal host
n_col <- length(unique(sars_df$variant))
palette <- distinctColorPalette(n_col)
sars_df %>%
filter (!is.na(variant)) %>%
filter (variant != "NS") %>%
group_by(host_colloq, variant) %>%
tally() %>%
ggplot( aes(y = n, axis1 = host_colloq, axis2 = variant)) +
geom_alluvium(aes(fill = variant), width = 1/12) +
geom_stratum(alpha = .5) +
scale_x_discrete(limits = c("Host", "Variant"), expand = c(.05, .05)) +
scale_fill_manual(values=palette) +
ggtitle("")+
theme_minimal()+
theme(legend.position = "none",
axis.text.x = element_text(size = 35),
axis.text.y = element_text(size = 35),
axis.title.y = element_text(size = 30)) +
ggfittext::geom_fit_text(stat = "stratum", width = 1/6, min.size = 1, aes(label = after_stat(stratum)),grow = TRUE) +
ylab("Number of events")
# Export Figure 4
Fig4 <- sars_df %>%
filter (!is.na(variant)) %>%
filter (variant != "NS") %>%
group_by(host_colloq, variant) %>%
tally() %>%
ggplot( aes(y = n, axis1 = host_colloq, axis2 = variant)) +
geom_alluvium(aes(fill = variant), width = 1/12) +
geom_stratum(alpha = .5) +
scale_x_discrete(limits = c("Host", "Variant"), expand = c(.05, .05)) +
scale_fill_manual(values=palette) +
ggtitle("")+
theme_minimal()+
theme(legend.position = "none",
axis.text.x = element_text(size = 14),
axis.text.y = element_text(size = 14),
axis.title.y = element_text(size = 14)) +
ggfittext::geom_fit_text(stat = "stratum", width = 1/5, min.size = 1, aes(label = after_stat(stratum)),grow = TRUE) +
ylab("Number of events")
ggsave("Fig4.tiff", Fig4 , width = 22, height = 20, units="cm", dpi = 600)
ggsave("Fig4.eps", fallback_resolution=600, width = 22, height = 20, device=cairo_ps)
```
\newpage
## What are the reported clinical signs allegedly associated to natural SARS-CoV-2 infections in the different animal hosts?
The following bar plots summarize clinical signs of SARS-CoV-2 in some animal hosts after natural infection. The figures show the **number of events** in which each clinical sign was reported (one event may include one or more than one case).<br>
### Cats
```{r symptoms_cats, echo=FALSE, message=FALSE, warning=FALSE}
sars_df %>%
filter (symptoms != "NS") %>% # remove records where symptoms are not specified
filter (host_colloq == "cat") %>% # select the host of interest
group_by(symptoms) %>%
tally() %>%
ggplot(aes(x=symptoms, y=n)) +
geom_bar(stat="identity")+
coord_flip() +
ylab("Number of events") +
xlab("") +
ggtitle("Symptoms reported in cats") +
theme_bw()+
theme(title = element_text(size=rel(0.8)))
```
### Dogs
```{r symptoms_dog, echo=FALSE, message=FALSE, warning=FALSE}
sars_df %>%
filter (symptoms != "NS") %>% # remove records where symptoms are not specified
filter (host_colloq== "dog") %>% # select the host of interest
group_by(symptoms) %>%
tally() %>%
ggplot(aes(x=symptoms, y=n)) +
geom_bar(stat="identity")+
coord_flip() +
ylab("Number of events") +
xlab("") +
ggtitle("Symptoms reported in dogs") +
theme_bw()+
theme(title = element_text(size=rel(0.8)))
```
### Captive animals
```{r symptoms_captive, echo=FALSE, message=FALSE, warning=FALSE}
captive <- sars_df %>%
filter (symptoms != "NS") %>% # remove records where symptoms are not specified
filter (living_conditions == "zoo") %>% # select the hosts of interest
group_by(host_colloq, symptoms) %>%
tally()
n_col3 <- length(unique(captive$symptoms))
palette3 <- distinctColorPalette(n_col3)
ggplot(captive, aes(x=host_colloq, y=n, fill = symptoms)) +
geom_bar(stat="identity", position=position_dodge())+
coord_flip() +
ylab("Number of events") +
xlab("") +
ggtitle("Symptoms reported in captive animals") +
scale_fill_manual(values=palette3) +
theme_bw() +
theme(legend.text=element_text(size=rel(0.6)),
legend.title=element_text(size=rel(0.8)),
title = element_text(size=rel(0.8)),
legend.position="bottom") +
guides(fill=guide_legend(title="Symptoms"))
```
\newpage
## What are the implemented control measures and possible outcomes after detection of SARS-CoV-2 infection or exposure in the different animal species?
The computed network is saved as an .html file (network.html) in the working directory. <br>
The network shows, for each animal host (blue nodes), which control measures (yellow nodes) have been implemented in response to SARS-CoV-2 outbreaks (e.g. treatment, culling, exclusion zone) and what was the outcome (red nodes) for the animals (e.g. death, recovery). <br>
The size of each node in the network corresponds to the number of times the value appears in the dataset. <br>
```{r network, echo=FALSE, message=FALSE, warning=FALSE}
# Nodes
# We create nodes weighted with the number of time the animal host, control measure or outcome occur in the dataset
nodes_col1 <- sars_df %>%
group_by(host_colloq) %>%
tally()
colnames(nodes_col1) <- c("label", "value")
nodes_col2 <- sars_df %>%
group_by(control_measures) %>%
tally()%>%
mutate(control_measures = replace(control_measures, control_measures == "NS", "none")) %>%
mutate(control_measures = replace(control_measures, is.na(control_measures), "not applicable"))
colnames(nodes_col2) <- c("label", "value")
nodes_col3 <- sars_df %>%
group_by(outcome) %>%
tally()%>%
mutate(outcome = replace(outcome, outcome == "NS", "unknown"))
colnames(nodes_col3) <- c("label", "value")
nodes_col4 <- rbind(nodes_col1, nodes_col2, nodes_col3)
vertices.id <- c(0:(nrow(nodes_col4)-1))
nodes <- cbind.data.frame (vertices.id, nodes_col4)
nodes <- nodes %>%
mutate(group = case_when (label %in% c(nodes_col1$label) ~ "Host",
label %in% c(nodes_col2$label)~"Control measure",
label %in% c(nodes_col3$label) ~ "Outcome"))
colnames(nodes) <- c("id", "label", "value","group")
#id has to be the same like from and to columns in edges
nodes$id <- nodes$label
# Edges
edges_df <- sars_df %>%
group_by(host_colloq, control_measures, outcome) %>%
tally() %>%
mutate(control_measures = replace(control_measures, control_measures == "NS", "none")) %>%
mutate(control_measures = replace(control_measures, is.na(control_measures), "not applicable")) %>%
mutate(outcome = replace(outcome, outcome == "NS", "unknown"))
edges_df1 <- edges_df %>%
select(host_colloq, control_measures, n)
colnames(edges_df1) <- c("from", "to", "width")
edges_df2 <- edges_df[,-1]
colnames(edges_df2) <- c("from", "to", "width")
edges_df3 <- rbind(edges_df1, edges_df2)
edges_df3 <- as.data.frame(edges_df3)
# Do not weight the edges (only nodes will be)
# Do not run this command if you want to weight the vertices
edges_df3 <- edges_df3 %>%
select (-width)
# Give font size to the node labels
nodes <- nodes %>% mutate(font.size = 20)
network <- visNetwork(nodes, edges_df3) %>%
visPhysics(solver = "barnesHut", enabled = FALSE) %>%
visLegend(enabled = TRUE, useGroups = TRUE) # to add legend
# Save as html file (the html file is saved in your working directory)
visSave(network, "network.html", selfcontained = TRUE, background = "white")
```