diff --git a/README.Rmd b/README.Rmd
index 758d191..0386d77 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -1,7 +1,6 @@
---
title: "pspforecast"
output: github_document
-always_allow_html: true
---
@@ -29,14 +28,16 @@ remotes::install_github("BigelowLab/pspforecast")
```
```{r message=FALSE, warning=FALSE, echo=FALSE}
-library(dplyr)
-library(ggplot2)
-library(plotly)
-
-library(pspdata)
-library(pspforecast)
-
-library(patchwork)
+suppressPackageStartupMessages({ # keep package attach banners out of the rendered README
+ library(dplyr)
+ library(ggplot2)
+ library(plotly)
+
+ library(pspdata)
+ library(pspforecast)
+
+ library(patchwork)
+})
```
## Reading the forecast database
diff --git a/README.html b/README.html
deleted file mode 100644
index fb48e91..0000000
--- a/README.html
+++ /dev/null
@@ -1,771 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-pspforecast
-Shellfish toxicity forecast serving package
-Requirements
-
-Installation
-remotes::install_github("BigelowLab/pspforecast")
-Reading the forecast
-database
-Variables:
-
-version - the version/configuration of the model used to make the
-prediction
-ensemble_n - number of ensemble members used to generate
-prediction
-location - the sampling station the forecast is for
-date - the date the forecast was made on
-name - site name
-lat - latitude
-lon - longitude
-class_bins - the bins used to classify shellfish total toxicity
-(i.e. 0: 0-10, 1: 10-30, 2: 30-80, 3: >80)
-forecast_date - the date the forecast is valid for (i.e. one week
-ahead of when it was made)
-predicted_class - the predicted classification at the location
-listed on the forecast_date (in this case 0-3)
-p_0 - class 0 probability
-p_1 - class 1 probability
-p_2 - class 2 probability
-p_3 - class 3 probability
-p3_sd - class 3 probability standard deviation
-p_3_min - class 3 minimum probability (from ensemble
-run)
-p_3_max - class 3 maximum probability (from ensemble
-run)
-predicted_class - the predicted classification
-
-predictions <- read_forecast(year = "2024") |>
- distinct()
-
-glimpse(predictions)
-## Rows: 464
-## Columns: 19
-## $ version [3m[38;5;246m<chr>[39m[23m "v0.3.0", "v0.3.0", "v0.3.0", "v0.3.0", "v0.3.0", "v0.3.0", "v0.3.0", "v0.3.0", "v0.3.0", "v0.3.0", "v0…
-## $ ensemble_n [3m[38;5;246m<dbl>[39m[23m 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,…
-## $ location [3m[38;5;246m<chr>[39m[23m "PSP10.11", "PSP10.33", "PSP12.01", "PSP12.03", "PSP12.13", "PSP12.28", "PSP12.34", "PSP15.25", "PSP16.…
-## $ date [3m[38;5;246m<date>[39m[23m 2024-05-06, 2024-05-06, 2024-05-08, 2024-05-08, 2024-05-08, 2024-05-06, 2024-05-06, 2024-05-06, 2024-0…
-## $ name [3m[38;5;246m<chr>[39m[23m "Ogunquit River", "Spurwink River", "Basin Pt.", "Potts Pt.", "Lumbos Hole", "Bear Island", "Head Beach…
-## $ lat [3m[38;5;246m<dbl>[39m[23m 43.25030, 43.56632, 43.73848, 43.73064, 43.79553, 43.78556, 43.71711, 43.84476, 43.92526, 44.15419, 44.…
-## $ lon [3m[38;5;246m<dbl>[39m[23m -70.59540, -70.27305, -70.04343, -70.02556, -69.94557, -69.87415, -69.84999, -69.55365, -69.25900, -68.…
-## $ class_bins [3m[38;5;246m<chr>[39m[23m "0,10,30,80", "0,10,30,80", "0,10,30,80", "0,10,30,80", "0,10,30,80", "0,10,30,80", "0,10,30,80", "0,10…
-## $ forecast_start_date [3m[38;5;246m<date>[39m[23m 2024-05-10, 2024-05-10, 2024-05-12, 2024-05-12, 2024-05-12, 2024-05-10, 2024-05-10, 2024-05-10, 2024-0…
-## $ forecast_end_date [3m[38;5;246m<date>[39m[23m 2024-05-16, 2024-05-16, 2024-05-18, 2024-05-18, 2024-05-18, 2024-05-16, 2024-05-16, 2024-05-16, 2024-0…
-## $ p_0 [3m[38;5;246m<dbl>[39m[23m 93, 100, 100, 99, 31, 3, 95, 94, 95, 95, 100, 99, 100, 55, 91, 38, 39, 53, 98, 91, 69, 57, 37, 2, 93, 9…
-## $ p_1 [3m[38;5;246m<dbl>[39m[23m 6, 0, 0, 1, 44, 13, 4, 5, 4, 5, 0, 1, 0, 42, 9, 40, 46, 37, 2, 9, 26, 32, 36, 10, 6, 3, 28, 2, 0, 1, 2,…
-## $ p_2 [3m[38;5;246m<dbl>[39m[23m 1, 0, 0, 0, 18, 43, 0, 1, 0, 0, 0, 0, 0, 2, 0, 17, 12, 8, 0, 0, 4, 9, 21, 39, 0, 0, 5, 0, 0, 0, 0, 27, …
-## $ p_3 [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 0, 7, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 3, 2, 0, 0, 1, 2, 6, 50, 0, 0, 1, 0, 0, 0, 0, 2, 21, 5…
-## $ p3_sd [3m[38;5;246m<dbl>[39m[23m 2.537746e-02, 1.702311e-04, 5.835063e-07, 3.170006e-04, 2.573652e+00, 1.032039e+01, 6.801030e-03, 1.598…
-## $ p_3_min [3m[38;5;246m<dbl>[39m[23m 2.803591e-02, 1.613240e-06, 4.298889e-09, 3.494154e-05, 3.757856e+00, 2.148448e+01, 6.643038e-03, 1.782…
-## $ p_3_max [3m[38;5;246m<dbl>[39m[23m 1.114067e-01, 5.424280e-04, 1.839769e-06, 9.452227e-04, 1.157185e+01, 5.338209e+01, 3.128168e-02, 7.217…
-## $ predicted_class [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 1, 2, 1, 0…
-## $ f_id [3m[38;5;246m<chr>[39m[23m "PSP10.11_2024-05-06", "PSP10.33_2024-05-06", "PSP12.01_2024-05-08", "PSP12.03_2024-05-08", "PSP12.13_2…
-2024 Season Results
-
-
-Metrics
-Season Accuracy:
-## # A tibble: 1 × 1
-## accuracy
-## <dbl>
-## 1 0.717
-Closure-level (Class 3)
-Predictions
-
-- tp - The model predicted class 3 and the following week’s
-measurement was class 3
-- fp - The model predicted class 3 and the following week’s
-measurement was not class 3
-- tn - The model predicted class 0,1,2 and the following week’s
-measurement was in class 0,1,2
-- fn - The model predicted class 0,1,2 and the following week’s
-measurement was class 3
-- precision - TP/(TP+FP)
-- sensitivity - TP/(TP+FN)
-- specificity - TN/(TN+FP)
-
-
-
-## # A tibble: 1 × 8
-## tp fp tn fn cl_accuracy precision sensitivity specificity
-## <int> <int> <int> <int> <dbl> <dbl> <dbl> <dbl>
-## 1 2 4 397 7 0.973 0.333 0.222 0.990
-2023 Season Results
-predictions <- read_forecast(year = "2023")
-Confusion Matrix
-
-Probability
-of Closure-level Toxicity vs Measured Toxicity
-
-Metrics
-Season Accuracy:
-## # A tibble: 1 × 1
-## accuracy
-## <dbl>
-## 1 0.993
-Closure-level (Class 3)
-Predictions
-## # A tibble: 1 × 8
-## tp fp tn fn cl_accuracy precision sensitivity specificity
-## <int> <int> <int> <int> <dbl> <dbl> <dbl> <dbl>
-## 1 0 0 554 0 1 NaN NaN 1
-2022 Season Results
-Confusion Matrix
-
-Probability
-of Closure-level Toxicity vs Measured Toxicity
-
-Metrics
-Season Accuracy:
-## # A tibble: 1 × 1
-## accuracy
-## <dbl>
-## 1 0.799
-Closure-level (Class 3)
-Predictions
-## # A tibble: 1 × 8
-## tp fp tn fn cl_accuracy precision sensitivity specificity
-## <int> <int> <int> <int> <dbl> <dbl> <dbl> <dbl>
-## 1 16 20 603 12 0.951 0.444 0.571 0.968
-Timing of initial
-closure-level predictions
-
-2021 Season Results
-Confusion Matrix
-
-Probability
-of Closure-level Toxicity vs Measured Toxicity
-
-Metrics
-Season Accuracy:
-## # A tibble: 1 × 1
-## accuracy
-## <dbl>
-## 1 0.938
-Closure-level (Class 3)
-Predictions
-## # A tibble: 1 × 8
-## tp fp tn fn cl_accuracy precision sensitivity specificity
-## <int> <int> <int> <int> <dbl> <dbl> <dbl> <dbl>
-## 1 2 3 463 0 0.994 0.4 1 0.994
-Closure-level accuracy
-Timing of initial
-closure-level predictions
-
-Possible manuscript plot(s)
-
-
-Last Updated
-## [1] "2024-08-30"
-
-
-
diff --git a/README.md b/README.md
index 44ee083..32bec06 100644
--- a/README.md
+++ b/README.md
@@ -74,25 +74,25 @@ glimpse(predictions)
## Rows: 464
## Columns: 19
- ## $ version [3m[38;5;246m[39m[23m "v0.3.0", "v0.3.0", "v0.3.0", "v0.3.0", "v0.3.0", "v0.3.0", "v0.3.0", "v0.3.0", "v0.3.0", "v0.3.0", "v0…
- ## $ ensemble_n [3m[38;5;246m[39m[23m 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,…
- ## $ location [3m[38;5;246m[39m[23m "PSP10.11", "PSP10.33", "PSP12.01", "PSP12.03", "PSP12.13", "PSP12.28", "PSP12.34", "PSP15.25", "PSP16.…
- ## $ date [3m[38;5;246m[39m[23m 2024-05-06, 2024-05-06, 2024-05-08, 2024-05-08, 2024-05-08, 2024-05-06, 2024-05-06, 2024-05-06, 2024-0…
- ## $ name [3m[38;5;246m[39m[23m "Ogunquit River", "Spurwink River", "Basin Pt.", "Potts Pt.", "Lumbos Hole", "Bear Island", "Head Beach…
- ## $ lat [3m[38;5;246m[39m[23m 43.25030, 43.56632, 43.73848, 43.73064, 43.79553, 43.78556, 43.71711, 43.84476, 43.92526, 44.15419, 44.…
- ## $ lon [3m[38;5;246m[39m[23m -70.59540, -70.27305, -70.04343, -70.02556, -69.94557, -69.87415, -69.84999, -69.55365, -69.25900, -68.…
- ## $ class_bins [3m[38;5;246m[39m[23m "0,10,30,80", "0,10,30,80", "0,10,30,80", "0,10,30,80", "0,10,30,80", "0,10,30,80", "0,10,30,80", "0,10…
- ## $ forecast_start_date [3m[38;5;246m[39m[23m 2024-05-10, 2024-05-10, 2024-05-12, 2024-05-12, 2024-05-12, 2024-05-10, 2024-05-10, 2024-05-10, 2024-0…
- ## $ forecast_end_date [3m[38;5;246m[39m[23m 2024-05-16, 2024-05-16, 2024-05-18, 2024-05-18, 2024-05-18, 2024-05-16, 2024-05-16, 2024-05-16, 2024-0…
- ## $ p_0 [3m[38;5;246m[39m[23m 93, 100, 100, 99, 31, 3, 95, 94, 95, 95, 100, 99, 100, 55, 91, 38, 39, 53, 98, 91, 69, 57, 37, 2, 93, 9…
- ## $ p_1 [3m[38;5;246m[39m[23m 6, 0, 0, 1, 44, 13, 4, 5, 4, 5, 0, 1, 0, 42, 9, 40, 46, 37, 2, 9, 26, 32, 36, 10, 6, 3, 28, 2, 0, 1, 2,…
- ## $ p_2 [3m[38;5;246m[39m[23m 1, 0, 0, 0, 18, 43, 0, 1, 0, 0, 0, 0, 0, 2, 0, 17, 12, 8, 0, 0, 4, 9, 21, 39, 0, 0, 5, 0, 0, 0, 0, 27, …
- ## $ p_3 [3m[38;5;246m[39m[23m 0, 0, 0, 0, 7, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 3, 2, 0, 0, 1, 2, 6, 50, 0, 0, 1, 0, 0, 0, 0, 2, 21, 5…
- ## $ p3_sd [3m[38;5;246m[39m[23m 2.537746e-02, 1.702311e-04, 5.835063e-07, 3.170006e-04, 2.573652e+00, 1.032039e+01, 6.801030e-03, 1.598…
- ## $ p_3_min [3m[38;5;246m[39m[23m 2.803591e-02, 1.613240e-06, 4.298889e-09, 3.494154e-05, 3.757856e+00, 2.148448e+01, 6.643038e-03, 1.782…
- ## $ p_3_max [3m[38;5;246m[39m[23m 1.114067e-01, 5.424280e-04, 1.839769e-06, 9.452227e-04, 1.157185e+01, 5.338209e+01, 3.128168e-02, 7.217…
- ## $ predicted_class [3m[38;5;246m[39m[23m 0, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 1, 2, 1, 0…
- ## $ f_id [3m[38;5;246m[39m[23m "PSP10.11_2024-05-06", "PSP10.33_2024-05-06", "PSP12.01_2024-05-08", "PSP12.03_2024-05-08", "PSP12.13_2…
+ ## $ version "v0.3.0", "v0.3.0", "v0.3.0", "v0.3.0", "v0.3.0", …
+ ## $ ensemble_n 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10…
+ ## $ location "PSP10.11", "PSP10.33", "PSP12.01", "PSP12.03", "P…
+ ## $ date 2024-05-06, 2024-05-06, 2024-05-08, 2024-05-08, 2…
+ ## $ name "Ogunquit River", "Spurwink River", "Basin Pt.", "…
+ ## $ lat 43.25030, 43.56632, 43.73848, 43.73064, 43.79553, …
+ ## $ lon -70.59540, -70.27305, -70.04343, -70.02556, -69.94…
+ ## $ class_bins "0,10,30,80", "0,10,30,80", "0,10,30,80", "0,10,30…
+ ## $ forecast_start_date 2024-05-10, 2024-05-10, 2024-05-12, 2024-05-12, 2…
+ ## $ forecast_end_date 2024-05-16, 2024-05-16, 2024-05-18, 2024-05-18, 2…
+ ## $ p_0 93, 100, 100, 99, 31, 3, 95, 94, 95, 95, 100, 99, …
+ ## $ p_1 6, 0, 0, 1, 44, 13, 4, 5, 4, 5, 0, 1, 0, 42, 9, 40…
+ ## $ p_2 1, 0, 0, 0, 18, 43, 0, 1, 0, 0, 0, 0, 0, 2, 0, 17,…
+ ## $ p_3 0, 0, 0, 0, 7, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 3…
+ ## $ p3_sd 2.537746e-02, 1.702311e-04, 5.835063e-07, 3.170006…
+ ## $ p_3_min 2.803591e-02, 1.613240e-06, 4.298889e-09, 3.494154…
+ ## $ p_3_max 1.114067e-01, 5.424280e-04, 1.839769e-06, 9.452227…
+ ## $ predicted_class 0, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,…
+ ## $ f_id "PSP10.11_2024-05-06", "PSP10.33_2024-05-06", "PSP…
## 2024 Season Results
@@ -230,4 +230,4 @@ predictions <- read_forecast(year = "2023")
### Last Updated
- ## [1] "2024-08-30"
+ ## [1] "2024-09-10"
diff --git a/README_files/figure-gfm/unnamed-chunk-14-1.png b/README_files/figure-gfm/unnamed-chunk-14-1.png
index 1b14b60..b14468d 100644
Binary files a/README_files/figure-gfm/unnamed-chunk-14-1.png and b/README_files/figure-gfm/unnamed-chunk-14-1.png differ
diff --git a/README_files/figure-gfm/unnamed-chunk-19-1.png b/README_files/figure-gfm/unnamed-chunk-19-1.png
index 77d8d69..a66d56b 100644
Binary files a/README_files/figure-gfm/unnamed-chunk-19-1.png and b/README_files/figure-gfm/unnamed-chunk-19-1.png differ
diff --git a/README_files/figure-gfm/unnamed-chunk-20-1.png b/README_files/figure-gfm/unnamed-chunk-20-1.png
index 3cce599..e1c6a46 100644
Binary files a/README_files/figure-gfm/unnamed-chunk-20-1.png and b/README_files/figure-gfm/unnamed-chunk-20-1.png differ
diff --git a/inst/forecastdb/seasonal_results/psp_forecast_results_2024.csv.gz b/inst/forecastdb/seasonal_results/psp_forecast_results_2024.csv.gz
index 905af0d..f64b4ae 100644
Binary files a/inst/forecastdb/seasonal_results/psp_forecast_results_2024.csv.gz and b/inst/forecastdb/seasonal_results/psp_forecast_results_2024.csv.gz differ
diff --git a/inst/scripts/confusion_matrix_allyears.R b/inst/manuscript/confusion_matrix_allyears.R
similarity index 79%
rename from inst/scripts/confusion_matrix_allyears.R
rename to inst/manuscript/confusion_matrix_allyears.R
index e5a482c..4e177a4 100644
--- a/inst/scripts/confusion_matrix_allyears.R
+++ b/inst/manuscript/confusion_matrix_allyears.R
@@ -10,11 +10,13 @@ num_levels <- 4
levels <- seq(from=0, to=(num_levels-1))
-cm <- as.data.frame(table(predicted = factor(pred_w_results$predicted_class, levels), actual = factor(pred_w_results$class, levels), year=factor(pred_w_results$year, levels=2021:2023))) |>
+cm <- as.data.frame(table(predicted = factor(pred_w_results$predicted_class, levels), # one Freq row per predicted/actual/year combination
+ actual = factor(pred_w_results$class, levels),
+ year=factor(pred_w_results$year, levels=2021:2024))) |> # explicit levels: each season 2021-2024 is tabulated even when its count is zero
dplyr::mutate(frac = round(Freq/sum(Freq)*100)) |>
dplyr::mutate(frac = sapply(.data$frac, function(x) if (x == "0") {x = "<1"} else {x}))
-ggplot2::ggplot(data = cm, ggplot2::aes(x = .data$predicted, y = .data$actual)) +
+plot1 <- ggplot2::ggplot(data = cm, ggplot2::aes(x=.data$predicted, y=.data$actual)) +
ggplot2::geom_tile(ggplot2::aes(fill = log(.data$Freq+1))) +
ggplot2::geom_text(ggplot2::aes(label = sprintf("%1.0f", .data$Freq)), size=8) +
ggplot2::facet_grid(cols=vars(.data$year)) +
@@ -31,4 +33,6 @@ ggplot2::ggplot(data = cm, ggplot2::aes(x = .data$predicted, y = .data$actual))
ggplot2::geom_rect(aes(xmin=0.5, xmax=3.5, ymin=0.5, ymax=3.5), alpha=0) +
ggplot2::geom_rect(aes(xmin=3.5, xmax=4.5, ymin=3.5, ymax=4.5), alpha=0)
+ggsave(filename = "inst/manuscript/cm_allyears.jpeg", plot=plot1, width=12, height=8) # relative path: resolved against the current working directory
+
diff --git a/inst/manuscript/scatter_allyears.R b/inst/manuscript/scatter_allyears.R
new file mode 100644
index 0000000..37623cf
--- /dev/null
+++ b/inst/manuscript/scatter_allyears.R
@@ -0,0 +1,19 @@
+# Manuscript figure: predicted class-3 probability (p_3) vs. measured toxicity, one panel per year.
+library(pspforecast)
+library(pspdata)
+library(ggplot2)
+
+pred_w_results <- read_all_results()
+
+# Every column goes through the .data pronoun and every ggplot2 call is namespaced, matching the rest of the chain.
+plot2 <- ggplot2::ggplot(data = pred_w_results, ggplot2::aes(x = .data$p_3, y = .data$toxicity, colour = .data$correct)) +
+  ggplot2::geom_point(alpha = 0.7, size = 3) +
+  ggplot2::facet_grid(cols = ggplot2::vars(.data$year)) +
+  ggplot2::labs(x = "Predicted Probability of Closure-level Toxicity",
+                y = "Measured Toxicity") +
+  ggplot2::geom_hline(yintercept = 80, linetype = "dashed") + # 80 = lower edge of class 3 per the README class_bins
+  ggplot2::theme_bw()
+
+plot2
+# The output path is relative, so this is presumably run from the package root -- TODO confirm.
+ggplot2::ggsave(filename = "inst/manuscript/scatter_allyears.jpeg", plot = plot2, width = 12, height = 9)
diff --git a/inst/scripts/station_metrics.R b/inst/manuscript/station_metrics.R
similarity index 100%
rename from inst/scripts/station_metrics.R
rename to inst/manuscript/station_metrics.R
diff --git a/inst/scripts/get_results.R b/inst/scripts/get_results.R
index b44e254..cfc067f 100644
--- a/inst/scripts/get_results.R
+++ b/inst/scripts/get_results.R
@@ -42,3 +42,14 @@ summary(xx)
xx |>
write_csv("inst/forecastdb/seasonal_results/psp_forecast_results_2023.csv.gz")
+
+## 2024
+# Read the 2024 forecasts, add results using the `psp` measurements, then write the seasonal file.
+predictions24 <- read_forecast(year = 2024)
+xx <- add_forecast_results(predictions24, toxin_measurements = psp)
+
+summary(xx)
+
+write_csv(xx,
+  "inst/forecastdb/seasonal_results/psp_forecast_results_2024.csv.gz")
+