diff --git a/README.Rmd b/README.Rmd index 758d191..0386d77 100644 --- a/README.Rmd +++ b/README.Rmd @@ -1,7 +1,6 @@ --- title: "pspforecast" output: github_document -always_allow_html: true --- @@ -29,14 +28,16 @@ remotes::install_github("BigelowLab/pspforecast") ``` ```{r message=FALSE, warning=FALSE, echo=FALSE} -library(dplyr) -library(ggplot2) -library(plotly) - -library(pspdata) -library(pspforecast) - -library(patchwork) +suppressPackageStartupMessages({ + library(dplyr) + library(ggplot2) + library(plotly) + + library(pspdata) + library(pspforecast) + + library(patchwork) +}) ``` ## Reading the forecast database diff --git a/README.html b/README.html deleted file mode 100644 index fb48e91..0000000 --- a/README.html +++ /dev/null @@ -1,771 +0,0 @@ - - - - - - - - - - - - - - - - - - - -

pspforecast

-

Shellfish toxicity forecast serving package

-

Requirements

- -

Installation

-
remotes::install_github("BigelowLab/pspforecast")
-

Reading the forecast -database

-

Variables:

- -
predictions <- read_forecast(year = "2024") |>
-  distinct()
-
-glimpse(predictions)
-
## Rows: 464
-## Columns: 19
-## $ version             <chr> "v0.3.0", "v0.3.0", "v0.3.0", "v0.3.0", "v0.3.0", "v0.3.0", "v0.3.0", "v0.3.0", "v0.3.0", "v0.3.0", "v0…
-## $ ensemble_n          <dbl> 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,…
-## $ location            <chr> "PSP10.11", "PSP10.33", "PSP12.01", "PSP12.03", "PSP12.13", "PSP12.28", "PSP12.34", "PSP15.25", "PSP16.…
-## $ date                <date> 2024-05-06, 2024-05-06, 2024-05-08, 2024-05-08, 2024-05-08, 2024-05-06, 2024-05-06, 2024-05-06, 2024-0…
-## $ name                <chr> "Ogunquit River", "Spurwink River", "Basin Pt.", "Potts Pt.", "Lumbos Hole", "Bear Island", "Head Beach…
-## $ lat                 <dbl> 43.25030, 43.56632, 43.73848, 43.73064, 43.79553, 43.78556, 43.71711, 43.84476, 43.92526, 44.15419, 44.…
-## $ lon                 <dbl> -70.59540, -70.27305, -70.04343, -70.02556, -69.94557, -69.87415, -69.84999, -69.55365, -69.25900, -68.…
-## $ class_bins          <chr> "0,10,30,80", "0,10,30,80", "0,10,30,80", "0,10,30,80", "0,10,30,80", "0,10,30,80", "0,10,30,80", "0,10…
-## $ forecast_start_date <date> 2024-05-10, 2024-05-10, 2024-05-12, 2024-05-12, 2024-05-12, 2024-05-10, 2024-05-10, 2024-05-10, 2024-0…
-## $ forecast_end_date   <date> 2024-05-16, 2024-05-16, 2024-05-18, 2024-05-18, 2024-05-18, 2024-05-16, 2024-05-16, 2024-05-16, 2024-0…
-## $ p_0                 <dbl> 93, 100, 100, 99, 31, 3, 95, 94, 95, 95, 100, 99, 100, 55, 91, 38, 39, 53, 98, 91, 69, 57, 37, 2, 93, 9…
-## $ p_1                 <dbl> 6, 0, 0, 1, 44, 13, 4, 5, 4, 5, 0, 1, 0, 42, 9, 40, 46, 37, 2, 9, 26, 32, 36, 10, 6, 3, 28, 2, 0, 1, 2,…
-## $ p_2                 <dbl> 1, 0, 0, 0, 18, 43, 0, 1, 0, 0, 0, 0, 0, 2, 0, 17, 12, 8, 0, 0, 4, 9, 21, 39, 0, 0, 5, 0, 0, 0, 0, 27, …
-## $ p_3                 <dbl> 0, 0, 0, 0, 7, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 3, 2, 0, 0, 1, 2, 6, 50, 0, 0, 1, 0, 0, 0, 0, 2, 21, 5…
-## $ p3_sd               <dbl> 2.537746e-02, 1.702311e-04, 5.835063e-07, 3.170006e-04, 2.573652e+00, 1.032039e+01, 6.801030e-03, 1.598…
-## $ p_3_min             <dbl> 2.803591e-02, 1.613240e-06, 4.298889e-09, 3.494154e-05, 3.757856e+00, 2.148448e+01, 6.643038e-03, 1.782…
-## $ p_3_max             <dbl> 1.114067e-01, 5.424280e-04, 1.839769e-06, 9.452227e-04, 1.157185e+01, 5.338209e+01, 3.128168e-02, 7.217…
-## $ predicted_class     <dbl> 0, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 1, 2, 1, 0…
-## $ f_id                <chr> "PSP10.11_2024-05-06", "PSP10.33_2024-05-06", "PSP12.01_2024-05-08", "PSP12.03_2024-05-08", "PSP12.13_2…
-

2024 Season Results

-

-

-

Metrics

-

Season Accuracy:

-
## # A tibble: 1 × 1
-##   accuracy
-##      <dbl>
-## 1    0.717
-

Closure-level (Class 3) -Predictions

- - - -
## # A tibble: 1 × 8
-##      tp    fp    tn    fn cl_accuracy precision sensitivity specificity
-##   <int> <int> <int> <int>       <dbl>     <dbl>       <dbl>       <dbl>
-## 1     2     4   397     7       0.973     0.333       0.222       0.990
-

2023 Season Results

-
predictions <- read_forecast(year = "2023")
-

Confusion Matrix

-

-

Probability -of Closure-level Toxicity vs Measured Toxicity

-

-

Metrics

-

Season Accuracy:

-
## # A tibble: 1 × 1
-##   accuracy
-##      <dbl>
-## 1    0.993
-

Closure-level (Class 3) -Predictions

-
## # A tibble: 1 × 8
-##      tp    fp    tn    fn cl_accuracy precision sensitivity specificity
-##   <int> <int> <int> <int>       <dbl>     <dbl>       <dbl>       <dbl>
-## 1     0     0   554     0           1       NaN         NaN           1
-

2022 Season Results

-

Confusion Matrix

-

-

Probability -of Closure-level Toxicity vs Measured Toxicity

-

-

Metrics

-

Season Accuracy:

-
## # A tibble: 1 × 1
-##   accuracy
-##      <dbl>
-## 1    0.799
-

Closure-level (Class 3) -Predictions

-
## # A tibble: 1 × 8
-##      tp    fp    tn    fn cl_accuracy precision sensitivity specificity
-##   <int> <int> <int> <int>       <dbl>     <dbl>       <dbl>       <dbl>
-## 1    16    20   603    12       0.951     0.444       0.571       0.968
-

Timing of initial -closure-level predictions

-

-

2021 Season Results

-

Confusion Matrix

-

-

Probability -of Closure-level Toxicity vs Measured Toxicity

-

-

Metrics

-

Season Accuracy:

-
## # A tibble: 1 × 1
-##   accuracy
-##      <dbl>
-## 1    0.938
-

Closure-level (Class 3) -Predictions

-
## # A tibble: 1 × 8
-##      tp    fp    tn    fn cl_accuracy precision sensitivity specificity
-##   <int> <int> <int> <int>       <dbl>     <dbl>       <dbl>       <dbl>
-## 1     2     3   463     0       0.994       0.4           1       0.994
-

Closure-level accuracy

-

Timing of initial -closure-level predictions

-

-

Possible manuscript plot(s)

-

-

-

Last Updated

-
## [1] "2024-08-30"
- - - diff --git a/README.md b/README.md index 44ee083..32bec06 100644 --- a/README.md +++ b/README.md @@ -74,25 +74,25 @@ glimpse(predictions) ## Rows: 464 ## Columns: 19 - ## $ version  "v0.3.0", "v0.3.0", "v0.3.0", "v0.3.0", "v0.3.0", "v0.3.0", "v0.3.0", "v0.3.0", "v0.3.0", "v0.3.0", "v0… - ## $ ensemble_n  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,… - ## $ location  "PSP10.11", "PSP10.33", "PSP12.01", "PSP12.03", "PSP12.13", "PSP12.28", "PSP12.34", "PSP15.25", "PSP16.… - ## $ date  2024-05-06, 2024-05-06, 2024-05-08, 2024-05-08, 2024-05-08, 2024-05-06, 2024-05-06, 2024-05-06, 2024-0… - ## $ name  "Ogunquit River", "Spurwink River", "Basin Pt.", "Potts Pt.", "Lumbos Hole", "Bear Island", "Head Beach… - ## $ lat  43.25030, 43.56632, 43.73848, 43.73064, 43.79553, 43.78556, 43.71711, 43.84476, 43.92526, 44.15419, 44.… - ## $ lon  -70.59540, -70.27305, -70.04343, -70.02556, -69.94557, -69.87415, -69.84999, -69.55365, -69.25900, -68.… - ## $ class_bins  "0,10,30,80", "0,10,30,80", "0,10,30,80", "0,10,30,80", "0,10,30,80", "0,10,30,80", "0,10,30,80", "0,10… - ## $ forecast_start_date  2024-05-10, 2024-05-10, 2024-05-12, 2024-05-12, 2024-05-12, 2024-05-10, 2024-05-10, 2024-05-10, 2024-0… - ## $ forecast_end_date  2024-05-16, 2024-05-16, 2024-05-18, 2024-05-18, 2024-05-18, 2024-05-16, 2024-05-16, 2024-05-16, 2024-0… - ## $ p_0  93, 100, 100, 99, 31, 3, 95, 94, 95, 95, 100, 99, 100, 55, 91, 38, 39, 53, 98, 91, 69, 57, 37, 2, 93, 9… - ## $ p_1  6, 0, 0, 1, 44, 13, 4, 5, 4, 5, 0, 1, 0, 42, 9, 40, 46, 37, 2, 9, 26, 32, 36, 10, 6, 3, 28, 2, 0, 1, 2,… - ## $ p_2  1, 0, 0, 0, 18, 43, 0, 1, 0, 0, 0, 0, 0, 2, 0, 17, 12, 8, 0, 0, 4, 9, 21, 39, 0, 0, 5, 0, 0, 0, 0, 27, … - ## $ p_3  0, 0, 0, 0, 7, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 3, 2, 0, 0, 1, 2, 6, 50, 0, 0, 1, 0, 0, 0, 0, 2, 21, 5… - ## $ p3_sd  2.537746e-02, 1.702311e-04, 5.835063e-07, 3.170006e-04, 2.573652e+00, 1.032039e+01, 6.801030e-03, 1.598… - ## $ p_3_min  2.803591e-02, 1.613240e-06, 4.298889e-09, 3.494154e-05, 3.757856e+00, 2.148448e+01, 6.643038e-03, 1.782… - ## $ p_3_max  1.114067e-01, 5.424280e-04, 1.839769e-06, 9.452227e-04, 1.157185e+01, 5.338209e+01, 3.128168e-02, 7.217… - ## $ predicted_class  0, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 1, 2, 1, 0… - ## $ f_id  "PSP10.11_2024-05-06", "PSP10.33_2024-05-06", "PSP12.01_2024-05-08", "PSP12.03_2024-05-08", "PSP12.13_2… + ## $ version "v0.3.0", "v0.3.0", "v0.3.0", "v0.3.0", "v0.3.0", … + ## $ ensemble_n 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10… + ## $ location "PSP10.11", "PSP10.33", "PSP12.01", "PSP12.03", "P… + ## $ date 2024-05-06, 2024-05-06, 2024-05-08, 2024-05-08, 2… + ## $ name "Ogunquit River", "Spurwink River", "Basin Pt.", "… + ## $ lat 43.25030, 43.56632, 43.73848, 43.73064, 43.79553, … + ## $ lon -70.59540, -70.27305, -70.04343, -70.02556, -69.94… + ## $ class_bins "0,10,30,80", "0,10,30,80", "0,10,30,80", "0,10,30… + ## $ forecast_start_date 2024-05-10, 2024-05-10, 2024-05-12, 2024-05-12, 2… + ## $ forecast_end_date 2024-05-16, 2024-05-16, 2024-05-18, 2024-05-18, 2… + ## $ p_0 93, 100, 100, 99, 31, 3, 95, 94, 95, 95, 100, 99, … + ## $ p_1 6, 0, 0, 1, 44, 13, 4, 5, 4, 5, 0, 1, 0, 42, 9, 40… + ## $ p_2 1, 0, 0, 0, 18, 43, 0, 1, 0, 0, 0, 0, 0, 2, 0, 17,… + ## $ p_3 0, 0, 0, 0, 7, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 3… + ## $ p3_sd 2.537746e-02, 1.702311e-04, 5.835063e-07, 3.170006… + ## $ p_3_min 2.803591e-02, 1.613240e-06, 4.298889e-09, 3.494154… + ## $ p_3_max 1.114067e-01, 5.424280e-04, 1.839769e-06, 9.452227… + ## $ predicted_class 0, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,… + ## $ f_id "PSP10.11_2024-05-06", "PSP10.33_2024-05-06", "PSP… ## 2024 Season Results @@ -230,4 +230,4 @@ predictions <- read_forecast(year = "2023") ### Last Updated - ## [1] "2024-08-30" + ## [1] "2024-09-10" diff --git a/README_files/figure-gfm/unnamed-chunk-14-1.png b/README_files/figure-gfm/unnamed-chunk-14-1.png index 1b14b60..b14468d 100644 Binary files a/README_files/figure-gfm/unnamed-chunk-14-1.png and b/README_files/figure-gfm/unnamed-chunk-14-1.png differ diff --git a/README_files/figure-gfm/unnamed-chunk-19-1.png b/README_files/figure-gfm/unnamed-chunk-19-1.png index 77d8d69..a66d56b 100644 Binary files a/README_files/figure-gfm/unnamed-chunk-19-1.png and b/README_files/figure-gfm/unnamed-chunk-19-1.png differ diff --git a/README_files/figure-gfm/unnamed-chunk-20-1.png b/README_files/figure-gfm/unnamed-chunk-20-1.png index 3cce599..e1c6a46 100644 Binary files a/README_files/figure-gfm/unnamed-chunk-20-1.png and b/README_files/figure-gfm/unnamed-chunk-20-1.png differ diff --git a/inst/forecastdb/seasonal_results/psp_forecast_results_2024.csv.gz b/inst/forecastdb/seasonal_results/psp_forecast_results_2024.csv.gz index 905af0d..f64b4ae 100644 Binary files a/inst/forecastdb/seasonal_results/psp_forecast_results_2024.csv.gz and b/inst/forecastdb/seasonal_results/psp_forecast_results_2024.csv.gz differ diff --git a/inst/scripts/confusion_matrix_allyears.R b/inst/manuscript/confusion_matrix_allyears.R similarity index 79% rename from inst/scripts/confusion_matrix_allyears.R rename to inst/manuscript/confusion_matrix_allyears.R index e5a482c..4e177a4 100644 --- a/inst/scripts/confusion_matrix_allyears.R +++ b/inst/manuscript/confusion_matrix_allyears.R @@ -10,11 +10,13 @@ num_levels <- 4 levels <- seq(from=0, to=(num_levels-1)) -cm <- as.data.frame(table(predicted = factor(pred_w_results$predicted_class, levels), actual = factor(pred_w_results$class, levels), year=factor(pred_w_results$year, levels=2021:2023))) |> +cm <- as.data.frame(table(predicted = factor(pred_w_results$predicted_class, levels), + actual = factor(pred_w_results$class, levels), + year=factor(pred_w_results$year, levels=2021:2024))) |> dplyr::mutate(frac = round(Freq/sum(Freq)*100)) |> dplyr::mutate(frac = sapply(.data$frac, function(x) if (x == "0") {x = "<1"} else {x})) -ggplot2::ggplot(data = cm, ggplot2::aes(x = .data$predicted, y = .data$actual)) + +plot1 <- ggplot2::ggplot(data = cm, ggplot2::aes(x=.data$predicted, y=.data$actual)) + ggplot2::geom_tile(ggplot2::aes(fill = log(.data$Freq+1))) + ggplot2::geom_text(ggplot2::aes(label = sprintf("%1.0f", .data$Freq)), size=8) + ggplot2::facet_grid(cols=vars(.data$year)) + @@ -31,4 +33,6 @@ ggplot2::ggplot(data = cm, ggplot2::aes(x = .data$predicted, y = .data$actual)) ggplot2::geom_rect(aes(xmin=0.5, xmax=3.5, ymin=0.5, ymax=3.5), alpha=0) + ggplot2::geom_rect(aes(xmin=3.5, xmax=4.5, ymin=3.5, ymax=4.5), alpha=0) +ggsave(filename = "inst/manuscript/cm_allyears.jpeg", plot=plot1, width=12, height=8) + diff --git a/inst/manuscript/scatter_allyears.R b/inst/manuscript/scatter_allyears.R new file mode 100644 index 0000000..37623cf --- /dev/null +++ b/inst/manuscript/scatter_allyears.R @@ -0,0 +1,19 @@ + +library(pspforecast) +library(pspdata) +library(ggplot2) + +pred_w_results <- read_all_results() + + +plot2 <- ggplot2::ggplot(data = pred_w_results, ggplot2::aes(x=.data$p_3, y=.data$toxicity, colour = correct)) + + ggplot2::geom_point(alpha=0.7, size=3) + + ggplot2::facet_grid(cols=vars(.data$year)) + + ggplot2::labs(x = "Predicted Probability of Closure-level Toxicity", + y = "Measured Toxicity") + + ggplot2::geom_hline(yintercept=80, linetype="dashed") + + ggplot2::theme_bw() + +plot2 + +ggsave(filename = "inst/manuscript/scatter_allyears.jpeg", plot=plot2, width=12, height=9) diff --git a/inst/scripts/station_metrics.R b/inst/manuscript/station_metrics.R similarity index 100% rename from inst/scripts/station_metrics.R rename to inst/manuscript/station_metrics.R diff --git a/inst/scripts/get_results.R b/inst/scripts/get_results.R index b44e254..cfc067f 100644 --- a/inst/scripts/get_results.R +++ b/inst/scripts/get_results.R @@ -42,3 +42,14 @@ summary(xx) xx |> write_csv("inst/forecastdb/seasonal_results/psp_forecast_results_2023.csv.gz") + +## 2024 + +predictions24 <- read_forecast(year=2024) +xx <- add_forecast_results(predictions24, toxin_measurements = psp) + +summary(xx) + +xx |> + write_csv("inst/forecastdb/seasonal_results/psp_forecast_results_2024.csv.gz") +