Merge pull request #524 from jessesadler/plot-data

Create copy of plotting data
datacarpentry · Sep 26, 2024 · b85c073 · b85c073
2 parents 1e5d11b + fd246ae
commit b85c073
Show file tree

Hide file tree

Showing 4 changed files with 189 additions and 9 deletions.
diff --git a/episodes/04-tidyr.Rmd b/episodes/04-tidyr.Rmd
@@ -435,7 +435,7 @@ dataset where each of the columns includes only one data value. Now we can save
 this data frame to our `data_output` directory.
 
 ```{r, purl=FALSE, eval=FALSE}
-write_csv (interviews_plotting, file = "data_output/interviews_plotting.csv")
+write_csv(interviews_plotting, file = "data_output/interviews_plotting.csv")
 ```
 
 ```{r, purl=FALSE, eval=TRUE, echo=FALSE}

diff --git a/episodes/05-ggplot2.Rmd b/episodes/05-ggplot2.Rmd
@@ -11,9 +11,16 @@ source("data/download_data.R")
 
 :::: instructor
 
-- This lesson is a broad overview of ggplot2 and focuses on (1) getting familiar
-  with the layering system of ggplot2, (2) using the argument `group` in the
-  `aes()` function, (3) basic customization of the plots.
+- This episode is a broad overview of ggplot2 and focuses on (1) getting
+  familiar with the layering system of ggplot2, (2) using the argument `group`
+  in the `aes()` function, and (3) basic customization of the plots.
+- The episode depends on data created in the Data Wrangling with tidyr
+  episode. If you skipped the tidyr episode or did not finish it, you can
+  have the learners access the data by either downloading it or quickly
+  creating it using the tidyr code below. You will probably want to copy the
+  code into the Etherpad.
+- If you did skip the tidyr episode, you might want to go over the exporting
+  data section in that episode.
 
 ::::::::::::
 
@@ -50,10 +57,21 @@ interviews_plotting <- read_csv("data_output/interviews_plotting.csv")
 ```
 
 If you were unable to complete the previous lesson or did not save the data,
-then you can create it now.
+then you can create it now. Either download it using `read_csv()` (Option 1)
+or create it with the **dplyr** and **tidyr** code (Option 2).
+
+::: tab
+
+### Option 1: Download the data
 
 ```{r, purl=FALSE, eval=FALSE}
-## Not run, but can be used to load in data from previous lesson!
+interviews_plotting <- read_csv("https://raw.githubusercontent.com/datacarpentry/r-socialsci/main/episodes/data/interviews_plotting.csv")
+```
+
+### Option 2: Create the data
+
+```{r, purl=FALSE, eval=FALSE}
+## Can be used to recreate the data from the previous lesson!
 interviews_plotting <- interviews %>%
   ## pivot wider by items_owned
   separate_rows(items_owned, sep = ";") %>%
@@ -74,6 +92,8 @@ interviews_plotting <- interviews %>%
   mutate(number_items = rowSums(select(., bicycle:car)))
 ```
 
+:::
+
 ## Plotting with **`ggplot2`**
 
 **`ggplot2`** is a plotting package that makes it simple to create complex plots

diff --git a/episodes/data/download_data.R b/episodes/data/download_data.R
@@ -8,16 +8,44 @@ if (!dir.exists("data"))
 # Download and clean the SAFI survey data, but only when the cleaned file is
 # not already present in data/.
 if (! file.exists("data/SAFI_clean.csv")) {
   # mode = "wb" requests a binary transfer so the file is written as received
   download.file("https://ndownloader.figshare.com/files/11492171",
                 "data/SAFI_clean.csv", mode = "wb")
-  
+
   # Clean data: read the raw download back in, keeping strings as character
   df <- read.csv("data/SAFI_clean.csv",
                  stringsAsFactors = FALSE)
-  
+
   # Remove leading and trailing white space from the wall type values
   df$respondent_wall_type <- trimws(df$respondent_wall_type, which = "both")
   # Replace duplicate ids: set column 1 of rows 2 and 53 to the row number
   df[[2, 1]] <- 2
   df[[53, 1]] <- 53
-  
+
   # Overwrite the raw download with the cleaned data
   write.csv(df, "data/SAFI_clean.csv", row.names = FALSE)
 }
+
+# Plotting data -----------------------------------------------------------
+
+# Create plotting data for ggplot episode and cache it as
+# data/interviews_plotting.csv. Nothing is done when that file already exists.
+library(tidyr)
+library(dplyr)
+
+if (! file.exists("data/interviews_plotting.csv")) {
+  # Always start from the cleaned file on disk. In the code visible here, `df`
+  # is only assigned inside the download branch, so when data/SAFI_clean.csv
+  # already exists the name `df` would resolve to the stats::df() function and
+  # the pipeline below would fail.
+  # NOTE(review): replace_na() only acts on real NA values and read.csv() only
+  # treats "NA" as missing by default -- confirm how missing items_owned
+  # values are coded in this file (the episode may read it with a different
+  # `na` setting).
+  interviews <- read.csv("data/SAFI_clean.csv",
+                         stringsAsFactors = FALSE)
+
+  # Copy code from ggplot episode to create data
+  interviews_plotting <- interviews %>%
+    # One row per owned item, then one logical column per item
+    separate_rows(items_owned, sep = ";") %>%
+    replace_na(list(items_owned = "no_listed_items")) %>%
+    mutate(items_owned_logical = TRUE) %>%
+    pivot_wider(names_from = items_owned,
+                values_from = items_owned_logical,
+                values_fill = list(items_owned_logical = FALSE)) %>%
+    # Same reshaping for the months in which food was lacking
+    separate_rows(months_lack_food, sep = ";") %>%
+    mutate(months_lack_food_logical = TRUE) %>%
+    pivot_wider(names_from = months_lack_food,
+                values_from = months_lack_food_logical,
+                values_fill = list(months_lack_food_logical = FALSE)) %>%
+    # Row-wise counts over the logical columns created above
+    mutate(number_months_lack_food = rowSums(select(., Jan:May))) %>%
+    mutate(number_items = rowSums(select(., bicycle:car)))
+
+  write.csv(interviews_plotting, "data/interviews_plotting.csv",
+            row.names = FALSE)
+}
+
+