From d007f8c53774bee0a249fa9ba6acfa2254f429bc Mon Sep 17 00:00:00 2001 From: gwerbin Date: Thu, 25 Sep 2014 16:10:19 -0400 Subject: [PATCH 1/4] blank homework file --- hw1/_posts/2014-09-18-HW1-Greg-Werbin.Rmd | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 hw1/_posts/2014-09-18-HW1-Greg-Werbin.Rmd diff --git a/hw1/_posts/2014-09-18-HW1-Greg-Werbin.Rmd b/hw1/_posts/2014-09-18-HW1-Greg-Werbin.Rmd new file mode 100644 index 0000000..a71c6e6 --- /dev/null +++ b/hw1/_posts/2014-09-18-HW1-Greg-Werbin.Rmd @@ -0,0 +1,9 @@ +--- +title: "Homework 1" +author: "Greg Werbin" +output: html_document +published: false +tags: hw1 +--- + + From 10cdb02d270470b188207c90fff2f747ecf02508 Mon Sep 17 00:00:00 2001 From: gwerbin Date: Fri, 26 Sep 2014 10:33:34 -0400 Subject: [PATCH 2/4] intro done --- hw1/_posts/2014-09-18-HW1-Greg-Werbin.Rmd | 25 +++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/hw1/_posts/2014-09-18-HW1-Greg-Werbin.Rmd b/hw1/_posts/2014-09-18-HW1-Greg-Werbin.Rmd index a71c6e6..2e5e314 100644 --- a/hw1/_posts/2014-09-18-HW1-Greg-Werbin.Rmd +++ b/hw1/_posts/2014-09-18-HW1-Greg-Werbin.Rmd @@ -6,4 +6,29 @@ published: false tags: hw1 --- +## Introduction +This data comes from the National Longitudinal Survey of Freshmen (NLSF). I intend to use it for my master's thesis, and I am currently analyzing it for a preliminary modeling project. + +The NLSF is a longitudinal survey of students entering college for the first time in the fall of 1999, at one of 28 selective institutions. They were surveyed once that fall, and then once each spring for the next four years. In this homework, I analyze some summary statistics describing students' self-reported time use, GPAs, and majors over the four-year survey period. The sample here is limited to students who stayed at the same institution for exactly four years. 
That is, students who dropped out, transferred, or graduated early are not represented here, although they were followed and interviewed by the NLSF and make up a substantial portion of the original sample. + +Some preliminary code: + +```{r, warning = FALSE, message = FALSE} +library(ggplot2) +library(reshape2) + +load("~/Class/Causal Methods/causal methods paper/data/design.RData") + +gpa <- design[, grep("gpa$", names(design))] +major <- design[, grep("w\\d_(?!(gpa|wd))", names(design), perl = TRUE)] +time_use <- design[, grep("_wd_", names(design))] +``` + +## GPA + +```{r} +gpa.means <- data.frame(mean_gpa = colMeans(gpa), wave = 1:5) + +ggplot(gpa.means) + geom_point(aes(wave, mean_gpa)) + geom_line(aes(wave, mean_gpa)) +``` From 313f708910833f8a655f1926279fb4f4f17ada29 Mon Sep 17 00:00:00 2001 From: gwerbin Date: Fri, 26 Sep 2014 16:06:02 -0400 Subject: [PATCH 3/4] added GPA missingness plot, but need to add by-semester details --- hw1/_posts/2014-09-18-HW1-Greg-Werbin.Rmd | 40 +++++++++++++++++++---- 1 file changed, 34 insertions(+), 6 deletions(-) diff --git a/hw1/_posts/2014-09-18-HW1-Greg-Werbin.Rmd b/hw1/_posts/2014-09-18-HW1-Greg-Werbin.Rmd index 2e5e314..4e2e5b7 100644 --- a/hw1/_posts/2014-09-18-HW1-Greg-Werbin.Rmd +++ b/hw1/_posts/2014-09-18-HW1-Greg-Werbin.Rmd @@ -17,18 +17,46 @@ Some preliminary code: ```{r, warning = FALSE, message = FALSE} library(ggplot2) library(reshape2) +library(dplyr) load("~/Class/Causal Methods/causal methods paper/data/design.RData") -gpa <- design[, grep("gpa$", names(design))] -major <- design[, grep("w\\d_(?!(gpa|wd))", names(design), perl = TRUE)] -time_use <- design[, grep("_wd_", names(design))] +gpa <- design[, c("caseid", grep("gpa$", names(design), value = TRUE))] +gpa$cumulative <- rowMeans(gpa[, -1]) +major <- design[, c("caseid", grep("w\\d_(?!(gpa|wd))", names(design), perl = TRUE, value = TRUE))] +time_use <- design[, c("caseid", grep("_wd_", names(design), value = TRUE))] ``` ## GPA ```{r} -gpa.means <- 
data.frame(mean_gpa = colMeans(gpa), wave = 1:5) - -ggplot(gpa.means) + geom_point(aes(wave, mean_gpa)) + geom_line(aes(wave, mean_gpa)) +load("~/Class/Causal Methods/causal methods paper/data/gpa_number_NA_matrix.RData") +gpa.melted <- melt(gpa, id.vars = "caseid", variable.name = "wave", value.name = "GPA") +gpa.melted$wave <- as.numeric(gsub("\\D", "", gpa.melted$wave)) + +ncourses <- c(20, 30, 22, 18, 6) + +gpa_data <- merge( + merge( + gpa.melted, + melt(as.data.frame(cbind(caseid = nlsf$caseid, gpa_number_NA_matrix)), + id.vars = "caseid", variable.name = "wave", value.name = "Number missing") + ), + melt(as.data.frame(cbind(caseid = nlsf$caseid, t(apply(gpa_number_NA_matrix, 1, `/`, ncourses)))), + id.vars = "caseid", variable.name = "wave", value.name = "Percent reported") + ) +gpa_data.melted <- melt(gpa_data, id.vars = c("caseid", "wave")) + +ggplot(gpa_data.melted) + xlab("Wave") + ylab("") + + geom_boxplot(aes(x = as.factor(wave), y = value)) + +# geom_line( +# aes(x = wave, y = avg), +# summarize(group_by(gpa_data.melted, wave, variable), avg = mean(value)) +# ) + + facet_grid(variable ~ ., scales = "free") + +din <- par("din") / 2 +ggsave("~/class/data viz/tmp.png", + width = din[1], height = din[2], scale = 2) +system('open "/Users/hotdog2/class/data viz/tmp.png"') ``` From 740153cb2e36f85856db923f78fb2e7e371b19e4 Mon Sep 17 00:00:00 2001 From: Greg Werbin Date: Tue, 4 Nov 2014 08:40:20 -0500 Subject: [PATCH 4/4] knitted html --- hw1/_posts/2014-09-18-HW1-Greg-Werbin.Rmd | 15 +-- hw1/_posts/2014-09-18-HW1-Greg-Werbin.html | 133 +++++++++++++++++++++ 2 files changed, 141 insertions(+), 7 deletions(-) create mode 100644 hw1/_posts/2014-09-18-HW1-Greg-Werbin.html diff --git a/hw1/_posts/2014-09-18-HW1-Greg-Werbin.Rmd b/hw1/_posts/2014-09-18-HW1-Greg-Werbin.Rmd index 4e2e5b7..30c1580 100644 --- a/hw1/_posts/2014-09-18-HW1-Greg-Werbin.Rmd +++ b/hw1/_posts/2014-09-18-HW1-Greg-Werbin.Rmd @@ -20,6 +20,8 @@ library(reshape2) library(dplyr) 
load("~/Class/Causal Methods/causal methods paper/data/design.RData") +load("~/Class/Causal Methods/causal methods paper/data/gpa_number_NA_matrix.RData") +load("~/Class/Causal Methods/causal methods paper/data/caseid.RData") gpa <- design[, c("caseid", grep("gpa$", names(design), value = TRUE))] gpa$cumulative <- rowMeans(gpa[, -1]) @@ -30,7 +32,6 @@ time_use <- design[, c("caseid", grep("_wd_", names(design), value = TRUE))] ## GPA ```{r} -load("~/Class/Causal Methods/causal methods paper/data/gpa_number_NA_matrix.RData") gpa.melted <- melt(gpa, id.vars = "caseid", variable.name = "wave", value.name = "GPA") gpa.melted$wave <- as.numeric(gsub("\\D", "", gpa.melted$wave)) @@ -39,10 +40,10 @@ ncourses <- c(20, 30, 22, 18, 6) gpa_data <- merge( merge( gpa.melted, - melt(as.data.frame(cbind(caseid = nlsf$caseid, gpa_number_NA_matrix)), + melt(as.data.frame(cbind(caseid = caseid, gpa_number_NA_matrix)), id.vars = "caseid", variable.name = "wave", value.name = "Number missing") ), - melt(as.data.frame(cbind(caseid = nlsf$caseid, t(apply(gpa_number_NA_matrix, 1, `/`, ncourses)))), + melt(as.data.frame(cbind(caseid = caseid, t(apply(gpa_number_NA_matrix, 1, `/`, ncourses)))), id.vars = "caseid", variable.name = "wave", value.name = "Percent reported") ) gpa_data.melted <- melt(gpa_data, id.vars = c("caseid", "wave")) @@ -55,8 +56,8 @@ ggplot(gpa_data.melted) + xlab("Wave") + ylab("") + # ) + facet_grid(variable ~ ., scales = "free") -din <- par("din") / 2 -ggsave("~/class/data viz/tmp.png", - width = din[1], height = din[2], scale = 2) -system('open "/Users/hotdog2/class/data viz/tmp.png"') +# din <- par("din") / 2 +# ggsave("~/class/data viz/tmp.png", +# width = din[1], height = din[2], scale = 2) +# system('open "/Users/hotdog2/class/data viz/tmp.png"') ``` diff --git a/hw1/_posts/2014-09-18-HW1-Greg-Werbin.html b/hw1/_posts/2014-09-18-HW1-Greg-Werbin.html new file mode 100644 index 0000000..5d45cb0 --- /dev/null +++ b/hw1/_posts/2014-09-18-HW1-Greg-Werbin.html @@ -0,0 
+1,133 @@ + + + + + + + + + + + + + +Homework 1 + + + + + + + + + + + + + + + + + + + + +
+ + + + + +
+

Introduction

+

This data comes from the National Longitudinal Survey of Freshmen (NLSF). I intend to use it for my master’s thesis, and I am currently analyzing it for a preliminary modeling project.

+

The NLSF is a longitudinal survey of students entering college for the first time in the fall of 1999, at one of 28 selective institutions. They were surveyed once that fall, and then once each spring for the next four years. In this homework, I analyze some summary statistics describing students’ self-reported time use, GPAs, and majors over the four-year survey period. The sample here is limited to students who stayed at the same institution for exactly four years. That is, students who dropped out, transferred, or graduated early are not represented here, although they were followed and interviewed by the NLSF and make up a substantial portion of the original sample.

+

Some preliminary code:

+
library(ggplot2)  # Setup chunk: attach packages, load saved objects, then carve per-topic column subsets out of `design`. ggplot2 draws the plot in the GPA section.
+library(reshape2)  # presumably the source of melt(), used below to reshape tables to long form
+library(dplyr)  # in the code visible here, only the commented-out summarize()/group_by() layer refers to dplyr
+
+load("~/Class/Causal Methods/causal methods paper/data/design.RData")  # presumably defines `design`, which is indexed below -- TODO confirm
+load("~/Class/Causal Methods/causal methods paper/data/gpa_number_NA_matrix.RData")  # presumably defines `gpa_number_NA_matrix` -- TODO confirm
+load("~/Class/Causal Methods/causal methods paper/data/caseid.RData")  # presumably defines `caseid` -- TODO confirm
+
+gpa <- design[, c("caseid", grep("gpa$", names(design), value = TRUE))]  # caseid plus every column whose name ends in "gpa"
+gpa$cumulative <- rowMeans(gpa[, -1])  # row mean over all columns except the first (caseid); na.rm is not set, so a row containing any NA yields NA
+major <- design[, c("caseid", grep("w\\d_(?!(gpa|wd))", names(design), perl = TRUE, value = TRUE))]  # caseid plus columns whose name has "w<digit>_" NOT immediately followed by "gpa" or "wd" (Perl negative lookahead)
+time_use <- design[, c("caseid", grep("_wd_", names(design), value = TRUE))]  # caseid plus columns whose name contains "_wd_"
+
+
+

GPA

+
gpa.melted <- melt(gpa, id.vars = "caseid", variable.name = "wave", value.name = "GPA")  # GPA chunk: reshape `gpa` to long form, one row per caseid and original column, then merge in the NA-count tables and box-plot everything by wave
+gpa.melted$wave <- as.numeric(gsub("\\D", "", gpa.melted$wave))  # strip every non-digit from the old column name; a name with no digits (e.g. "cumulative") becomes "" and therefore NA
+
+ncourses <- c(20, 30, 22, 18, 6)  # five divisors applied to each row of gpa_number_NA_matrix below; presumably courses per wave -- TODO confirm
+
+gpa_data <- merge(  # no `by` is given, so merge() joins on the column names the two inputs share
+  merge(
+    gpa.melted,
+    melt(as.data.frame(cbind(caseid = caseid, gpa_number_NA_matrix)),  # prepend caseid to the matrix, then reshape to long form
+         id.vars = "caseid", variable.name = "wave", value.name = "Number missing")
+    ),
+  melt(as.data.frame(cbind(caseid = caseid, t(apply(gpa_number_NA_matrix, 1, `/`, ncourses)))),  # divide each row elementwise by ncourses; apply() returns the per-row results as columns, so t() flips them back
+       id.vars = "caseid", variable.name = "wave", value.name = "Percent reported")  # NOTE(review): the value is gpa_number_NA_matrix / ncourses (a fraction, not x100); if the matrix holds missing counts this is a fraction missing, not "percent reported" -- confirm label vs. computation
+  )
+gpa_data.melted <- melt(gpa_data, id.vars = c("caseid", "wave"))  # stack the remaining columns into the `variable` / `value` pair used by the plot
+
+ggplot(gpa_data.melted) + xlab("Wave") + ylab("") +
+  geom_boxplot(aes(x = as.factor(wave), y = value)) +  # wave is converted to a factor so each wave gets its own box
+#   geom_line(
+#     aes(x = wave, y = avg),
+#     summarize(group_by(gpa_data.melted, wave, variable), avg = mean(value))
+#     ) +
+  facet_grid(variable ~ ., scales = "free")  # one panel row per measure in `variable`; scales are allowed to differ between panels
+

+
# din <- par("din") / 2
+# ggsave("~/class/data viz/tmp.png",
+#        width = din[1], height = din[2], scale = 2)
+# system('open "/Users/hotdog2/class/data viz/tmp.png"')
+
+ + +
+ + + + + + + +