diff --git a/hw/2014-10-22-hw3-greg-werbin.html b/hw/2014-10-22-hw3-greg-werbin.html new file mode 100644 index 0000000..e8b1308 --- /dev/null +++ b/hw/2014-10-22-hw3-greg-werbin.html @@ -0,0 +1,422 @@ + + + + + + + + + + + + + +Homework 3 + + + + + + + + + + + + + + + + + + + + +
+ + + + + +
+

Part A

+

I’ll be comparing Question 5 of the 1999 and 2008 European Values Surveys – Great Britain, which has the same exact wording in both versions:

+
+

Please look carefully at the following list of voluntary organisations and activities and say … a) which, if any, do you belong to? (Code all mentioned under (a) as ‘1’) b) which, if any, are you currently doing unpaid voluntary work for? (Code all mentioned under (b) as ‘1’)

+
+

with options:

+
+
    +
  • Social welfare services for elderly, handicapped or deprived people
  • +
  • Religious or church organisations
  • +
  • Education, arts, music or cultural activities
  • +
  • Trade unions
  • +
  • Political parties or groups
  • +
  • Local community action on issues like poverty, employment, housing, racial equality
  • +
  • Third world development or human rights
  • +
  • Conservation, the environment, ecology, animal rights
  • +
  • Professional associations
  • +
  • Youth work (e.g. scouts, guides, youth clubs etc.)
  • +
  • Sports or recreation
  • +
  • Women’s groups
  • +
  • Peace movement
  • +
  • Voluntary organisations concerned with health
  • +
  • Other groups
  • +
  • None

    (Source: p. 5 of each Field Questionnaire)
  • +
+
+
+
+

Part B

+

I’m going to graph the log-base-10 proportion of respondents who answer “yes” to each question. There will be two panels, one for each question. In each panel, I’ll draw a horizontal dot chart with the 1999 and 2008 values plotted on the same row, distinguished by the plotting character. If it helps clarity, I’ll draw straight lines to connect points from the same year. The goal is to compare the popularity of volunteer activities between 1999 and 2008. I’m using a log scale because I’m expecting that some groups will be more popular than others.

+
+
+

Part C

+

I’ll use ggplot2 in R, so I’ll need a data.frame that looks like:

+ + + + + + + + + + + + + + + + + +
CategoryQuestionYearProportion
WelfareA19990.15
+
+
+

Part D

+
library(foreign)
+library(reshape2)
+library(memisc)
+
## Loading required package: lattice
+## Loading required package: MASS
+## 
+## Attaching package: 'memisc'
+## 
+## The following objects are masked from 'package:stats':
+## 
+##     contr.sum, contr.treatment, contrasts
+## 
+## The following object is masked from 'package:base':
+## 
+##     as.array
+
pdt <- data.table:::print.data.table
+
+d1999 <- read.dta("ZA3777_v3-0-1.dta")
+
## Warning in `levels<-`(`*tmp*`, value = if (nl == nL) as.character(labels)
+## else paste0(labels, : duplicated levels in factors are deprecated
+
## Warning in `levels<-`(`*tmp*`, value = if (nl == nL) as.character(labels)
+## else paste0(labels, : duplicated levels in factors are deprecated
+
## Warning in `levels<-`(`*tmp*`, value = if (nl == nL) as.character(labels)
+## else paste0(labels, : duplicated levels in factors are deprecated
+
## Warning in `levels<-`(`*tmp*`, value = if (nl == nL) as.character(labels)
+## else paste0(labels, : duplicated levels in factors are deprecated
+
d2008 <- read.dta("ZA4752_v1-0-0.dta")
+
## Warning in `levels<-`(`*tmp*`, value = if (nl == nL) as.character(labels)
+## else paste0(labels, : duplicated levels in factors are deprecated
+
## Warning in `levels<-`(`*tmp*`, value = if (nl == nL) as.character(labels)
+## else paste0(labels, : duplicated levels in factors are deprecated
+
## Warning in `levels<-`(`*tmp*`, value = if (nl == nL) as.character(labels)
+## else paste0(labels, : duplicated levels in factors are deprecated
+
## Warning in `levels<-`(`*tmp*`, value = if (nl == nL) as.character(labels)
+## else paste0(labels, : duplicated levels in factors are deprecated
+
## Warning in `levels<-`(`*tmp*`, value = if (nl == nL) as.character(labels)
+## else paste0(labels, : duplicated levels in factors are deprecated
+
## Warning in `levels<-`(`*tmp*`, value = if (nl == nL) as.character(labels)
+## else paste0(labels, : duplicated levels in factors are deprecated
+
## Warning in `levels<-`(`*tmp*`, value = if (nl == nL) as.character(labels)
+## else paste0(labels, : duplicated levels in factors are deprecated
+
## Warning in `levels<-`(`*tmp*`, value = if (nl == nL) as.character(labels)
+## else paste0(labels, : duplicated levels in factors are deprecated
+
## Warning in `levels<-`(`*tmp*`, value = if (nl == nL) as.character(labels)
+## else paste0(labels, : duplicated levels in factors are deprecated
+
## Warning in read.dta("ZA4752_v1-0-0.dta"): value labels ('GB15') for 'GB15'
+## are missing
+
## Checking out the structure of the file
+
+dim(d1999)
+
## [1] 1000  417
+
dim(d2008)
+
## [1] 1561  456
+
head(names(d1999), 20)
+
##  [1] "studyno"  "version"  "id_cocas" "caseno"   "intno"    "intno2gb"
+##  [7] "year"     "weight"   "v1"       "v2"       "v3"       "v4"      
+## [13] "v5"       "v6"       "v6a_gb"   "v7"       "v8"       "v9"      
+## [19] "v10"      "v11"
+
head(names(d2008), 20)
+
##  [1] "StudyNo"  "Version"  "id_cocas" "caseno"   "intno"    "wave"    
+##  [7] "year"     "country"  "country1" "c_abrv"   "c_abrv1"  "weight"  
+## [13] "cntry_y"  "cntry1_y" "v1"       "v2"       "v3"       "v4"      
+## [19] "v5"       "v6"
+
pdt(d1999[1:5, 1:8])
+
##    studyno            version     id_cocas caseno intno intno2gb year
+## 1:    3777 3.0.1 (2012-12-28) 199982600001      1  2100        8 1999
+## 2:    3777 3.0.1 (2012-12-28) 199982600002      2  2100        9 1999
+## 3:    3777 3.0.1 (2012-12-28) 199982600003      3  2100       10 1999
+## 4:    3777 3.0.1 (2012-12-28) 199982600004      4  2100        4 1999
+## 5:    3777 3.0.1 (2012-12-28) 199982600005      5  2100        3 1999
+##      weight
+## 1: 1.647713
+## 2: 1.210022
+## 3: 1.585010
+## 4: 1.585010
+## 5: 1.294967
+
pdt(d2008[1:5, 1:14])
+
##    StudyNo            Version     id_cocas caseno intno      wave year
+## 1:    4752 1.0.0 "2010-11-30" 200882610080    397  1899 wave 2008 2009
+## 2:    4752 1.0.0 "2010-11-30" 200882610093    475  3383 wave 2008 2009
+## 3:    4752 1.0.0 "2010-11-30" 200882610284   1042  2410 wave 2008 2009
+## 4:    4752 1.0.0 "2010-11-30" 200882611058   4401  2408 wave 2008 2009
+## 5:    4752 1.0.0 "2010-11-30" 200882611516   6998  3032 wave 2008 2009
+##           country       country1 c_abrv c_abrv1 weight
+## 1:  Great Britain  Great Britain     GB      GB      0
+## 2:  Great Britain  Great Britain     GB      GB      0
+## 3:  Great Britain  Great Britain     GB      GB      0
+## 4:  Great Britain  Great Britain     GB      GB      0
+## 5:  Great Britain  Great Britain     GB      GB      0
+##                 cntry_y                              cntry1_y
+## 1: Great Britain (2009) Great Britain (2009) [with split ups]
+## 2: Great Britain (2009) Great Britain (2009) [with split ups]
+## 3: Great Britain (2009) Great Britain (2009) [with split ups]
+## 4: Great Britain (2009) Great Britain (2009) [with split ups]
+## 5: Great Britain (2009) Great Britain (2009) [with split ups]
+
unique(d2008$year) # just checking, 2008/2009 difference looked suspicious
+
## [1] 2009
+## Levels: 2008 2009
+
## Saving the columns I want and dumping the rest
+
+# Build survey variable names ("v12", "v13", ...) from one or more vectors of
+# question numbers. The arguments are collected with list(...), so they are
+# evaluated as ordinary function arguments and variables local to the caller
+# resolve correctly.
+select <- function(...) paste0("v", unlist(list(...)))
+# Question 5 variable names: part (a) "belong to" and part (b) "work for",
+# sixteen columns each, numbered differently in the two surveys.
+a99 <- sprintf("v%d", 12:27)
+b99 <- sprintf("v%d", 30:45)
+a08 <- sprintf("v%d", 10:25)
+b08 <- sprintf("v%d", 28:43)
+# c(length(a99), length(b99), length(a08), length(b08))
+# 16 categories
+
+# Keep the respondent id, the year and the Question 5 columns; the 2008 file
+# additionally keeps its f25 / f43 flag columns.
+d1999 <- d1999[c("id_cocas", "year", a99, b99)]
+d2008 <- d2008[c("id_cocas", "year", a08, b08, "f25", "f43")]
+

Variables f25 and f43 are for flagging inconsistencies in the 2008 survey. Unfortunately there aren’t any flags for the 1999 survey. The inconsistency codes for f43 are:

+
+
    +
  • Inconsistent 1: If respondent mentiones at least one organisation and “none”. if v43=1 and any of v28 to v42=1 then f43=1
  • +
  • Inconsistent 2: If respondent does not know for at least one organization whether s/he works for it and mentiones “none”. if v43=1 and none of v28 to v42=1 and any of v28 to v42=8 then f43=2
  • +
  • Inconsistent 3: If respondent does not know for at least one organization whether s/he works for it and does not mention “none”. if v43=2 and none of v28 to v42=1 and any of v28 to v42=8 then f43=3
  • +
  • Inconsistent 4: If respondent does not mention any organisation and does not mention “none”. if v43=2 and all of v28 to v42=2 then f43=4
  • +
  • Inconsistent 5: If respondent mentions at least one organization and does not know whether s/he works for “none”. if v43=8 and any of v28 to v42=1 then f43=5
  • +
  • Inconsistent 6: If respondent does not mention any organization and does not know whether s/he works for “none”. if v43=8 and all of v28 to v42=2 then f43=6
  • +
  • Inconsistent 7: If respondent mentions at least one organization and does not answer whether s/he works for “none”. if v43=9 and any of v28 to v42=1 then f43=7
  • +
  • Inconsistent 8: If respondent does not mention any organization and does not answer whether s/he works for “none”. if v43=9 and all of v28 to v42=2 then f43=8

    (Source: p. 57 of the 2008 Variable Report)
  • +
+
+
knitr::kable(rbind("belong to" = table(d2008$f25),"work for" = table(d2008$f43)))
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
consistentinconsistent 1inconsistent 2inconsistent 3inconsistent 4inconsistent 5inconsistent 6inconsistent 7inconsistent 8
belong to155350020010
work for155310070000
+

There are so few inconsistent responses in the 2008 survey that in my opinion it’s not even worth deleting them. Hopefully the 1999 survey is equally clean. In principle, I should reconstruct the consistency checks and apply them to both questions in both surveys. Then I could decide what to do with each type of inconsistency and recode accordingly.

+
# Restore the global d1999 / d2008 data frames to their freshly loaded,
+# column-subsetted state -- for fixing stuff in case I mess up.
+# Both files are read and subsetted into locals first, so a failed read or a
+# bad column name leaves the existing globals untouched instead of half-reset.
+# Uses the global column-name vectors a99, b99, a08 and b08.
+reset <- function() {
+  fresh1999 <- read.dta("ZA3777_v3-0-1.dta")
+  fresh2008 <- read.dta("ZA4752_v1-0-0.dta")
+
+  fresh1999 <- fresh1999[, c("id_cocas", "year", a99, b99)]
+  fresh2008 <- fresh2008[, c("id_cocas", "year", a08, b08, "f25", "f43")]
+
+  # Publish both results only once everything above has succeeded.
+  d1999 <<- fresh1999
+  d2008 <<- fresh2008
+}
+
+# Short labels for the sixteen Question 5 options, in questionnaire order.
+categories <- c(
+  "Social welfare",
+  "Religious",
+  "Education, arts, music or cultural",
+  "Trade unions",
+  "Political",
+  "Local community action",
+  "Third world development or human rights",
+  "Conservation, the environment, ecology, animal rights",
+  "Professional associations",
+  "Youth work",
+  "Sports or recreation",
+  "Women's groups",
+  "Peace movement",
+  "Organization concerned with health",
+  "Other groups",
+  "None"
+)
+# Column names of the form "<category>_<part>": every category for part A
+# first, then every category for part B.
+varnames <- paste(
+  rep(categories, times = 2),
+  rep(c("A", "B"), each = length(categories)),
+  sep = "_"
+)
+
+# Columns 1-2 are id_cocas and year; the Question 5 answers start at column 3.
+# The positions are derived from varnames rather than a hard-coded 2*16, so the
+# renaming cannot drift out of step with the category list.
+q5_cols <- seq.int(3, length.out = length(varnames))
+names(d1999)[q5_cols] <- varnames
+names(d2008)[q5_cols] <- varnames
+
+# The f25 / f43 flag columns are not part of the long table; drop them.
+d2008$f25 <- NULL
+d2008$f43 <- NULL
+# The file records 2009 for this wave (see the unique(d2008$year) check);
+# label it 2008 so the two surveys read as 1999 vs 2008.
+d2008$year <- 2008
+
+# Share of "mentioned" among the valid answers in one survey column.
+# Any value other than "mentioned" / "not mentioned" (including NA) is left
+# out of the denominator; the result is NaN when no valid answer remains.
+calc_proportions <- function(x) {
+  answers <- as.character(x)
+  valid <- answers[answers %in% c("mentioned", "not mentioned")]
+  mean(valid == "mentioned")
+}
+
+# Melt a wide data frame (id column "year") into long form, then split each
+# former column name at "_" into its category part and its question part.
+melt_and_split <- function(DF) {
+  long <- melt(
+    DF,
+    id.vars = "year",
+    variable.name = "category",
+    value.name = "proportion"
+  )
+  # the value column is called "proportion" whatever the wide columns held
+  name_parts <- do.call(rbind, strsplit(as.character(long$category), "_"))
+  long$category <- name_parts[, 1]
+  long$question <- name_parts[, 2]
+  long
+}
+
+# Summarise one survey: compute the "mentioned" proportion for every column
+# after the first two, tag the result with the year found in the first row,
+# reshape to long form and relabel question parts A / B as member / volunteer.
+calc_melt_split <- function(DF) {
+  survey_year <- as.character(DF$year[1])
+  shares <- lapply(DF[-(1:2)], calc_proportions)
+  wide <- data.frame(c(year = survey_year, shares), check.names = FALSE)
+  long <- melt_and_split(wide)
+  # recode(new <- old): presumably resolves to memisc::recode -- verify
+  long$question <- recode(long$question, "member" <- "A", "volunteer" <- "B")
+  long
+}
+
+d <- rbind(calc_melt_split(d1999), calc_melt_split(d2008))
+pdt(d, 5)
+
##     year                           category  proportion  question
+##  1: 1999                     Social welfare 0.068000000    member
+##  2: 1999                          Religious 0.048000000    member
+##  3: 1999 Education, arts, music or cultural 0.104000000    member
+##  4: 1999                       Trade unions 0.073000000    member
+##  5: 1999                          Political 0.026000000    member
+## ---                                                              
+## 60: 2008                     Women's groups 0.011553273 volunteer
+## 61: 2008                     Peace movement 0.003851091 volunteer
+## 62: 2008 Organization concerned with health 0.031450578 volunteer
+## 63: 2008                       Other groups 0.046885035 volunteer
+## 64: 2008                               None 0.779345734 volunteer
+
+
+

Part E

+
library(grid)
+library(ggplot2)
+
+d$year <- factor(d$year)
+
+# Order the category axis by 2008 membership share, smallest first. The level
+# names are taken from the same rows that are ranked, so the ordering does not
+# depend on unique(d$category) happening to follow the same row order.
+ref <- d[d$year == "2008" & d$question == "member", ]
+ord <- order(ref$proportion)
+d$category <- factor(d$category, levels = as.character(ref$category)[ord])
+
+g <- ggplot(d, aes(x = proportion, y = category)) +  # dot chart: one row per category
+  geom_point(aes(shape = year), color = NA) +  # NOTE(review): invisible layer (color = NA); presumably added first so the discrete y scale exists before geom_hline() uses numeric intercepts -- confirm before reordering
+  geom_hline(aes(yintercept = as.numeric(category)), color = "lightgray") +  # light gray guide line through each category row
+  geom_point(aes(shape = year), size = 3) +  # the visible points, one shape per year
+  scale_x_log10() +  # x axis on a log10 scale
+  scale_shape_manual(values = c(1, 16)) +  # shapes 1 and 16, assigned to the year levels in order
+  facet_grid(~ question) +  # one panel per question
+  theme_classic() + theme(
+    axis.line = element_line(color = NA),
+    legend.position = "top",
+    panel.border = element_rect(fill = NA),
+    plot.title = element_text(size = 11, face = "bold")
+    ) +
+  ylab("") + xlab("log10 proportion") +
+  ggtitle("Proportion of EVS 1999 and 2008 respondents\nwho belong to or volunteer in each of sixteen organizations")
+
+## Draw the graph with the title centered properly
+# from http://stackoverflow.com/a/10976398/2954547
+gt <- ggplot_gtable(ggplot_build(g))
+
## Warning: Removed 1 rows containing missing values (geom_point).
+
## Warning: Removed 1 rows containing missing values (geom_point).
+
gt$layout[which(gt$layout$name == "title"), c("l", "r")] <- c(1, max(gt$layout$r))  # stretch the title's layout cell from column 1 to the right-most column in use
+plot.new()  # advance to a fresh plotting frame before drawing
+grid.draw(gt)  # render the modified gtable
+

+
+
+

Part F, G

+

I think this is plenty encapsulated as-is.

+
+ + +
+ + + + + + + + diff --git a/hw/2014-10-22-hw3-greg-werbin.rmd b/hw/2014-10-22-hw3-greg-werbin.rmd new file mode 100644 index 0000000..983c9f3 --- /dev/null +++ b/hw/2014-10-22-hw3-greg-werbin.rmd @@ -0,0 +1,211 @@ +--- +title: "Homework 3" +author: "Greg Werbin" +output: html_document +--- + +```{r, echo=FALSE, warning=FALSE, message=FALSE} +setwd("/Users/hotdog2/class/data viz/hw 3") +``` + +## Part A + +I'll be comparing Question 5 of the 1999 and 2008 European Values Surveys – Great Britain, which has the same exact wording in both versions: + +> Please look carefully at the following list of voluntary organisations and activities and say ... +a) which, if any, do you belong to? (Code all mentioned under (a) as ‘1’) +b) which, if any, are you currently doing unpaid voluntary work for? (Code all mentioned under (b) as ‘1’) + +with options: + +> * Social welfare services for elderly, handicapped or deprived people +* Religious or church organisations +* Education, arts, music or cultural activities +* Trade unions +* Political parties or groups +* Local community action on issues like poverty, employment, housing, racial equality +* Third world development or human rights +* Conservation, the environment, ecology, animal rights +* Professional associations +* Youth work (e.g. scouts, guides, youth clubs etc.) +* Sports or recreation +* Women's groups +* Peace movement +* Voluntary organisations concerned with health +* Other groups +* None

+_(Source: p. 5 of each Field Questionnaire)_ + +## Part B + +I'm going to graph the log-base-10 proportion of respondents who answer "yes" to each question. There will be two panels, one for each question. In each panel, I'll draw a horizontal dot chart with the 1999 an 2008 values plotted on the same row, distinguished by the plotting character. If it helps clarity, I'll draw straight lines to connect points from the same year. The goal is to compare the popularity of volunteer activities between 1999 and 2008. I'm using a log scale because I'm expecting that some groups will be more popular than others. + +## Part C + +I'll use `ggplot2` in R, so I'll need a data.frame that looks like: + +|Category |Question |Year |Proportion | +|:--------|:--------|:----|:----------| +|Welfare |A |1999 |0.15 | + +## Part D + +```{r, echo = -(5:6)} +library(foreign) +library(reshape2) +library(memisc) +pdt <- data.table:::print.data.table + +setwd("/Users/hotdog2/class/data viz/hw 3") + +d1999 <- read.dta("ZA3777_v3-0-1.dta") +d2008 <- read.dta("ZA4752_v1-0-0.dta") + +## Checking out the structure of the file + +dim(d1999) +dim(d2008) + +head(names(d1999), 20) +head(names(d2008), 20) + +pdt(d1999[1:5, 1:8]) +pdt(d2008[1:5, 1:14]) + +unique(d2008$year) # just checking, 2008/2009 difference looked suspicious + +## Saving the columns I want and dumping the rest + +select <- function(...) paste0("v", unlist(lapply(as.list(sys.call())[-1], eval))) +a99 <- select(12:27) +b99 <- select(30:45) +a08 <- select(10:25) +b08 <- select(28:43) +# c(length(a99), length(b99), length(a08), length(b08)) +# 16 categories + +d1999 <- d1999[, c("id_cocas", "year", a99, b99)] +d2008 <- d2008[, c("id_cocas", "year", a08, b08, "f25", "f43")] +``` + +Variables `f25` and `f43` are for flagging inconsistencies in the 2008 survey. Unfortunately there aren't any flags for the 1999 survey. 
The inconsistency codes for `f43` are: + +> * Inconsistent 1: If respondent mentiones at least one organisation and "none". if v43=1 and any of v28 to v42=1 then f43=1 +* Inconsistent 2: If respondent does not know for at least one organization whether s/he works for it and mentiones "none". if v43=1 and none of v28 to v42=1 and any of v28 to v42=8 then f43=2 +* Inconsistent 3: If respondent does not know for at least one organization whether s/he works for it and does not mention "none". if v43=2 and none of v28 to v42=1 and any of v28 to v42=8 then f43=3 +* Inconsistent 4: If respondent does not mention any organisation and does not mention "none". if v43=2 and all of v28 to v42=2 then f43=4 +* Inconsistent 5: If respondent mentions at least one organization and does not know whether s/he works for "none". if v43=8 and any of v28 to v42=1 then f43=5 +* Inconsistent 6: If respondent does not mention any organization and does not know whether s/he works for "none". if v43=8 and all of v28 to v42=2 then f43=6 +* Inconsistent 7: If respondent mentions at least one organization and does not answer whether s/he works for "none". if v43=9 and any of v28 to v42=1 then f43=7 +* Inconsistent 8: If respondent does not mention any organization and does not answer whether s/he works for "none". if v43=9 and all of v28 to v42=2 then f43=8

+_(Source: p. 57 of the 2008 Variable Report)_ + +```{r, results='asis'} +knitr::kable(rbind("belong to" = table(d2008$f25),"work for" = table(d2008$f43))) +``` + +There are so few inconsistent responses in the 2008 survey that in my opinion it's not even worth deleting them. Hopefully the 1999 survey is equally clean. In principle, I should reconstruct the consistency checks and apply them to both questions in both surveys. Then I could decide what to do with each type of inconsistency and recode accordingly. + +```{r} +reset <- function() { + d1999 <<- read.dta("ZA3777_v3-0-1.dta") + d2008 <<- read.dta("ZA4752_v1-0-0.dta") + + d1999 <<- d1999[, c("id_cocas", "year", a99, b99)] + d2008 <<- d2008[, c("id_cocas", "year", a08, b08, "f25", "f43")] +} +# for fixing stuff in case I mess up + +categories <- c( + "Social welfare", + "Religious", + "Education, arts, music or cultural", + "Trade unions", + "Political", + "Local community action", + "Third world development or human rights", + "Conservation, the environment, ecology, animal rights", + "Professional associations", + "Youth work", + "Sports or recreation", + "Women's groups", + "Peace movement", + "Organization concerned with health", + "Other groups", + "None" + ) +varnames <- apply(expand.grid(categories, c("A", "B")), 1, paste, collapse = "_") + +names(d1999)[seq.int(3, length.out=2*16)] <- varnames + +names(d2008)[seq.int(3, length.out=2*16)] <- c(varnames) +d2008$f25 <- d2008$f43 <- NULL +d2008$year <- 2008 + +calc_proportions <- function(x) { + x <- as.character(x) + x[x %nin% c("mentioned", "not mentioned")] <- NA + x[x == "mentioned"] <- 1 + x[x == "not mentioned"] <- 0 + x <- as.numeric(x) + mean(x, na.rm = TRUE) +} + +melt_and_split <- function(DF) { + DF <- melt(DF, id.vars = "year", + variable.name = "category", value.name = "proportion") + # it's not a "proportion" column yet, but it will be + tmp <- do.call(rbind, strsplit(as.character(DF$category), "_")) + DF[c("category", "question")] <- tmp + 
DF +} + +calc_melt_split <- function(DF) { + out <- c(year = as.character(DF$year[1]), lapply(DF[-(1:2)], calc_proportions)) + out <- melt_and_split(data.frame(out, check.names = FALSE)) + out$question <- recode(out$question, "member" <- "A", "volunteer" <- "B") + out +} + +d <- rbind(calc_melt_split(d1999), calc_melt_split(d2008)) +pdt(d, 5) +``` + +## Part E + +```{r, fig.width=9} +library(grid) +library(ggplot2) + +d$year <- factor(d$year) + +ord <- order(d[d$year == "2008" & d$question == "member", "proportion"]) +d$category <- factor(d$category, levels = unique(d$category)[ord]) + +g <- ggplot(d, aes(x = proportion, y = category)) + + geom_point(aes(shape = year), color = NA) + + geom_hline(aes(yintercept = as.numeric(category)), color = "lightgray") + + geom_point(aes(shape = year), size = 3) + + scale_x_log10() + + scale_shape_manual(values = c(1, 16)) + + facet_grid(~ question) + + theme_classic() + theme( + axis.line = element_line(color = NA), + legend.position = "top", + panel.border = element_rect(fill = NA), + plot.title = element_text(size = 11, face = "bold") + ) + + ylab("") + xlab("log10 proportion") + + ggtitle("Proportion of EVS 1999 and 2008 respondents\nwho belong to or volunteer in each of sixteen organizations") + +## Draw the graph with the title centered properly +# from http://stackoverflow.com/a/10976398/2954547 +gt <- ggplot_gtable(ggplot_build(g)) +gt$layout[which(gt$layout$name == "title"), c("l", "r")] <- c(1, max(gt$layout$r)) +plot.new() +grid.draw(gt) +``` + +## Part F, G +I think this is plenty encapsulated as-is. 
+ diff --git a/hw4/2014-11-13-hw4-gw2286.css b/hw4/2014-11-13-hw4-gw2286.css new file mode 100644 index 0000000..6dc6ddf --- /dev/null +++ b/hw4/2014-11-13-hw4-gw2286.css @@ -0,0 +1,49 @@ +.figure { + margin: 0px; + padding: 10px; + border: 1px solid black; +} + +.caption { + font-style: italic; + text-align: right; +} + +.plot { + background-color: lightgray; + + padding: 0px; + margin: 0px; +} + +rect { + fill: steelblue; +} + +circle { + fill: goldenrod; +} + +.main-title { + font: 15pt courier; +} + +.axis-title { + font: 11pt courier; +} + +.plot-labels { + font: 10pt sans-serif; +} + +.axis-labels { + font: 12pt sans-serif; +} + +.axis-ticks { + stroke: black; +} + +.axis-line { + stroke: black; +} diff --git a/hw4/2014-11-13-hw4-gw2286.html b/hw4/2014-11-13-hw4-gw2286.html new file mode 100644 index 0000000..8bae767 --- /dev/null +++ b/hw4/2014-11-13-hw4-gw2286.html @@ -0,0 +1,39 @@ + + + + + + + + + +
+

+ Here's some text. +

+ +

+ Here's a plot: +

+
+ + +
+

+ isn't it cool? +

+
+ + + diff --git a/hw4/2014-11-13-hw4-gw2286.js b/hw4/2014-11-13-hw4-gw2286.js new file mode 100644 index 0000000..bad07f5 --- /dev/null +++ b/hw4/2014-11-13-hw4-gw2286.js @@ -0,0 +1,113 @@ +// to do: use translate to replace fiddling around with margins + +// var data = [20, 24, 38, 110, 115, 26, 17, 24, 132]; +// var RADlevel = [1, 2, 3, 4, 5, 6, 7, 8, 24]; + +d3.csv("rad.csv") + .row(function(r) { + r.count = parseInt(r.count) + r.rad = parseInt(r.rad) + return r + }) + .get(function(error, data) { + var plot = d3.select(".plot"), + plotHeight = plot.attr("height"), + plotWidth = plot.attr("width"), + margin = {"left": 20, "right": 5, "bottom": 40, "top": 50}, + dropHeight = plotHeight - margin.bottom + + var barSpace = 5, + barWidth = (plotWidth - margin.left - margin.right)/data.length + + var x = d3.scale.linear() + .domain([0, data.length]) + .range([margin.left, plotWidth - barSpace/2 - margin.right]) + + var countMax = d3.max(data, function(datum) { + return datum.count + }) + + var y = d3.scale.linear() + .domain([0, countMax]) + .range([0, plotHeight - margin.top - margin.bottom]) + + plot.selectAll("rect") + .data(data).enter().append("svg:rect") + .attr("x", function(d, i) { return x(i) + barSpace/2; }) + .attr("y", function(d) { return dropHeight - y(d.count); }) + .attr("height", function(d) { return y(d.count); }) + .attr("width", barWidth - barSpace/2) + + plot.selectAll("text") + .data(data).enter().append("text") + .attr("x", function(d, i) { return x(i) + barWidth/2; }) + .attr("y", function(d) { return dropHeight - y(d); }) + .text(function(d) { d.rad; }) + .attr("class", "plot-labels") + + var title = plot.append("text") + .attr("x", plotWidth / 2) + .attr("y", margin.top / 2) + .attr("text-anchor", "middle") + .attr("class", "title main-title") + + title.append("tspan") + .attr("x", plotWidth / 2) + .text("Index of Access to Radial Highways") + .append("tspan") + .attr("x", plotWidth / 2) + .attr("dy", "1.4em") + .text("in Boston suburbs") + 
+ plot.append("text") + .attr("x", plotWidth / 2) + .attr("y", dropHeight + margin.bottom / 1.25) + .attr("text-anchor", "middle") + .text("Level of index") + .attr("class", "title axis-title x-axis-title") + + plot.selectAll("text.x-axis-labels") + .data(data).enter().append("svg:text") + .attr("x", function(d, i) { return x(i); }) + .attr("y", dropHeight) + .attr("text-anchor", "middle") + .attr("dx", barWidth/2) + .attr("dy", 15) + .text(function(d) { return d.rad; }) + .attr("class", "axis-labels x-axis-labels") + + plot.append("text") + .attr("x", margin.left / 1.25) + .attr("y", dropHeight / 2) + .attr("text-anchor", "middle") + .attr("transform", "rotate(270 " + margin.left / 1.25 + " " + dropHeight / 2 + ")") + .text("Number of suburbs") + .attr("class", "axis-title y-axis-title") + + // plot.selectAll("text.y-axis-labels") + // .data(d3.range(0, d3.max(data) + 1, 2)).enter().append("svg:text") + // .attr("x", margin.left - 3) + // .attr("y", function(d, i) { return dropHeight - y(d); }) + // .attr("text-anchor", "end") + // .attr("dx", -5) + // .attr("dy", 0.3 + "em") + // .text(function(d, i) { return d;}) + // .attr("class", "axis-labels y-axis-labels") + + // plot.selectAll("ticks.y-axis"). + // data(d3.range(0, d3.max(data) + 1)).enter().append("svg:line") + // .attr("x1", margin.left - 5) + // .attr("x2", margin.left) + // .attr("y1", function(d) { return dropHeight - y(d); }) + // .attr("y2", function(d) { return dropHeight - y(d); }) + // .attr("class", "axis-ticks y-axis-ticks") + + // plot.selectAll("line.y-axis"). + // data(data).enter().append("svg:line") + // .attr("x1", margin.left) + // .attr("x2", margin.left) + // .attr("y1", dropHeight) + // .attr("y2", margin.top) + // .attr("class", "axis-line y-axis-line") + }) + diff --git a/hw4/rad.csv b/hw4/rad.csv new file mode 100644 index 0000000..b756888 --- /dev/null +++ b/hw4/rad.csv @@ -0,0 +1,10 @@ +count,rad +20,1 +24,2 +38,3 +110,4 +115,5 +26,6 +17,7 +24,8 +132,24