Review note: the .gitignore hunks were dropped from this patch. They only added unresolved merge-conflict markers and a duplicate `.Rproj.user` entry, so resolving the conflict leaves .gitignore unchanged.

diff --git a/hw2/_posts/2014-11-19-hw2-Liangquan-Zhou-lz2377.Rmd b/hw2/_posts/2014-11-19-hw2-Liangquan-Zhou-lz2377.Rmd
new file mode 100644
index 0000000..1d19de1
--- /dev/null
+++ b/hw2/_posts/2014-11-19-hw2-Liangquan-Zhou-lz2377.Rmd
@@ -0,0 +1,70 @@
+---
+title: "HW2"
+author: "Liangquan Zhou lz2377"
+date: "Wednesday, November 19, 2014"
+output: html_document
+---
+
+
+## Now let's finish today's hw(2014-09-18). First load sample data.
+
+```{r, cache=TRUE}
+# Read the facility sample relative to this document. The original chunk
+# called setwd() on an absolute path that exists only on the author's machine.
+# NOTE(review): assumes the CSV files sit next to this .Rmd -- confirm.
+sample_data <- read.csv("sample_health_facilities.csv")
+```
+
+1. Select all facilities located in the southern zones of Nigeria.
+
+```{r, cache=TRUE}
+southern_zones <- c("Southwest", "Southeast", "South-South")
+# droplevels() discards factor levels (e.g. northern states) left without
+# rows, so the per-state summaries below list only southern states.
+new_data <- droplevels(subset(sample_data, zone %in% southern_zones))
+```
+
+2. Incorporate the pop2006 column from the `lgas.csv` file into the new data.frame containing only those facilities located in Southern Nigeria. (Hint: your id column is `lga_id`)
+
+```{r, cache=TRUE}
+lgas <- read.csv("lgas.csv", stringsAsFactors = TRUE)
+# merge() keeps only rows whose lga_id occurs in both tables (all = FALSE).
+new_data <- merge(new_data, lgas[c("lga_id", "pop_2006")], by = "lga_id")
+```
+
+And we can use `str` to see the new dataset:
+```{r}
+str(new_data)
+```
+
+3. Calculate the total number of full time nurses and doctors for all health facilities in each state.
+```{r, cache=TRUE}
+# na.rm = TRUE: a facility with a missing count is skipped instead of
+# turning its state's total into NA.
+tapply(new_data$num_doctors_fulltime, new_data$state, sum, na.rm = TRUE)
+tapply(new_data$num_nurses_fulltime, new_data$state, sum, na.rm = TRUE)
+```
+
+4. Sort the resulting dataset by state population, in descending order.
+```{r,cache=TRUE}
+data1 <- subset(
+  new_data,
+  select = c(num_doctors_fulltime, num_nurses_fulltime, pop_2006, state)
+)
+# Sum one column per state; missing counts are skipped (na.rm = TRUE).
+sum_by_state <- function(x) tapply(x, data1$state, sum, na.rm = TRUE)
+# NOTE(review): pop_2006 is summed once per facility row -- confirm no LGA
+# contributes more than one facility, otherwise it is double counted.
+result <- data.frame(
+  num_doctors_fulltime = sum_by_state(data1$num_doctors_fulltime),
+  num_nurses_fulltime = sum_by_state(data1$num_nurses_fulltime),
+  pop_2006 = sum_by_state(data1$pop_2006)
+)
+# The task asks for descending population; order() is ascending by default.
+result <- result[order(result$pop_2006, decreasing = TRUE), ]
+```
+
+The result is:
+```{r,cache=TRUE}
+result
+```
\ No newline at end of file
diff --git a/hw2/_posts/2014-11-19-hw2-Liangquan-Zhou-lz2377.html b/hw2/_posts/2014-11-19-hw2-Liangquan-Zhou-lz2377.html
new file mode 100644
index 0000000..14c575a
--- /dev/null
+++ b/hw2/_posts/2014-11-19-hw2-Liangquan-Zhou-lz2377.html
@@ -0,0 +1,158 @@
+
+
+
+
+
+ + + + + + + + +setwd("C:/Liangquan Zhou/Study/2014 fall/data visualization/hw1")
+sample_data= read.csv("sample_health_facilities.csv") # read the .csv file
+new_data= subset(sample_data, zone %in% c("Southwest","Southeast","South-South"))
+lgas.csv
file into the new data.frame containing only those facilities located in Southern Nigeria. (Hint: your id column is lga_id
)lgas= read.csv("lgas.csv", stringsAsFactors=T)
+new_data= merge(new_data, lgas[c("lga_id","pop_2006")],by= "lga_id")
+And we can use str
to see the new dataset:
str(new_data)
+## 'data.frame': 26 obs. of 11 variables:
+## $ lga_id : int 49 67 76 101 183 191 218 304 312 316 ...
+## $ lga : Factor w/ 50 levels "Aliero","Anaocha",..: 2 3 4 8 12 13 14 19 20 21 ...
+## $ state : Factor w/ 24 levels "Abia","Adamawa",..: 3 20 3 6 10 3 10 20 3 19 ...
+## $ zone : Factor w/ 6 levels "North-Central",..: 5 6 5 4 5 5 5 6 5 6 ...
+## $ c_section_yn : logi FALSE FALSE FALSE FALSE FALSE TRUE ...
+## $ num_nurses_fulltime : int 2 0 0 3 1 0 7 6 2 0 ...
+## $ gps : Factor w/ 50 levels "10.50716994 7.39845258 633.4000244140625 5.0",..: 27 41 30 34 22 26 18 44 25 37 ...
+## $ num_lab_techs_fulltime: int NA 0 1 2 0 0 1 1 0 0 ...
+## $ management : Factor w/ 1 level "public": 1 1 1 1 NA NA NA NA NA 1 ...
+## $ num_doctors_fulltime : int NA 0 1 0 0 1 0 1 1 0 ...
+## $ pop_2006 : int 285002 68643 158410 105822 130931 158231 165593 96748 302158 284336 ...
+tapply(new_data$num_doctors_fulltime, new_data$state,sum)
+## Abia Adamawa Anambra Bauchi Benue Cross River
+## 308 NA NA NA NA 0
+## Delta Edo Ekiti Imo Jigawa Kaduna
+## 2 0 1 0 NA NA
+## Kano Katsina Kebbi Kogi Lagos Niger
+## NA NA NA NA 4 NA
+## Ogun Osun Plateau Rivers Taraba Zamfara
+## 2 1 NA 2 NA NA
+tapply(new_data$num_nurses_fulltime, new_data$state,sum)
+## Abia Adamawa Anambra Bauchi Benue Cross River
+## NA NA 4 NA NA 3
+## Delta Edo Ekiti Imo Jigawa Kaduna
+## 10 0 2 8 NA NA
+## Kano Katsina Kebbi Kogi Lagos Niger
+## NA NA NA NA 4 NA
+## Ogun Osun Plateau Rivers Taraba Zamfara
+## 0 6 NA 2 NA NA
+data1=subset(new_data,select=c(num_doctors_fulltime,num_nurses_fulltime,pop_2006,state))
+data1$state=as.factor(as.character(data1$state))
+result=data.frame(tapply(data1$num_doctors_fulltime, data1$state,sum),
+ tapply(data1$num_nurses_fulltime, data1$state,sum),
+ tapply(data1$pop_2006, data1$state,sum))
+names(result)=c("num_doctors_fulltime","num_nurses_fulltime","pop_2006")
+result=result[order(result$pop_2006),]
+The result is:
+result
+## num_doctors_fulltime num_nurses_fulltime pop_2006
+## Ekiti 1 2 113754
+## Edo 0 0 120813
+## Osun 1 6 165391
+## Abia 308 NA 220660
+## Rivers 2 2 284010
+## Imo 0 8 439241
+## Cross River 0 3 470167
+## Ogun 2 0 597659
+## Delta 2 10 828912
+## Anambra NA 4 903801
+## Lagos 4 4 1802377
+