diff --git a/hw2/_posts/2014-11-19-hw2-Liangquan-Zhou-lz2377.Rmd b/hw2/_posts/2014-11-19-hw2-Liangquan-Zhou-lz2377.Rmd
new file mode 100644
index 0000000..1d19de1
--- /dev/null
+++ b/hw2/_posts/2014-11-19-hw2-Liangquan-Zhou-lz2377.Rmd
@@ -0,0 +1,54 @@
+---
+title: "HW2"
+author: "Liangquan Zhou lz2377"
+date: "Wednesday, November 19, 2014"
+output: html_document
+---
+
+
+## Now let's finish today's hw(2014-09-18). First load sample data.
+
+```{r, cache=TRUE}
+# No setwd(): the CSV files are read relative to the working directory (the document's folder when knitting).
+sample_data <- read.csv("sample_health_facilities.csv") # read the .csv file
+```
+
+1. Select all facilities located in the southern zones of Nigeria.
+
+```{r, cache=TRUE}
+new_data <- subset(sample_data, zone %in% c("Southwest", "Southeast", "South-South"))
+```
+
+2. Incorporate the `pop_2006` column from the `lgas.csv` file into the new data.frame containing only those facilities located in Southern Nigeria. (Hint: your id column is `lga_id`)
+
+```{r, cache=TRUE}
+lgas <- read.csv("lgas.csv", stringsAsFactors = TRUE)
+new_data <- merge(new_data, lgas[c("lga_id", "pop_2006")], by = "lga_id")  # inner join on lga_id
+```
+
+And we can use `str` to see the new dataset:
+```{r}
+str(new_data)
+```
+
+3. Calculate the total number of full time nurses and doctors for all health facilities in each state.
+```{r, cache=TRUE}
+tapply(new_data$num_doctors_fulltime, factor(new_data$state), sum, na.rm = TRUE)  # factor() drops states with no rows left; na.rm skips missing counts
+tapply(new_data$num_nurses_fulltime, factor(new_data$state), sum, na.rm = TRUE)
+```
+
+4. Sort the resulting dataset by state population, in descending order.
+```{r,cache=TRUE}
+data1 <- subset(new_data, select = c(num_doctors_fulltime, num_nurses_fulltime, pop_2006, state))
+data1$state <- factor(as.character(data1$state))  # rebuild the factor so unused state levels are dropped
+result <- data.frame(tapply(data1$num_doctors_fulltime, data1$state, sum, na.rm = TRUE),  # na.rm: skip missing counts
+  tapply(data1$num_nurses_fulltime, data1$state, sum, na.rm = TRUE),
+  tapply(data1$pop_2006, data1$state, sum))  # NOTE(review): adds an LGA's population once per facility row; confirm this is the intended "state population"
+names(result) <- c("num_doctors_fulltime", "num_nurses_fulltime", "pop_2006")
+result <- result[order(result$pop_2006, decreasing = TRUE), ]  # step 4 asks for descending order
+```
+
+The result is:
+```{r,cache=TRUE}
+result
+```
\ No newline at end of file
diff --git a/hw2/_posts/2014-11-19-hw2-Liangquan-Zhou-lz2377.html b/hw2/_posts/2014-11-19-hw2-Liangquan-Zhou-lz2377.html
new file mode 100644
index 0000000..14c575a
--- /dev/null
+++ b/hw2/_posts/2014-11-19-hw2-Liangquan-Zhou-lz2377.html
@@ -0,0 +1,158 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+HW2
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ + + + + +
+

Now let’s finish today’s hw(2014-09-18). First load sample data.

+
setwd("C:/Liangquan Zhou/Study/2014 fall/data visualization/hw1")
+sample_data= read.csv("sample_health_facilities.csv") # read the .csv file
+
    +
  1. Select all facilities located in the southern zones of Nigeria.
  2. +
+
new_data= subset(sample_data, zone %in% c("Southwest","Southeast","South-South"))
+
    +
  1. Incorporate the pop_2006 column from the lgas.csv file into the new data.frame containing only those facilities located in Southern Nigeria. (Hint: your id column is lga_id)
  2. +
+
lgas= read.csv("lgas.csv", stringsAsFactors=T)
+new_data= merge(new_data, lgas[c("lga_id","pop_2006")],by= "lga_id")
+

And we can use str to see the new dataset:

+
str(new_data)
+
## 'data.frame':    26 obs. of  11 variables:
+##  $ lga_id                : int  49 67 76 101 183 191 218 304 312 316 ...
+##  $ lga                   : Factor w/ 50 levels "Aliero","Anaocha",..: 2 3 4 8 12 13 14 19 20 21 ...
+##  $ state                 : Factor w/ 24 levels "Abia","Adamawa",..: 3 20 3 6 10 3 10 20 3 19 ...
+##  $ zone                  : Factor w/ 6 levels "North-Central",..: 5 6 5 4 5 5 5 6 5 6 ...
+##  $ c_section_yn          : logi  FALSE FALSE FALSE FALSE FALSE TRUE ...
+##  $ num_nurses_fulltime   : int  2 0 0 3 1 0 7 6 2 0 ...
+##  $ gps                   : Factor w/ 50 levels "10.50716994 7.39845258 633.4000244140625 5.0",..: 27 41 30 34 22 26 18 44 25 37 ...
+##  $ num_lab_techs_fulltime: int  NA 0 1 2 0 0 1 1 0 0 ...
+##  $ management            : Factor w/ 1 level "public": 1 1 1 1 NA NA NA NA NA 1 ...
+##  $ num_doctors_fulltime  : int  NA 0 1 0 0 1 0 1 1 0 ...
+##  $ pop_2006              : int  285002 68643 158410 105822 130931 158231 165593 96748 302158 284336 ...
+
    +
  1. Calculate the total number of full time nurses and doctors for all health facilities in each state.
  2. +
+
tapply(new_data$num_doctors_fulltime,  new_data$state,sum)
+
##        Abia     Adamawa     Anambra      Bauchi       Benue Cross River 
+##         308          NA          NA          NA          NA           0 
+##       Delta         Edo       Ekiti         Imo      Jigawa      Kaduna 
+##           2           0           1           0          NA          NA 
+##        Kano     Katsina       Kebbi        Kogi       Lagos       Niger 
+##          NA          NA          NA          NA           4          NA 
+##        Ogun        Osun     Plateau      Rivers      Taraba     Zamfara 
+##           2           1          NA           2          NA          NA
+
tapply(new_data$num_nurses_fulltime,  new_data$state,sum)
+
##        Abia     Adamawa     Anambra      Bauchi       Benue Cross River 
+##          NA          NA           4          NA          NA           3 
+##       Delta         Edo       Ekiti         Imo      Jigawa      Kaduna 
+##          10           0           2           8          NA          NA 
+##        Kano     Katsina       Kebbi        Kogi       Lagos       Niger 
+##          NA          NA          NA          NA           4          NA 
+##        Ogun        Osun     Plateau      Rivers      Taraba     Zamfara 
+##           0           6          NA           2          NA          NA
+
    +
  1. Sort the resulting dataset by state population, in descending order.
  2. +
+
data1=subset(new_data,select=c(num_doctors_fulltime,num_nurses_fulltime,pop_2006,state))
+data1$state=as.factor(as.character(data1$state))
+result=data.frame(tapply(data1$num_doctors_fulltime, data1$state,sum),
+  tapply(data1$num_nurses_fulltime,  data1$state,sum),
+  tapply(data1$pop_2006,  data1$state,sum))
+names(result)=c("num_doctors_fulltime","num_nurses_fulltime","pop_2006")
+result=result[order(result$pop_2006, decreasing=TRUE),]  # largest population first, as step 4 requires
+

The result is:

+
result
+
##             num_doctors_fulltime num_nurses_fulltime pop_2006
+## Lagos                          4                   4  1802377
+## Anambra                       NA                   4   903801
+## Delta                          2                  10   828912
+## Ogun                           2                   0   597659
+## Cross River                    0                   3   470167
+## Imo                            0                   8   439241
+## Rivers                         2                   2   284010
+## Abia                         308                  NA   220660
+## Osun                           1                   6   165391
+## Edo                            0                   0   120813
+## Ekiti                          1                   2   113754
+
+ + +
+ + + + + + + +