Merge branch 'develop'

saudiwin · Dec 7, 2018 · efc5b4b · efc5b4b
2 parents c52ac78 + 7ca0c80
commit efc5b4b
Show file tree

Hide file tree

Showing 17 changed files with 547 additions and 787 deletions.
diff --git a/.gitignore b/.gitignore
@@ -15,3 +15,4 @@ irt_standard_tst*
 bfg-1.12.16.jar
 src-i386
 src-x64
+*.Rproj
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: idealstan
 Type: Package
 Title: Generalized IRT Ideal Point Models with 'Stan'
-Version: 0.5.0
-Date: 2018-10-27
+Version: 0.5.1
+Date: 2018-11-26
 Authors@R: c(person("Robert","Kubinec", role = c("aut","cre"),
                      email = "[email protected]"),
               person("Jonah", "Gabry", role = "ctb"),
@@ -16,7 +16,7 @@ Depends:
     R (>= 3.4.0),
     Rcpp (>= 0.12.18)
 Imports: 
-    rstan (>= 2.18.1),
+    rstan (>= 2.18.2),
     rstantools (>= 1.5.1),
     dplyr,
     tidyr,
@@ -36,7 +36,7 @@ Suggests:
     R.rsp
 LinkingTo: 
     StanHeaders (>= 2.18.0), 
-    rstan (>= 2.18.1), 
+    rstan (>= 2.18.2), 
     BH (>= 1.66), 
     Rcpp (>= 0.12.7), 
     RcppEigen (>= 0.3.2.9.0)

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,9 @@
+#Release v0.5.1
+* Fixed bugs in plotting functions related to plotting two groups.
+* Fixed bug in AR(1) model with restricted time variance.
+* Updated dependencies to rstan 2.18.2.
+* Added error-catching in covariate creation.
+
 #Release v0.5.0
 * New models for Poisson, ordinal-graded response, Normal and Log-normal outcomes.
 * Time-varying ideal point processes: random-walks and auto-regressive priors.

diff --git a/R/Estimate.R b/R/Estimate.R
@@ -315,7 +315,7 @@ id_make <- function(score_data=NULL,
     } else {
       # variable does not need to be recoded, only move missing to the end
       score_rename$outcome <- factor(score_rename$outcome)
-      score_rename$outcome <- fct_relevel(score_rename$outcome,miss_val,after=Inf)
+      score_rename$outcome <- fct_relevel(score_rename$outcome,as.character(miss_val),after=Inf)
     }
   } 
 
@@ -671,16 +671,18 @@ id_estimate <- function(idealdata=NULL,model_type=2,
     # this handles the situation in which the data is fake and only 
     # groups are used as parameters
     legis_pred <- idealdata@group_cov
+    lx <- dim(idealdata@group_cov)[3]
   } else {
     legispoints <- as.numeric(idealdata@score_matrix$person_id)
     num_legis <- max(legispoints)
     legis_pred <- idealdata@person_cov
+    lx <- dim(idealdata@person_cov)[3]
   }
 
   billpoints <- as.numeric(idealdata@score_matrix$item_id)
   timepoints <- as.numeric(factor(idealdata@score_matrix$time_id))
-  max_t <- max(timepoints)
-  num_bills <- max(billpoints)
+  max_t <- max(timepoints,na.rm=T)
+  num_bills <- max(billpoints,na.rm=T)
 
   Y <- idealdata@score_matrix$outcome
 
@@ -720,7 +722,7 @@ id_estimate <- function(idealdata=NULL,model_type=2,
 
   # set identification options
 
-  if(length(idealdata@restrict_var)==0 && is.null(prior_fit)) {
+  if(length(idealdata@restrict_var)==0 && is.null(prior_fit) && is.null(restrict_var)) {
       if(vary_ideal_pts %in% c('none','AR1')) {
         idealdata@restrict_var <- FALSE
       } else {
@@ -793,7 +795,7 @@ id_estimate <- function(idealdata=NULL,model_type=2,
                     bb=billpoints,
                     num_fix_high=as.integer(1),
                     num_fix_low=as.integer(1),
-                    LX=dim(idealdata@person_cov)[3],
+                    LX=lx,
                     SRX=ncol(idealdata@item_cov),
                     SAX=ncol(idealdata@item_cov_miss),
                     legis_pred=legis_pred,
@@ -839,15 +841,17 @@ id_estimate <- function(idealdata=NULL,model_type=2,
   if(use_groups==T) {
     legispoints <- as.numeric(idealdata@score_matrix$group_id)
     num_legis <- max(legispoints)
+    lx <- dim(idealdata@group_cov)[3]
   } else {
     legispoints <- as.numeric(idealdata@score_matrix$person_id)
     num_legis <- max(legispoints)
+    lx <- dim(idealdata@person_cov)[3]
   }
 
   billpoints <- as.numeric(idealdata@score_matrix$item_id)
   timepoints <- as.numeric(factor(idealdata@score_matrix$time_id))
-  max_t <- max(timepoints)
-  num_bills <- max(billpoints)
+  max_t <- max(timepoints,na.rm=T)
+  num_bills <- max(billpoints,na.rm=T)
 
   Y <- idealdata@score_matrix$outcome
 
@@ -888,7 +892,7 @@ id_estimate <- function(idealdata=NULL,model_type=2,
                     bb=billpoints,
                     num_fix_high=as.integer(1),
                     num_fix_low=as.integer(1),
-                    LX=dim(idealdata@person_cov)[3],
+                    LX=lx,
                     SRX=ncol(idealdata@item_cov),
                     SAX=ncol(idealdata@item_cov_miss),
                     legis_pred=legis_pred,

diff --git a/R/Generics.R b/R/Generics.R
@@ -267,7 +267,7 @@ setMethod('summary',signature(object='idealstan'),
             if(pars=='items') {
 
               # a bit trickier with item points
-              item_plot <- unique(object@score_data@score_matrix$item_id)
+              item_plot <- levels(object@score_data@score_matrix$item_id)
               if(object@model_type %in% c(1,2) || (object@model_type>6 && object@model_type<13)) {
                 # binary models and continuous
                 item_points <- lapply(item_plot,.item_plot_binary,object=object,

diff --git a/R/Helpers.R b/R/Helpers.R
@@ -130,6 +130,10 @@
     ideal_pts_mean <- rstan::extract(post_modes,'L_tp1')[[1]] %>% apply(3,mean) %>% .[new_order]
     sign_match <- sign(ideal_pts_low) == sign(ideal_pts_high)
     constrain_mean <- which(sign_match)
+    # need to select the largest single one if no confidence intervals that don't cross zero
+    if(length(constrain_mean)==0) {
+      constrain_mean <- which(ideal_pts_mean==min(ideal_pts_mean))
+    }
     if(length(constrain_mean)>1) {
       constrain_mean <- constrain_mean[abs(ideal_pts_mean[constrain_mean])==max(abs(ideal_pts_mean[constrain_mean]))]
     }
@@ -635,7 +639,13 @@
 
     to_array <- lapply(split(to_spread,pull(to_spread,!!third_dim_var)), function(this_data) {
       # spread and stuff into a list
-      spread_it <- spread(this_data,key=!!col_var_name,value=!!col_var_value) %>% 
+      spread_it <- try(spread(this_data,key=!!col_var_name,value=!!col_var_value))
+      if('try-error' %in% class(spread_it)) {
+        print('Failed to find unique covariate values for dataset:')
+        print(this_data)
+        stop()
+      }
+      spread_it <- spread_it %>% 
         select(-!!row_var,-!!third_dim_var) %>% as.matrix
       row.names(spread_it) <- unique(pull(this_data,!!row_var))
       return(spread_it)

diff --git a/R/Plot.R b/R/Plot.R
@@ -324,7 +324,7 @@ id_plot_legis <- function(object,return_data=FALSE,
 #'  seed=84520)
 #' # We plot the variances for all the Senators
 #' 
-#' id_plot_legis_var(senate114_fit,item_plot=5)
+#' id_plot_legis_var(senate114_fit)
 #' }
 id_plot_legis_var <- function(object,return_data=FALSE,
                               include=NULL,
@@ -343,10 +343,17 @@ id_plot_legis_var <- function(object,return_data=FALSE,
                                        low_limit=low_limit,
                                        type='variance')
 
+  if(object@use_groups) {
+    person_params$person_id <- person_params$group_id 
+    person_params <- person_params %>% distinct
+  }
+
   if(!is.null(include)) {
     person_params <- filter(person_params, person_id %in% include)
   }
 
+
+
   # Default plot: group names plotted as points
 
   if(group_color==TRUE) {
@@ -558,7 +565,12 @@ id_plot_legis_dyn <- function(object,return_data=FALSE,
   }
 
   if(!is.null(include)) {
-    person_params <- filter(person_params, person_id %in% include)
+    if(object@use_groups) {
+      person_params <- filter(person_params, group_id %in% include)
+    } else {
+      person_params <- filter(person_params, person_id %in% include)
+    }
+
   }
 
   if(object@use_groups) {
@@ -618,7 +630,7 @@ id_plot_legis_dyn <- function(object,return_data=FALSE,
     } else {
 
       outplot <- outplot + 
-        geom_line(aes_(y=~median_pt),
+        geom_line(aes_(y=~median_pt,group=base_id),
                   alpha=person_ci_alpha,
                   size=line_size)
     }
@@ -882,9 +894,10 @@ id_plot_rhats <- function(obj) {
 #' column names otherwise. 
 #' 
 #' @param object A fitted \code{idealstan} object
-#' @param cov_type Either 'person_cov' for person-level hierarchical parameters,
-#' 'discrim_reg_cov' for bill/item discrimination parameters from regular (non-inflated) model, and 
-#' 'discrim_infl_cov' for bill/item discrimination parameters from inflated model.
+#' @param cov_type Either \code{'person_cov'} for person-level hierarchical parameters,
+#' \code{'group_cov'} for group-level hierarchical parameters,
+#' \code{'discrim_reg_cov'} for bill/item discrimination parameters from regular (non-inflated) model, and 
+#' \code{'discrim_infl_cov'} for bill/item discrimination parameters from inflated model.
 #' @param filter_cov A character vector of coefficients from covariate plots to exclude from
 #' plotting (should be the names of coefficients as they appear in the plots)
 #' @param ... Any additional parameters passed on to \code{\link[bayesplot]{mcmc_intervals}}
@@ -902,8 +915,13 @@ id_plot_cov <- function(object,
   to_plot <- as.array(object@stan_samples,
                    pars=param_name)
 
+  if(object@use_groups && cov_type=='person_cov') {
+    cov_type <- 'group_cov'
+  }
+
   # reset names of parameters
   new_names <- switch(cov_type,person_cov=attributes(object@score_data@person_cov)$dimnames$colnames,
+                      group_cov=attributes(object@score_data@group_cov)$dimnames$colnames,
                       discrim_reg=attributes(object@score_data@item_cov)$dimnames$colnames,
                       discrim_abs=attributes(object@score_data@item_cov_miss)$dimnames$colnames)
 

diff --git a/R/rstan_generics.R b/R/rstan_generics.R
@@ -51,9 +51,16 @@ setMethod('id_post_pred',signature(object='idealstan'),function(object,draws=100
                                                                 type='predict',
                                                                 sample_scores=NULL,...) {
   #all_params <- rstan::extract(object@stan_samples)
-  
+
   n_votes <- nrow(object@score_data@score_matrix)
-  n_iters <- (object@stan_samples@stan_args[[1]]$iter-object@stan_samples@stan_args[[1]]$warmup)*length(object@stan_samples@stan_args)
+
+  if(object@stan_samples@stan_args[[1]]$method != 'variational') {
+    n_iters <- (object@stan_samples@stan_args[[1]]$iter-object@stan_samples@stan_args[[1]]$warmup)*length(object@stan_samples@stan_args)
+  } else {
+    # there is no warmup for VB
+    n_iters <- dim(object@stan_samples)[1]
+  }
+
   if(!is.null(sample_scores) && type!='log_lik') {
     this_sample <- sample(1:n_votes,sample_scores)
   } else {
@@ -84,7 +91,7 @@ setMethod('id_post_pred',signature(object='idealstan'),function(object,draws=100
   }
 
   bill_points <- as.numeric(object@score_data@score_matrix$item_id)[this_sample]
-  time_points <- as.numeric(object@score_data@score_matrix$time_id)[this_sample]
+  time_points <- as.numeric(factor(object@score_data@score_matrix$time_id))[this_sample]
 
   remove_nas <- !is.na(y) & !is.na(person_points) & !is.na(bill_points) & !is.na(time_points)
   y <- y[remove_nas]

diff --git a/cran-comments.md b/cran-comments.md
@@ -1,25 +1,15 @@
 ## Test environments
 * ubuntu 14.04 (on travis-ci), R devel and release
 * mac os x sierra (on travis-ci), R release
-* win-builder, R release
+* win-builder, R devel
 
 ## R CMD check results
-There were no ERRORs or WARNINGs. 
-
-* checking CRAN incoming feasibility ... NOTE
-Maintainer: 'Robert Kubinec <[email protected]>'
-
-New maintainer:
-  Robert Kubinec <[email protected]>
-Old maintainer(s):
-  Robert Kubinec <[email protected]>
-
-Explanation: I am keeping my contact information up to date.
+There were no ERRORs or WARNINGs. 2 NOTEs:
 
 * checking installed package size ... NOTE
-  installed size is  7.3Mb
+  installed size is  5.5Mb
   sub-directories of 1Mb or more:
-    libs   5.9Mb
+    libs   4.1Mb
 
 Explanation: This package has a large installed library because it uses the Stan MCMC engine as a backend, and it comes with pre-compiled C++ modules that are loaded into Stan through the package `rstan`. As a result, the libraries are large, but the actual R code in the package is relatively small.
-Original file line number
+Diff line change
@@ Expand Up / @@ -15,3 +15,4 @@ irt_standard_tst* @@
     bfg-1.12.16.jar
     src-i386
     src-x64
+    *.Rproj