diff --git a/ChangeLog b/ChangeLog
deleted file mode 100644
index ae9f1e1..0000000
--- a/ChangeLog
+++ /dev/null
@@ -1,15 +0,0 @@
-Date        Version  Comment
-2015-12-09  0.0-1    First version on CRAN
-2016-03-04  0.0-2    Minor improvements
-2016-12-13  0.1-1    New function to pre-download Census data and other minor improvements
-2017-03-03  0.1-2    Updated surname handling, enhanced demographics option, and improved error handling and documentation
-2017-04-10  0.1-3    Allows Census data download at level user prefers (block, tract, or county)
-2017-05-03  0.1-4    Fixed error in merge_surnames.R and updated relevant documentation
-2017-06-08  0.1-5    Updated http to https to access U.S. Census API
-2017-08-16  0.1-6    Removed extraneous documentation, renamed 2010 Surname List object, and added place as geography
-2017-08-31  0.1-7    Added testthat functionality
-2019-02-12  0.1-8    Updated to be compatible with U.S. Census API updates
-2019-02-20  0.1-9    Fixed census_helper.R so that state field can be lower case in user data
-2020-10-08  0.1-10   Fixed minor warning message
-2020-04-22  0.1-11   Minor fix requested by CRAN
-2020-05-17  0.1-12   Some Census API improvements (thanks to Silvia Kim)
diff --git a/DESCRIPTION b/DESCRIPTION
index c84e6a1..376bbee 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,13 +1,11 @@
 Package: wru
 Version: 0.1-12
 Date: 2021-05-17
-Title: Who are You? Bayesian Prediction of Racial Category Using Surname and
-    Geolocation
+Title: Who are You? Bayesian Prediction of Racial Category Using
+        Surname and Geolocation
 Authors@R: c(
   person("Kabir", "Khanna", email = "kabirkhanna@gmail.com", role = c("aut", "cre")),
-  person("Kosuke", "Imai", email = "imai@harvard.edu", role = c("aut")),
-  person("Evan", "Rosenman", email = "etrrosenman@gmail.com", role = c("aut")),
-  person("Santiago", "Olivella", email = "olivella@unc.edu", role = c("aut"))
+  person("Kosuke", "Imai", email = "imai@harvard.edu", role = c("aut"))
   )
 Description: Predicts individual race/ethnicity using surname, geolocation,
     and other attributes, such as gender and age. The method utilizes the Bayes'
@@ -17,24 +15,13 @@ Description: Predicts individual race/ethnicity using surname, geolocation,
     Registration Records" Political Analysis <DOI:10.1093/pan/mpw001>.
 URL: https://github.com/kosukeimai/wru
 BugReports: https://github.com/kosukeimai/wru/issues
-Depends:
-    R (>= 3.5.0),
-    utils
-Imports:
-    devtools (>= 1.10.0),
-    PL94171,
-    stringr,
-    Rcpp,
-    dplyr (>= 1.0.0)
-LinkingTo: 
-    Rcpp,
-    RcppEigen,
-    RcppProgress
-Suggests:
-    testthat
+Depends: R (>= 3.5.0), utils
+Imports: devtools (>= 1.10.0)
+Suggests: testthat
 LazyLoad: yes
 LazyData: yes
 LazyDataCompression: xz
 License: GPL (>= 3)
-RoxygenNote: 7.1.2
+RoxygenNote: 7.2.0
 Encoding: UTF-8
+NeedsCompilation: no
diff --git a/NAMESPACE b/NAMESPACE
index 542d5c9..f9efb29 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -2,21 +2,10 @@
 
 export(census_geo_api)
 export(census_helper)
-export(census_helper_new)
-export(co_cluster)
-export(format_legacy_data)
 export(get_census_api)
 export(get_census_api_2)
 export(get_census_data)
-export(merge_names)
 export(merge_surnames)
 export(predict_race)
-export(predict_race_new)
 export(vec_to_chunk)
-import(PL94171)
 import(devtools)
-import(stringr)
-importFrom(Rcpp,evalCpp)
-importFrom(dplyr,.data)
-importFrom(dplyr,`%>%`)
-useDynLib(wru, .registration=TRUE)
diff --git a/R/RcppExports.R b/R/RcppExports.R
deleted file mode 100644
index 96ed265..0000000
--- a/R/RcppExports.R
+++ /dev/null
@@ -1,31 +0,0 @@
-# Generated by using Rcpp::compileAttributes() -> do not edit by hand
-# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
-
-#' Collapsed Gibbs sampler for keyWRU. Internal function
-#'
-#' @param data A list with the following elements
-#' \itemize{
-#'  \item{name_type_n}{Number of name types}
-#'  \item{race_n}{Number of races}
-#'  \item{geo_n}{Number of geolocations}
-#'  \item{geo_race_table}{Matrix of conditional probabilities Pr(Race | Geolocation), with geolocations in the rows}
-#'  \item{voters_per_geo}{Number of voterfile records per geolocation}
-#'  \item{race_inits}{Table of initial race assignments per voterfile record}
-#'  \item{name_data}{
-#'                    \itemize{
-#'                     \item{n_unique_names}{Number of unique names}
-#'                     \item{record_name_id}{Name id corresponding to each voterfile record}
-#'                     \item{keynames}{Integer matrix of name id's used as keynames for each race (race in the columns)}
-#'                     \item{census_table}{Matrix of Pr(Name | Race), with races in the columns}
-#'                     \item{beta_prior}{Scalar prior for name-race symmetric Dirichlet distribution}
-#'                     \item{gamma_prior}{Vector prior shapes for keyname/non-keyname Beta mixture}
-#'                    }
-#'                  }
-#' }
-#' @param ctrl A list of control arguments; see \code{co_cluster} function for details.
-#'
-#' @keywords internal
-keyWRU_fit <- function(data, ctrl) {
-    .Call(`_wru_keyWRU_fit`, data, ctrl)
-}
-
diff --git a/R/census_helper_v2.R b/R/census_helper_v2.R
deleted file mode 100644
index c3adf78..0000000
--- a/R/census_helper_v2.R
+++ /dev/null
@@ -1,137 +0,0 @@
-#' Census helper function.
-#'
-#' \code{census_helper_v2} links user-input dataset with Census geographic data.
-#'
-#' This function allows users to link their geocoded dataset (e.g., voter file) 
-#' with U.S. Census 2010 data. The function extracts Census Summary File data 
-#' at the county, tract, or block level using the 'UScensus2010' package. Census data 
-#' calculated are Pr(Geolocation | Race) where geolocation is county, tract, or block.
-#'
-#' @param key A required character object. Must contain user's Census API
-#'  key, which can be requested \href{https://api.census.gov/data/key_signup.html}{here}.
-#' @param voter.file An object of class \code{data.frame}. Must contain field(s) named 
-#'  \code{\var{county}}, \code{\var{tract}}, \code{\var{block}}, and/or \code{\var{place}}
-#'  specifying geolocation. These should be character variables that match up with 
-#'  U.S. Census categories. County should be three characters (e.g., "031" not "31"), 
-#'  tract should be six characters, and block should be four characters. 
-#'  Place should be five characters if it is included.
-#' @param states A character vector specifying which states to extract 
-#'  Census data for, e.g. \code{c("NJ", "NY")}. Default is \code{"all"}, which extracts 
-#'  Census data for all states contained in user-input data.
-#' @param geo A character object specifying what aggregation level to use. 
-#'  Use \code{"county"}, \code{"tract"}, or \code{"block"}. Default is \code{"tract"}. 
-#'  Warning: extracting block-level data takes very long.
-#' @param age A \code{TRUE}/\code{FALSE} object indicating whether to condition on 
-#'  age or not. If \code{FALSE} (default), function will return Pr(Geolocation | Race).
-#'  If \code{TRUE}, function will return Pr(Geolocation, Age | Race). 
-#'  If \code{\var{sex}} is also \code{TRUE}, function will return Pr(Geolocation, Age, Sex | Race).
-#' @param sex A \code{TRUE}/\code{FALSE} object indicating whether to condition on 
-#'  sex or not. If \code{FALSE} (default), function will return Pr(Geolocation | Race). 
-#'  If \code{TRUE}, function will return Pr(Geolocation, Sex | Race). 
-#'  If \code{\var{age}} is also \code{TRUE}, function will return Pr(Geolocation, Age, Sex | Race).
-#' @param census.data A optional census object of class \code{list} containing 
-#' pre-saved Census geographic data. Can be created using \code{get_census_data} function.
-#' If \code{\var{census.data}} is provided, the \code{\var{age}} element must have the same value
-#' as the \code{\var{age}} option specified in this function (i.e., \code{TRUE} in both or 
-#' \code{FALSE} in both). Similarly, the \code{\var{sex}} element in the object provided in 
-#' \code{\var{census.data}} must have the same value as the \code{\var{sex}} option here.
-#' If \code{\var{census.data}} is missing, Census geographic data will be obtained via Census API. 
-#' @param retry The number of retries at the census website if network interruption occurs.
-#' @return Output will be an object of class \code{data.frame}. It will 
-#'  consist of the original user-input data with additional columns of 
-#'  Census data.
-#'
-#' @examples
-#' \dontshow{data(voters)}
-#' \dontrun{census_helper(key = "...", voter.file = voters, states = "nj", geo = "block")}
-#' \dontrun{census_helper(key = "...", voter.file = voters, states = "all", geo = "tract", 
-#' age = TRUE, sex = TRUE)}
-#'
-#' @export
-census_helper_new <- function(key, voter.file, states = "all", geo = "tract", age = FALSE, sex = FALSE, census.data = NA, retry = 0) {
-  
-  if (is.na(census.data) || (typeof(census.data) != "list")) {
-    toDownload = TRUE
-  } else {
-    toDownload = FALSE
-  }
-  
-  if (toDownload) {
-    if (missing(key)) {
-      stop('Must enter U.S. Census API key, which can be requested at https://api.census.gov/data/key_signup.html.')
-    }
-  } 
-  
-  states <- toupper(states)
-  if (states == "ALL") {
-    states <- toupper(as.character(unique(voter.file$state)))
-  }
-  
-  df.out <- NULL
-  
-  for (s in 1:length(states)) {
-    
-    print(paste("State ", s, " of ", length(states), ": ", states[s], sep  = ""))
-    state <- toupper(states[s])
-    
-    if (geo == "place") {
-      geo.merge <- c("place")
-      if ((toDownload) || (is.null(census.data[[state]])) || (census.data[[state]]$age != age) || (census.data[[state]]$sex != sex)) {
-        census <- census_geo_api(key, state, geo = "place", age, sex, retry)
-      } else {
-        census <- census.data[[toupper(state)]]$place
-      }
-    }
-    
-    if (geo == "county") {
-      geo.merge <- c("county")
-      if ((toDownload) || (is.null(census.data[[state]])) || (census.data[[state]]$age != age) || (census.data[[state]]$sex != sex)) {
-        census <- census_geo_api(key, state, geo = "county", age, sex, retry)
-      } else {
-        census <- census.data[[toupper(state)]]$county
-      }
-    }
-    
-    if (geo == "tract") {
-      geo.merge <- c("county", "tract")
-      if ((toDownload) || (is.null(census.data[[state]])) || (census.data[[state]]$age != age) || (census.data[[state]]$sex != sex)) {
-        census <- census_geo_api(key, state, geo = "tract", age, sex, retry)
-      } else {
-        census <- census.data[[toupper(state)]]$tract
-      }
-    }
-    
-    if (geo == "block") {
-      geo.merge <- c("county", "tract", "block")
-      if ((toDownload) || (is.null(census.data[[state]])) || (census.data[[state]]$age != age) || (census.data[[state]]$sex != sex)) {
-        census <- census_geo_api(key, state, geo = "block", age, sex, retry)
-      } else {
-        census <- census.data[[toupper(state)]]$block
-      }
-    }
-    
-    census$state <- state
-    
-    if (age == F & sex == F) {
-      
-      ## Calculate Pr(Geolocation | Race)
-      geoPopulations <- rowSums(census[,grepl("P00", names(census))])
-      census$r_whi <- census$P005003 / geoPopulations #Pr(White | Geo)
-      census$r_bla <- census$P005004 / geoPopulations #Pr(Black | Geo)
-      census$r_his <- census$P005010 / geoPopulations #Pr(Latino | Geo)
-      census$r_asi <- (census$P005006 + census$P005007) / geoPopulations #Pr(Asian or NH/PI | Geo)
-      census$r_oth <- (census$P005005 + census$P005008 + census$P005009) / geoPopulations #Pr(AI/AN, Other, or Mixed | Geo)
-      
-      drop <- c(grep("state", names(census)), grep("P005", names(census)))
-      voters.census <- merge(voter.file[toupper(voter.file$state) == toupper(states[s]), ], census[, -drop], by = geo.merge, all.x  = T)
-      
-    }
-    
-    keep.vars <- c(names(voter.file)[names(voter.file) != "agecat"], 
-                   paste("r", c("whi", "bla", "his", "asi", "oth"), sep = "_"))
-    df.out <- as.data.frame(rbind(df.out, voters.census[keep.vars]))
-    
-  }
-  
-  return(df.out)
-}
diff --git a/R/co_cluster.R b/R/co_cluster.R
deleted file mode 100644
index d43d642..0000000
--- a/R/co_cluster.R
+++ /dev/null
@@ -1,232 +0,0 @@
-#' Name-Race Co-clustering Using Keynames
-#' 
-#' Estimate a Bayesian mixed-membership, keyword-assisted race classification model for geo-locations
-#' in a voterfile.  
-#'
-#' @param voter.file 	An object of class data.frame. Must contain a row for each individual being predicted, 
-#'                    as well as fields named "surname", and/or"first", and/or "middle", containing each 
-#'                    individual's corresponding names. It must also contain a field named state, which 
-#'                    contains the two-character lower-case abbreviation for each individual's state of residence (e.g., "nj" for New Jersey).
-#'                    If using Census geographic data in race/ethnicity
-#'                    predictions, voter.file must also contain at least one of the following fields: county, tract, block,
-#'                    and/or place. These fields should contain character strings matching U.S. Census categories. County is three
-#'                    characters (e.g., "031" not "31"), tract is six characters, and block is four characters. Place is
-#'                    five characters. See \code{\link{predict_race}} for other optional fields.
-#' @param name_types  Character vector. Must have elements in "surname", "first", and "middle".
-#' @param name_race_tables Named list, with as many elements as there are names in \code{name_types}, and names matching
-#'                         elements in \code{name_types}. Each list element should be a data.frame of unique names (first column) by race (remaining columns),
-#'                         with conditional probabilities p(Name|Race). 
-#' @param census.geo Required character vector. One of "county", "tract", "block" or "place". See \code{\link{predict_race}}.                        
-#' @param ... Arguments passed to \code{\link{predict_race}}.
-#' @param control List of control arguments, including 
-#' \itemize{
-#' \item{race_init}{ Initial race for each observation in voter.file. Must be an integer, with
-#'                  0=white, 1=black, 2=hispanic, 3=asian, and 4=other.}
-#'  \item{fit_insample}{ Boolean. Should model check in-sample fit of race prediction for each
-#'                      record? If \code{TRUE}, \code{race_obs} cannot be \code{NULL}. Defaults to \code{FALSE}.}
-#'  \item{race_obs}{ Observed race for each record in \code{voter.file}. Must be an integer, with
-#'                  0=white, 1=black, 2=hispanic, 3=asian, and 4=other.}
-#'  \item{iter}{ Number of MCMC iterations. Defaults to 1000.}
-#'  \item{burnin}{ Number of iterations discarded as burnin. Defaults to half of \code{iter}.}
-#'  \item{thin}{ Thinning interval for MCMC. Defaults to 1.}
-#'  \item{log_post_interval}{ Interval for storing the log_posterior. Defaults to 10.}
-#'  \item{beta_prior}{ Parameter for symmetric Dirichlet prior over names for each race. Defaults to 5.}
-#'  \item{gamma_prior}{ Parameter for Beta prior over keyname/non-keyname mixture components. Defaults to c(5, 5).}
-#'  \item{verbose}{ Print progress information. Defaults to \code{TRUE}.}
-#'  \item{seed}{ RNG seed. If \code{NULL}, a seed is generated and stored for reproducibility.}
-#' }
-#'
-#'
-#' @return A named list:
-#' \itemize{
-#' \item{name_by_race}{ Named list of predicted distributions of name by race for each name type.}
-#' \item{race_by_record}{ A copy of \code{voter.file}, with additional columns of predicted
-#'                       race probabilities, names \code{pred.<race>}. }
-#' \item{loglik}{ Values of log likelihood, evaluated every \code{log_post_interval}.}
-#' \item{fit_insample}{ When \code{fit_insample=TRUE}, a probability of correct in_sample prediction 
-#'                      for each record in \code{vote.file}.} 
-#' } 
-#' 
-#' 
-#' @export
-#'
-co_cluster <- function(voter.file,
-                       name_types,
-                       name_race_tables,
-                       census.geo,
-                       ...,
-                       control = NULL)
-{
-  ##Data quality checks
-  n_race <- ncol(name_race_tables[[1]])-1
-  stopifnot(all(sapply(name_race_tables, ncol) == n_race+1),
-            all(name_types %in% c("surname","first", "middle")),
-            all(names(name_race_tables) %in% name_types),
-            name_types %in% names(voter.file),
-            census.geo %in% c("county","tract","block","place")
-            )
-  
-  
-  voter.file$state <- toupper(voter.file$state)
-  
-  ## Form control list
-  ctrl <- list(iter = 1000,
-               thin = 1,
-               race_init = NULL,
-               log_post_interval = 10,
-               beta_prior = 0.01,
-               gamma_prior = c(1, 1),
-               verbose = TRUE, 
-               fit_insample = FALSE,
-               race_obs = NULL,
-               max_keynames = 1000,
-               seed = sample(1:1000, 1))
-  ctrl$burnin <- floor(ctrl$iter/2)
-  ctrl[names(control)] <- control
-  
-  ## Set RNG seed
-  set.seed(ctrl$seed)
-  
-  if(ctrl$fit_insample){
-    if(is.null(ctrl$race_obs) | any(is.na(ctrl$race_obs))){
-      stop("If `fit_insample' is TRUE, `race_obs' must be a complete vector of observed races for each record in the voter.file.")
-    }
-  }
-  
-  ## Initial race 
-  race_pred_args <- list(census.surname = TRUE,
-                         surname.only = FALSE,
-                         surname.year = 2010,
-                         census.geo = census.geo,
-                         age = FALSE,
-                         sex = FALSE,
-                         retry = 0)
-  args_usr <- list(...)
-  ## level of geo aggregation
-  geo_id_names <- c("state", switch(race_pred_args$census.geo,
-                                    "county" = c("county"),
-                                    "tract" = c("county","tract"),
-                                    "block" = c("county","tract","block"),
-                                    "place" = c("place"),
-                                    "zip" = c("zip")))
-  
-  race_pred_args[names(args_usr)] <- args_usr
-  if(is.null(race_pred_args$census.data)){
-    if(is.null(race_pred_args$census.key)){
-      stop("Geographic data is required. When `census.data' is NULL, you must provide a census API Key using `census.key' so I can download the required data.")
-    }
-    all_states <- unique(voter.file$state)
-    race_pred_args$census.data <- get_census_data(race_pred_args$census.key, 
-                                   all_states, 
-                                   race_pred_args$age,
-                                   race_pred_args$sex, 
-                                   race_pred_args$census.geo, 
-                                   race_pred_args$retry)
-  }
-  race.suff <- c("whi", "bla", "his", "asi", "oth")
-  if(is.null(ctrl$race_init)){
-    race_pred_args$voter.file <- voter.file
-    race_pred <- do.call(predict_race, race_pred_args)
-    tmp <- base::merge(voter.file, race_pred, sort=FALSE)
-    ctrl$race_init <- apply(tmp[,paste0("pred.",race.suff)], 1, which.max) - 1 
-  }
-  if(any(is.na(ctrl$race_init))){
-    stop("Some initial race values are NA. If you didn't provide initial values, check the results of calling predict_race() on the voter.file you want me to work on.")
-  }
-  geo_id <- do.call(paste, voter.file[,geo_id_names])
-  ctrl$race_init <- split(ctrl$race_init, geo_id)
- 
-  
-  
-  ## P(race | geo)
-  g_r_t <- do.call(rbind, lapply(race_pred_args$census.data, 
-                                 function(x){
-                                   all_names <- names(x[[race_pred_args$census.geo]])
-                                   tmp <- x[[race_pred_args$census.geo]][,c(geo_id_names, grep("P00", all_names, value=TRUE))]
-                                   tmp[,grep("P00", all_names, value=TRUE)] <- proportions(as.matrix(tmp[,grep("P00", all_names, value=TRUE)]), 1)
-                                   tmp$r_whi <- tmp$P005003  #Pr(White|Geo)
-                                   tmp$r_bla <- tmp$P005004  #Pr(Black|Geo)
-                                   tmp$r_his <- tmp$P005010  #Pr(Latino|Geo)
-                                   tmp$r_asi <- (tmp$P005006 + tmp$P005007) #Pr(Asian or NH/PI|Geo)
-                                   tmp$r_oth <- (tmp$P005005 + tmp$P005008 + tmp$P005009)#Pr(AI/AN, Other, or Mixed|Geo)
-                                   return(tmp)
-                                 }))
-  g_r_t_geo <- do.call(paste, g_r_t[,geo_id_names])
-  ##Subset to geo's in vf
-  g_r_t <- g_r_t[g_r_t_geo %in% geo_id, ]
-  if(nrow(g_r_t) != length(unique(geo_id))){
-    stop("Some records in voter.file have unique geographic locations that I wasn't able to find in the census.data. Some records in voter.file may have mis-matched geographic units that do not exist in the census.")
-  }
-  g_r_t_geo_new <- do.call(paste, g_r_t[,geo_id_names])
-  geo_ord <- match(names(ctrl$race_init),g_r_t_geo_new)
-  geo_race_table <- as.matrix(g_r_t[geo_ord,grep("r_", names(g_r_t))])
-  
-  
-  ##Name-specific data
-  name_data <- vector("list", length(name_types))
-  names(name_data) <- name_types 
-  for(ntype in name_types){
-    str_names <- toupper(name_race_tables[[ntype]][,1])
-    proc_names_str <- .name_preproc(voter.file[,ntype], c(str_names))
-    u_obs_names <- unique(proc_names_str)
-    keynames_str_all <- str_names
-    kw_in_ind <- keynames_str_all %in% proc_names_str
-    keynames_str <- keynames_str_all[kw_in_ind]
-    u_kw <- unique(keynames_str)
-    n_u_kw <- length(u_kw)
-    reord <- order(match(u_obs_names, u_kw))
-    u_obs_names <- u_obs_names[reord]
-    n_names <- length(u_obs_names)
-    w_names <- match(proc_names_str, u_obs_names) - 1
-    keynames <- match(keynames_str, table = u_kw) - 1 
-    w_names_list <- split(w_names, geo_id)
-    phi_tilde <- array(0.0, c(n_u_kw, n_race))
-    for(x in 1:n_race){
-      phi_tilde[, x] <- proportions(name_race_tables[[ntype]][which(kw_in_ind),x+1]) 
-    }
-    
-    colnames(phi_tilde) <- paste("p", race.suff, sep="_")
-    name_data[[ntype]] <- list(n_unique_names = n_names,
-                               record_name_id = w_names_list,
-                               largest_keyword = as.integer(max(keynames)),
-                               census_table = phi_tilde,
-                               beta_prior = ctrl$beta_prior,
-                               gamma_prior = ctrl$gamma_prior,
-                               u_obs_names = u_obs_names)
-  }
-  
-  ## Create data for keyWRU
-  data_list <- list(name_type_n = length(name_types),
-                    race_n = ncol(name_race_tables[[1]][,-1]),
-                    geo_n = length(unique(geo_id)),
-                    geo_race_table = geo_race_table,
-                    voters_per_geo = sapply(split(voter.file, geo_id), nrow), 
-                    race_inits = ctrl$race_init,
-                    race_obs = if(ctrl$fit_insample){split(ctrl$race_obs, geo_id)} else {list()},
-                    name_data = name_data)
-  
-  full_res <- keyWRU_fit(data_list, ctrl)
-  pred_list <- full_res$phi
-  res <- lapply(name_types, 
-                function(ntype){
-                  colnames(pred_list[[ntype]]) <- paste0("pred.",race.suff)
-                  pred_list[[ntype]] <- as.data.frame(pred_list[[ntype]])
-                  pred_list[[ntype]] <- cbind(name_data[[ntype]]$u_obs_names,
-                                              pred_list[[ntype]])
-                  names(pred_list[[ntype]])[1] <- ntype
-                  return(pred_list[[ntype]])
-                })
-  v.file.s <- split(voter.file, geo_id)
-  race_samp <- cbind(do.call(rbind, v.file.s),
-                     proportions(as.matrix(do.call(rbind, full_res$predict_race)), 1))
-  names(race_samp) <- c(names(voter.file), paste0("pred.",race.suff))
-  names(res) <- names(pred_list)
-  ret_obj <- list()
-  ret_obj$name_by_race <- res
-  ret_obj$race_by_record <- race_samp
-  ret_obj$loglik <- full_res$ll
-  if(ctrl$fit_insample){
-    ret_obj$fit_insample <- do.call(c,full_res$r_insample)/(ctrl$iter - ctrl$burnin)
-  }
-  return(ret_obj)
-}
\ No newline at end of file
diff --git a/R/data.R b/R/data.R
deleted file mode 100644
index 0b6034d..0000000
--- a/R/data.R
+++ /dev/null
@@ -1,59 +0,0 @@
-#' Dictionary of First Names
-#'
-#' An example dataset containing voter file information.
-#'
-#' @format A data frame with 1,043,742 rows and 6 columns:
-#' \describe{
-#'   \item{first_name}{Voter identifier (numeric)}
-#'   \item{p_whi_first}{Probability of white}
-#'   \item{p_bla_first}{Probability of black}
-#'   \item{p_his_first}{Probability of Hispanic}
-#'   \item{p_asi_first}{Probability of asian},
-#'   \item{p_oth_first}{Probability of other}
-#'   }
-#'
-#' @keywords datasets
-#' @name firstNameDict
-#' @examples
-#' data(firstNameDict)
-NULL
-
-#' Dictionary of Last Names
-#'
-#' An example dataset containing voter file information.
-#'
-#' @format A data frame with 1,502,541 rows and 6 columns:
-#' \describe{
-#'   \item{last_name}{Voter identifier (numeric)}
-#'   \item{p_whi_last}{Probability of white}
-#'   \item{p_bla_last}{Probability of black}
-#'   \item{p_his_last}{Probability of Hispanic}
-#'   \item{p_asi_last}{Probability of asian},
-#'   \item{p_oth_last}{Probability of other}
-#'   }
-#'
-#' @keywords datasets
-#' @name lastNameDict
-#' @examples
-#' data(lastNameDict)
-NULL
-
-#' Dictionary of Middle Names
-#'
-#' An example dataset containing voter file information.
-#'
-#' @format A data frame with 1,182,133 rows and 6 columns:
-#' \describe{
-#'   \item{middle_name}{Voter identifier (numeric)}
-#'   \item{p_whi_middle}{Probability of white}
-#'   \item{p_bla_middle}{Probability of black}
-#'   \item{p_his_middle}{Probability of Hispanic}
-#'   \item{p_asi_middle}{Probability of asian},
-#'   \item{p_oth_middle}{Probability of other}
-#'   }
-#'
-#' @keywords datasets
-#' @name middleNameDict
-#' @examples
-#' data(middleNameDict)
-NULL
diff --git a/R/format_legacy_data.R b/R/format_legacy_data.R
deleted file mode 100644
index abbba84..0000000
--- a/R/format_legacy_data.R
+++ /dev/null
@@ -1,78 +0,0 @@
-#' Legacy data formatting function.
-#'
-#' \code{format_legacy_data} formats legacy data from the U.S. census to allow
-#' for Bayesian name geocoding. 
-#'
-#' This function allows users to construct datasets for analysis using the census legacy data format. 
-#' These data are available for the 2020 census at 
-#' https://www2.census.gov/programs-surveys/decennial/2020/data/01-Redistricting_File--PL_94-171/. 
-#' This function returns data structured analogously to data from the Census API, which is not yet 
-#' available for the 2020 Census as of September 2021. 
-#'
-#' @param legacyFilePath A character vector giving the location of a legacy census data folder,
-#' sourced from https://www2.census.gov/programs-surveys/decennial/2020/data/01-Redistricting_File--PL_94-171/. 
-#' These file names should end in ".pl". 
-#' @param outFile Optional character vector determining whether the formatted RData object should be saved. The
-#' filepath should end in ".RData". 
-#'
-#' @import PL94171
-#' @importFrom dplyr `%>%` .data
-#' 
-#' @examples
-#' \dontrun{
-#' gaCensusData <- format_legacy_data(PL94171::pl_url('GA', 2020))
-#' predict_race_new(ga.voter.file, namesToUse = 'last, first, mid', census.geo = 'block',
-#'      census.data = gaCensusData)
-#' }
-#'
-#' @export
-format_legacy_data <- function(legacyFilePath, outFile = NULL) {
-  
-  # aggregation levels to convert (county, tract, block group, and block)
-  summaryLevels <- c('050', '140', '150', '750')
-  
-  # read in the data
-  pl <- PL94171::pl_read(legacyFilePath)
-  pl <- PL94171::pl_select_standard(pl) 
-  
-  # iterate through the levels 
-  censusData.2020 <- lapply(summaryLevels, FUN = function(level) {
-    levelData <- PL94171::pl_subset(pl, level)
-    
-    # construct the base data frame
-    df <- levelData %>% 
-      dplyr::select(GEOID = .data$GEOID, 
-                    state = toupper(.data$state), 
-                    county = .data$county,
-                    P005003 = .data$pop_white, 
-                    P005004 = .data$pop_black, 
-                    P005010 = .data$pop_hisp,
-                    P005006 = .data$pop_asian,
-                    P005007 = .data$pop_nhpi,
-                    P005005 = .data$pop_aian, 
-                    P005008 = .data$pop_other, 
-                    P005009 = .data$pop_two
-      )
-    
-    # add geographic levels
-    if(level != '050') {
-      df <- df %>% dplyr::mutate(tract = substr(.data$GEOID, nchar(.data$GEOID) - 5, nchar(.data$GEOID)))
-      if(level != '140') {
-        df <- df %>% dplyr::mutate(blockGroup = substr(.data$GEOID, nchar(.data$GEOID), nchar(.data$GEOID)))
-        if(level != '150') {
-          df <- df %>% dplyr::mutate(block = substr(.data$GEOID, nchar(.data$GEOID) - 2, nchar(.data$GEOID)))
-        }
-      }
-    }
-    
-    df
-  })
-  
-  # format and optionally save the file 
-  names(censusData.2020) <- c('county', 'tract', 'blockGroup', 'block')
-  if(!is.null(outFile))
-    save(censusData.2020, file = outFile)
-  
-  # return the object
-  return(censusData.2020)
-}
diff --git a/R/get_census_data.R b/R/get_census_data.R
index 2c6d49f..e647539 100644
--- a/R/get_census_data.R
+++ b/R/get_census_data.R
@@ -56,4 +56,4 @@ get_census_data <- function(key, states, age = FALSE, sex = FALSE, census.geo =
     }
   }
   return(CensusObj)
-}
\ No newline at end of file
+}
diff --git a/R/merge_names.R b/R/merge_names.R
deleted file mode 100644
index 2aed0a5..0000000
--- a/R/merge_names.R
+++ /dev/null
@@ -1,240 +0,0 @@
-#' Surname probability merging function.
-#'
-#' \code{merge_names} merges names in a user-input dataset with corresponding
-#'  race/ethnicity probabilities derived from both the U.S. Census Surname List
-#'  and Spanish Surname List and voter files from states in the Southern U.S.
-#'
-#' This function allows users to match names in their dataset with database entries
-#'  estimating P(name | ethnicity) for each of the five major racial groups for each
-#'  name. The database probabilities are derived from both the U.S. Census Surname List
-#'  and Spanish Surname List and voter files from states in the Southern U.S.
-#'
-#'  By default, the function matches names as follows:
-#'  1) Search raw surnames in the database;
-#'  2) Remove any punctuation and search again;
-#'  3) Remove any spaces and search again;
-#'  4) Remove suffixes (e.g., "Jr") and search again (last names only)
-#'  5) Split double-barreled names into two parts and search first part of name;
-#'  6) Split double-barreled names into two parts and search second part of name;
-#'
-#'  Each step only applies to names not matched in a previous step.
-#'  Steps 2 through 6 are not applied if \code{clean.surname} is FALSE.
-#'
-#'  Note: Any name appearing only on the Spanish Surname List is assigned a
-#'  probability of 1 for Hispanics/Latinos and 0 for all other racial groups.
-#'
-#' @param voter.file An object of class \code{data.frame}.  Must contain a row for each individual being predicted,
-#' as well as a field named \code{\var{last}} containing each individual's surname.
-#' If first name is also being used for prediction, the file must also contain a field
-#' named \code{\var{first}}. If middle name is also being used for prediction, the field
-#' must also contain a field named \code{\var{middle}}.
-#' @param namesToUse A character vector identifying which names to use for the prediction.
-#' The default value is \code{"last"}, indicating that only the last name will be used.
-#' Other options are \code{"last, first"}, indicating that both last and first names will be
-#' used, and \code{"last, first, middle"}, indicating that last, first, and middle names will all
-#' be used.
-#' @param clean.names A \code{TRUE}/\code{FALSE} object. If \code{TRUE},
-#' any surnames in \code{\var{voter.file}} that cannot initially be matched
-#' to the database will be cleaned, according to U.S. Census specifications,
-#' in order to increase the chance of finding a match. Default is \code{TRUE}.
-#' @return Output will be an object of class \code{data.frame}. It will
-#'  consist of the original user-input data with additional columns that
-#'  specify the part of the name matched with Census data (\code{\var{surname.match}}),
-#'  and the probabilities Pr(Race | Surname) for each racial group
-#'  (\code{\var{p_whi}} for White, \code{\var{p_bla}} for Black,
-#'  \code{\var{p_his}} for Hispanic/Latino,
-#'  \code{\var{p_asi}} for Asian and Pacific Islander, and
-#'  \code{\var{p_oth}} for Other/Mixed).
-#'
-#' @import devtools
-#' @import stringr
-#'
-#' @examples
-#' \donttest{
-#' data(voters)
-#' merge_names(voters, 'last, first')
-#'}
-#' @export
-merge_names <- function(voter.file, namesToUse, clean.names = TRUE) {
-
-  # check the names
-  if(namesToUse == 'last') {
-    if(!("last" %in% names(voter.file)))
-      stop("Voter data frame needs to have a column named 'last'.")
-
-  } else if(namesToUse == 'last, first') {
-    if(!("last" %in% names(voter.file)) || !("first" %in% names(voter.file)))
-      stop("Voter data frame needs to have a column named 'last' and a column called 'first'.")
-
-  } else if(namesToUse == 'last, first, middle') {
-    if(!("last" %in% names(voter.file)) || !("first" %in% names(voter.file))
-       || !("middle" %in% names(voter.file)))
-      stop("Voter data frame needs to have a column named 'last', a column called 'first', and a column called 'middle'.")
-  }
-
-  # read in the name files and cast NA to the null string
-  firstNameDict[is.na(firstNameDict$first_name),]$first_name <- ''
-  middleNameDict[is.na(middleNameDict$middle_name),]$middle_name <- ''
-  lastNameDict[is.na(lastNameDict$last_name),]$last_name <- ''
-
-  nameDict <- list('first' = firstNameDict,
-                   'middle' = middleNameDict,
-                   'last' = lastNameDict)
-
-  ## Convert names in voter file to upper case
-  p_eth <- c("p_whi", "p_bla", "p_his", "p_asi", "p_oth")
-  df <- voter.file
-  df$caseid <- 1:nrow(df)
-
-  df$lastname.match <- df$lastname.upper <- toupper(as.character(df$last))
-  if(grepl('first', namesToUse))
-    df$firstname.match <- df$firstname.upper <- toupper(as.character(df$first))
-  if(grepl('middle', namesToUse)) {
-    df$middlename.match <- df$middlename.upper <- toupper(as.character(df$middle))
-    df$middlename.match[is.na(df$middlename.match)] <- ''
-  }
-
-  ## Merge Surnames with Census List (No Cleaning Yet)
-  df <- merge(df, lastNameDict, by.x = "lastname.match", by.y = "last_name", all.x = TRUE)
-  if(grepl('first', namesToUse))
-    df <- merge(df, firstNameDict, by.x = "firstname.match", by.y = "first_name", all.x = TRUE)
-  if(grepl('middle', namesToUse)) {
-    df <- merge(df, middleNameDict, by.x = "middlename.match", by.y = "middle_name", all.x = TRUE)
-  }
-
-  if(namesToUse == 'last' && sum(!(df$lastname.upper %in% lastNameDict$last_name)) == 0)
-     return(df[order(df$caseid), c(names(voter.file), "lastname.match", p_eth)])
-  if(namesToUse == 'last, first' && sum(!(df$lastname.match %in% lastNameDict$last_name)) == 0 &&
-      sum(!(df$firstname.upper %in% firstNameDict$first_name)) == 0)
-    return(df[order(df$caseid), c(names(voter.file), "lastname.match", "firstname.match", p_eth)])
-  if(namesToUse == 'last, first, middle' && sum(!(df$lastname.match %in% lastNameDict$last_name)) == 0 &&
-      sum(!(df$firstname.upper %in% firstNameDict$first_name)) == 0 && sum(!(df$middlename.upper %in% middleNameDict$middle_name)) == 0)
-    return(df[order(df$caseid), c(names(voter.file), "lastname.match", "firstname.match", "middlename.match", p_eth)])
-
-  ## Clean names (if specified by user)
-  if(clean.names) {
-
-    for(nameType in str_split(namesToUse, ', ')[[1]]) {
-
-      df1 <- df[!is.na(df[,paste('p_whi_', nameType, sep = '')]), ] #Matched names
-      df2 <- df[is.na(df[,paste('p_whi_', nameType, sep = '')]), ] #Unmatched names
-
-      ## Remove All Punctuation and Try Merge Again
-      if(nrow(df2) > 0) {
-        df2[,paste(nameType, "name.match", sep = "")] <- gsub("[^[:alnum:] ]", "", df2[,paste(nameType, "name.upper", sep = "")])
-
-        df2 <- merge(df2[,!grepl(paste('_', nameType, sep = ''), names(df2))], nameDict[[nameType]], all.x = TRUE,
-                     by.x = paste(nameType, "name.match", sep = ""), by.y = paste(nameType, "name", sep = '_'))
-        df2 <- df2[,names(df1)] # reorder the columns
-
-        if (sum(!is.na(df2[,paste('p_whi_', nameType, sep = ''),])) > 0) {
-          df1 <- rbind(df1, df2[!is.na(df2[,paste('p_whi_', nameType, sep = ''),]), ])
-          df2 <- df2[is.na(df2[,paste('p_whi_', nameType, sep = '')]), ]
-        }
-      }
-
-      ## Remove All Spaces and Try Merge Again
-      if(nrow(df2) > 0) {
-        df2[,paste(nameType, "name.match", sep = "")] <- gsub(" ", "", df2[,paste(nameType, "name.match", sep = "")])
-        df2 <- merge(df2[,!grepl(paste('_', nameType, sep = ''), names(df2))], nameDict[[nameType]], all.x = TRUE,
-                     by.x = paste(nameType, "name.match", sep = ""), by.y = paste(nameType, "name", sep = '_'))
-        df2 <- df2[,names(df1)] # reorder the columns
-
-        if (sum(!is.na(df2[,paste('p_whi_', nameType, sep = ''),])) > 0) {
-          df1 <- rbind(df1, df2[!is.na(df2[,paste('p_whi_', nameType, sep = ''),]), ])
-          df2 <- df2[is.na(df2[,paste('p_whi_', nameType, sep = '')]), ]
-        }
-      }
-
-      # Edits specific to common issues with last names
-      if(nameType == 'last' & nrow(df2) > 0) {
-
-        ## Remove Jr/Sr/III Suffixes for last names
-        suffix <- c("JUNIOR", "SENIOR", "THIRD", "III", "JR", " II", " J R", " S R", " IV")
-        for (i in 1:length(suffix)) {
-          df2$lastname.match <- ifelse(substr(df2$lastname.match, nchar(df2$lastname.match) - (nchar(suffix)[i] - 1), nchar(df2$lastname.match)) == suffix[i],
-                                       substr(df2$lastname.match, 1, nchar(df2$lastname.match) - nchar(suffix)[i]),
-                                       df2$lastname.match)
-        }
-        df2$lastname.match <- ifelse(nchar(df2$lastname.match) >= 7,
-                                     ifelse(substr(df2$lastname.match, nchar(df2$lastname.match) - 1, nchar(df2$lastname.match)) == "SR",
-                                            substr(df2$lastname.match, 1, nchar(df2$lastname.match) - 2),
-                                            df2$lastname.match),
-                                     df2$lastname.match) #Remove "SR" only if name has at least 7 characters
-
-        df2 <- merge(df2[,!grepl(paste('_', nameType, sep = ''), names(df2))], lastNameDict, by.x = "lastname.match", by.y = "last_name", all.x = TRUE)
-        df2 <- df2[,names(df1)] # reorder the columns
-
-        if (sum(!is.na(df2[,paste('p_whi_', nameType, sep = ''),])) > 0) {
-          df1 <- rbind(df1, df2[!is.na(df2[,paste('p_whi_', nameType, sep = ''),]), ])
-          df2 <- df2[is.na(df2[,paste('p_whi_', nameType, sep = '')]), ]
-        }
-      }
-
-
-      ## Names with Hyphens or Spaces, e.g. Double-Barreled Names
-      if(nrow(df2) > 0) {
-        df2$name2 <- df2$name1 <- NA
-        df2$name1[grep("-", df2[,paste(nameType, "name.upper", sep = "")])] <- sapply(strsplit(grep("-", df2[,paste(nameType, "name.upper", sep = "")], value = T), "-"), "[", 1)
-        df2$name2[grep("-", df2[,paste(nameType, "name.upper", sep = "")])] <- sapply(strsplit(grep("-", df2[,paste(nameType, "name.upper", sep = "")], value = T), "-"), "[", 2)
-        df2$name1[grep(" ", df2[,paste(nameType, "name.upper", sep = "")])] <- sapply(strsplit(grep(" ", df2[,paste(nameType, "name.upper", sep = "")], value = T), " "), "[", 1)
-        df2$name2[grep(" ", df2[,paste(nameType, "name.upper", sep = "")])] <- sapply(strsplit(grep(" ", df2[,paste(nameType, "name.upper", sep = "")], value = T), " "), "[", 2)
-
-        ## Use first half of name to merge in priors
-        df2[,paste(nameType, "name.match", sep = "")]  <- as.character(df2$name1)
-        df2 <- merge(df2[,!grepl(paste('_', nameType, sep = ''), names(df2))], nameDict[[nameType]], all.x = TRUE,
-                     by.x = paste(nameType, "name.match", sep = ""), by.y = paste(nameType, "name", sep = '_'))
-        df2 <- df2[,c(names(df1), "name1", "name2")] # reorder the columns
-
-        if (sum(!is.na(df2[,paste('p_whi_', nameType, sep = ''),])) > 0) {
-          df1 <- rbind(df1, df2[!is.na(df2[,paste('p_whi_', nameType, sep = '')]), !(names(df2) %in% c("name1", "name2"))])
-          df2 <- df2[is.na(df2[,paste('p_whi_', nameType, sep = '')]), ]
-        }
-      }
-
-      ## Use second half of name to merge in priors for rest
-      if(nrow(df2) > 0) {
-        df2[,paste(nameType, "name.match", sep = "")]  <- as.character(df2$name2)
-        df2 <- merge(df2[,!grepl(paste('_', nameType, sep = ''), names(df2))], nameDict[[nameType]], all.x = TRUE,
-                     by.x = paste(nameType, "name.match", sep = ""), by.y = paste(nameType, "name", sep = '_'))
-        df2 <- df2[,c(names(df1), "name1", "name2")] # reorder the columns
-
-        if (sum(!is.na(df2[,paste('p_whi_', nameType, sep = ''),])) > 0) {
-          df1 <- rbind(df1, df2[!is.na(df2[,paste('p_whi_', nameType, sep = '')]), !(names(df2) %in% c("name1", "name2"))])
-          df2 <- df2[is.na(df2[,paste('p_whi_', nameType, sep = '')]), ]
-        }
-      }
-
-      if(nrow(df2) > 0)
-        df <- rbind(df1, df2[, !(names(df2) %in% c("name1", "name2"))])
-      else
-        df <- df1
-      df <- df[order(df$caseid),]
-    }
-  }
-
-
-  ## For unmatched names, just fill with a 1
-  warning(paste(paste(sum(is.na(df$p_whi_last)), " (", round(100*mean(is.na(df$p_whi_last)), 1), "%) indivduals' last names were not matched.", sep = "")))
-  if(grepl('first', namesToUse)) {
-    warning(paste(paste(sum(is.na(df$p_whi_first)), " (", round(100*mean(is.na(df$p_whi_first)), 1), "%) indivduals' first names were not matched.", sep = "")))
-  }
-  if(grepl('middle', namesToUse)) {
-    warning(paste(paste(sum(is.na(df$p_whi_middle)), " (", round(100*mean(is.na(df$p_whi_middle)), 1), "%) indivduals' middle names were not matched.", sep = "")))
-  }
-
-  for(i in grep("p_", names(df))) {
-    df[,i] <- dplyr::coalesce(df[,i], 1)
-  }
-
-  # return the data
-  if(namesToUse == 'last')
-    return(df[order(df$caseid), c(names(voter.file), "lastname.match", paste(p_eth, "last", sep = "_"))])
-  else if(namesToUse == 'last, first')
-    return(df[order(df$caseid), c(names(voter.file), "lastname.match", "firstname.match",
-                                  paste(p_eth, "last", sep = "_"), paste(p_eth, "first", sep = "_"))])
-  else if(namesToUse == 'last, first, middle')
-    return(df[order(df$caseid), c(names(voter.file), "lastname.match", "firstname.match", "middlename.match",
-                                  paste(p_eth, "last", sep = "_"), paste(p_eth, "first", sep = "_"), paste(p_eth, "middle", sep = "_"))])
-
-}
diff --git a/R/merge_surnames.R b/R/merge_surnames.R
index e71dc29..cd3c5d6 100644
--- a/R/merge_surnames.R
+++ b/R/merge_surnames.R
@@ -1,48 +1,48 @@
 #' Surname probability merging function.
 #'
-#' \code{merge_surnames} merges surnames in user-input dataset with corresponding
+#' \code{merge_surnames} merges surnames in user-input dataset with corresponding 
 #'  race/ethnicity probabilities from U.S. Census Surname List and Spanish Surname List.
 #'
-#' This function allows users to match surnames in their dataset with the U.S.
-#'  Census Surname List (from 2000 or 2010) and Spanish Surname List to obtain
+#' This function allows users to match surnames in their dataset with the U.S. 
+#'  Census Surname List (from 2000 or 2010) and Spanish Surname List to obtain 
 #'  Pr(Race | Surname) for each of the five major racial groups.
-#'
-#'  By default, the function matches surnames to the Census list as follows:
-#'  1) Search raw surnames in Census surname list;
-#'  2) Remove any punctuation and search again;
-#'  3) Remove any spaces and search again;
-#'  4) Remove suffixes (e.g., Jr) and search again;
-#'  5) Split double-barreled surnames into two parts and search first part of name;
-#'  6) Split double-barreled surnames into two parts and search second part of name;
-#'  7) For any remaining names, impute probabilities using distribution
+#'  
+#'  By default, the function matches surnames to the Census list as follows: 
+#'  1) Search raw surnames in Census surname list; 
+#'  2) Remove any punctuation and search again; 
+#'  3) Remove any spaces and search again; 
+#'  4) Remove suffixes (e.g., Jr) and search again; 
+#'  5) Split double-barreled surnames into two parts and search first part of name; 
+#'  6) Split double-barreled surnames into two parts and search second part of name; 
+#'  7) For any remaining names, impute probabilities using distribution 
 #'  for all names not appearing on Census list.
-#'
-#'  Each step only applies to surnames not matched in a previous ste.
+#'  
+#'  Each step only applies to surnames not matched in a previous ste. 
 #'  Steps 2 through 7 are not applied if \code{clean.surname} is FALSE.
-#'
-#'  Note: Any name appearing only on the Spanish Surname List is assigned a
+#'  
+#'  Note: Any name appearing only on the Spanish Surname List is assigned a 
 #'  probability of 1 for Hispanics/Latinos and 0 for all other racial groups.
 #'
-#' @param voter.file An object of class \code{data.frame}. Must contain a field
+#' @param voter.file An object of class \code{data.frame}. Must contain a field 
 #'  named 'surname' containing list of surnames to be merged with Census lists.
-#' @param surname.year An object of class \code{numeric} indicating which year
-#'  Census Surname List is from. Accepted values are \code{2010} and \code{2000}.
+#' @param surname.year An object of class \code{numeric} indicating which year 
+#'  Census Surname List is from. Accepted values are \code{2010} and \code{2000}. 
 #'  Default is \code{2010}.
-#' @param clean.surname A \code{TRUE}/\code{FALSE} object. If \code{TRUE},
-#' any surnames in \code{\var{voter.file}} that cannot initially be matched
-#' to surname lists will be cleaned, according to U.S. Census specifications,
+#' @param clean.surname A \code{TRUE}/\code{FALSE} object. If \code{TRUE}, 
+#' any surnames in \code{\var{voter.file}} that cannot initially be matched 
+#' to surname lists will be cleaned, according to U.S. Census specifications, 
 #' in order to increase the chance of finding a match. Default is \code{TRUE}.
-#' @param impute.missing A \code{TRUE}/\code{FALSE} object. If \code{TRUE},
-#' race/ethnicity probabilities will be imputed for unmatched names using
+#' @param impute.missing A \code{TRUE}/\code{FALSE} object. If \code{TRUE}, 
+#' race/ethnicity probabilities will be imputed for unmatched names using  
 #' race/ethnicity distribution for all other names (i.e., not on Census List).
 #' Default is \code{TRUE}.
-#' @return Output will be an object of class \code{data.frame}. It will
-#'  consist of the original user-input data with additional columns that
-#'  specify the part of the name matched with Census data (\code{\var{surname.match}}),
-#'  and the probabilities Pr(Race | Surname) for each racial group
-#'  (\code{\var{p_whi}} for White, \code{\var{p_bla}} for Black,
-#'  \code{\var{p_his}} for Hispanic/Latino,
-#'  \code{\var{p_asi}} for Asian and Pacific Islander, and
+#' @return Output will be an object of class \code{data.frame}. It will 
+#'  consist of the original user-input data with additional columns that 
+#'  specify the part of the name matched with Census data (\code{\var{surname.match}}), 
+#'  and the probabilities Pr(Race | Surname) for each racial group 
+#'  (\code{\var{p_whi}} for White, \code{\var{p_bla}} for Black, 
+#'  \code{\var{p_his}} for Hispanic/Latino, 
+#'  \code{\var{p_asi}} for Asian and Pacific Islander, and 
 #'  \code{\var{p_oth}} for Other/Mixed).
 #'
 #' @import devtools
@@ -57,7 +57,7 @@ merge_surnames <- function(voter.file, surname.year = 2010, clean.surname = T, i
   if ("surname" %in% names(voter.file) == F) {
     stop('Data does not contain surname field.')
   }
-
+  
   ## Census Surname List
   if (surname.year == 2000) {
     surnames2000$surname <- as.character(surnames2000$surname)
@@ -66,10 +66,10 @@ merge_surnames <- function(voter.file, surname.year = 2010, clean.surname = T, i
     surnames2010$surname <- as.character(surnames2010$surname)
     surnames <- surnames2010
   }
-
+  
   p_eth <- c("p_whi", "p_bla", "p_his", "p_asi", "p_oth")
-
-  ## Convert Surnames in Voter File to Upper Case
+  
+  ## Convert Surnames in Voter File to Upper Case 
   df <- voter.file
   df$caseid <- 1:nrow(df)
   df$surname.match <- df$surname.upper <- toupper(as.character(df$surname))
@@ -82,13 +82,13 @@ merge_surnames <- function(voter.file, surname.year = 2010, clean.surname = T, i
   }
 
   df[df$surname.upper %in% surnames$surname == F, ]$surname.match <- ""
-
+  
   df1 <- df[df$surname.upper %in% surnames$surname, ] #Matched surnames
   df2 <- df[df$surname.upper %in% surnames$surname == F, ] #Unmatched surnames
-
+  
   ## Clean Surnames (if Specified by User)
   if (clean.surname) {
-
+    
     ## Remove All Punctuation and Try Merge Again
     df2$surname.match <- gsub("[^[:alnum:] ]", "", df2$surname.upper)
     df2 <- merge(df2[names(df2) %in% p_eth == F], surnames[c("surname", p_eth)], by.x = "surname.match", by.y = "surname", all.x = TRUE)
@@ -110,14 +110,14 @@ merge_surnames <- function(voter.file, surname.year = 2010, clean.surname = T, i
     ## Remove Jr/Sr/III Suffixes
     suffix <- c("JUNIOR", "SENIOR", "THIRD", "III", "JR", " II", " J R", " S R", " IV")
     for (i in 1:length(suffix)) {
-      df2$surname.match <- ifelse(substr(df2$surname.match, nchar(df2$surname.match) - (nchar(suffix)[i] - 1), nchar(df2$surname.match)) == suffix[i],
-                                  substr(df2$surname.match, 1, nchar(df2$surname.match) - nchar(suffix)[i]),
+      df2$surname.match <- ifelse(substr(df2$surname.match, nchar(df2$surname.match) - (nchar(suffix)[i] - 1), nchar(df2$surname.match)) == suffix[i], 
+                                  substr(df2$surname.match, 1, nchar(df2$surname.match) - nchar(suffix)[i]), 
                                   df2$surname.match)
     }
-    df2$surname.match <- ifelse(nchar(df2$surname.match) >= 7,
-                                ifelse(substr(df2$surname.match, nchar(df2$surname.match) - 1, nchar(df2$surname.match)) == "SR",
-                                       substr(df2$surname.match, 1, nchar(df2$surname.match) - 2),
-                                       df2$surname.match),
+    df2$surname.match <- ifelse(nchar(df2$surname.match) >= 7, 
+                                ifelse(substr(df2$surname.match, nchar(df2$surname.match) - 1, nchar(df2$surname.match)) == "SR", 
+                                       substr(df2$surname.match, 1, nchar(df2$surname.match) - 2), 
+                                       df2$surname.match), 
                                 df2$surname.match) #Remove "SR" only if name has at least 7 characters
     df2 <- merge(df2[names(df2) %in% p_eth == F], surnames[c("surname", p_eth)], by.x = "surname.match", by.y = "surname", all.x = TRUE)
     if (nrow(df2[df2$surname.match %in% surnames$surname, ]) > 0) {
@@ -127,30 +127,28 @@ merge_surnames <- function(voter.file, surname.year = 2010, clean.surname = T, i
     }
 
     ## Names with Hyphens or Spaces, e.g. Double-Barreled Names
-    if (nrow(df2) > 0) {
-      df2$surname2 <- df2$surname1 <- NA
-      df2$surname1[grep("-", df2$surname.upper)] <- sapply(strsplit(grep("-", df2$surname.upper, value = T), "-"), "[", 1)
-      df2$surname2[grep("-", df2$surname.upper)] <- sapply(strsplit(grep("-", df2$surname.upper, value = T), "-"), "[", 2)
-      df2$surname1[grep(" ", df2$surname.upper)] <- sapply(strsplit(grep(" ", df2$surname.upper, value = T), " "), "[", 1)
-      df2$surname2[grep(" ", df2$surname.upper)] <- sapply(strsplit(grep(" ", df2$surname.upper, value = T), " "), "[", 2)
-  
-      ## Use first half of name to merge in priors
-      df2$surname.match <- as.character(df2$surname1)
-      df2 <- merge(df2[names(df2) %in% c(p_eth) == F], surnames[c("surname", p_eth)], by.x = "surname.match", by.y = "surname", all.x = TRUE)[names(df2)]
-      if (nrow(df2[df2$surname.match %in% surnames$surname, ]) > 0) {
-        df1 <- rbind(df1, df2[df2$surname.match %in% surnames$surname, names(df2) %in% names(df1)])
-        df2 <- df2[df2$surname.match %in% surnames$surname == F, ]
-        if (nrow(df2[df2$surname.match %in% surnames$surname, ]) > 0) {df2$surname.match <- ""}
-      }
-  
-      ## Use second half of name to merge in priors for rest
-      df2$surname.match <- as.character(df2$surname2)
-      df2 <- merge(df2[names(df2) %in% c(p_eth, "surname1", "surname2") == F], surnames[c("surname", p_eth)], by.x = "surname.match", by.y = "surname", all.x = TRUE)[names(df2) %in% c("surname1", "surname2") == F]
-      if (nrow(df2[df2$surname.match %in% surnames$surname, ]) > 0) {
-        df1 <- rbind(df1, df2[df2$surname.match %in% surnames$surname, names(df2) %in% names(df1)])
-        df2 <- df2[df2$surname.match %in% surnames$surname == F, ]
-        if (nrow(df2[df2$surname.match %in% surnames$surname, ]) > 0) {df2$surname.match <- ""}
-      }
+    df2$surname2 <- df2$surname1 <- NA
+    df2$surname1[grep("-", df2$surname.upper)] <- sapply(strsplit(grep("-", df2$surname.upper, value = T), "-"), "[", 1)
+    df2$surname2[grep("-", df2$surname.upper)] <- sapply(strsplit(grep("-", df2$surname.upper, value = T), "-"), "[", 2)  
+    df2$surname1[grep(" ", df2$surname.upper)] <- sapply(strsplit(grep(" ", df2$surname.upper, value = T), " "), "[", 1)
+    df2$surname2[grep(" ", df2$surname.upper)] <- sapply(strsplit(grep(" ", df2$surname.upper, value = T), " "), "[", 2)
+    
+    ## Use first half of name to merge in priors
+    df2$surname.match <- as.character(df2$surname1)
+    df2 <- merge(df2[names(df2) %in% c(p_eth) == F], surnames[c("surname", p_eth)], by.x = "surname.match", by.y = "surname", all.x = TRUE)[names(df2)]
+    if (nrow(df2[df2$surname.match %in% surnames$surname, ]) > 0) {
+      df1 <- rbind(df1, df2[df2$surname.match %in% surnames$surname, names(df2) %in% names(df1)])
+      df2 <- df2[df2$surname.match %in% surnames$surname == F, ]
+      if (nrow(df2[df2$surname.match %in% surnames$surname, ]) > 0) {df2$surname.match <- ""}
+    }
+    
+    ## Use second half of name to merge in priors for rest
+    df2$surname.match <- as.character(df2$surname2)
+    df2 <- merge(df2[names(df2) %in% c(p_eth, "surname1", "surname2") == F], surnames[c("surname", p_eth)], by.x = "surname.match", by.y = "surname", all.x = TRUE)[names(df2) %in% c("surname1", "surname2") == F]
+    if (nrow(df2[df2$surname.match %in% surnames$surname, ]) > 0) {
+      df1 <- rbind(df1, df2[df2$surname.match %in% surnames$surname, names(df2) %in% names(df1)])
+      df2 <- df2[df2$surname.match %in% surnames$surname == F, ]
+      if (nrow(df2[df2$surname.match %in% surnames$surname, ]) > 0) {df2$surname.match <- ""}
     }
   }
 
@@ -162,7 +160,7 @@ merge_surnames <- function(voter.file, surname.year = 2010, clean.surname = T, i
       warning(paste("Probabilities were imputed for", nrow(df2), ifelse(nrow(df2) == 1, "surname", "surnames"), "that could not be matched to Census list."))
     }
   } else warning(paste(nrow(df2), ifelse(nrow(df2) == 1, "surname was", "surnames were"), "not matched."))
-
+  
   df <- rbind(df1, df2)
   return(df[order(df$caseid), c(names(voter.file), "surname.match", p_eth)])
 }
diff --git a/R/name_preproc.R b/R/name_preproc.R
deleted file mode 100644
index 5ceef62..0000000
--- a/R/name_preproc.R
+++ /dev/null
@@ -1,60 +0,0 @@
-#' Pre-process vector of names to match census style. Internal function
-#'
-#' @param voter_names Character vector to be pre-processed.
-#' @param target_names Character vector of census names to be matched.
-#'
-#' @return A character vector of pre-processed named
-#' @keywords internal
-#'
-
-.name_preproc <- function(voter_names, target_names){
-  post_names <- as.character(voter_names)
-  post_names <- toupper(post_names)
-  ## 1) Raw match
-  match_tmp <- post_names %in% target_names
-  ## 2) remove punctuation
-  post_names[!match_tmp] <- gsub("[^[:alnum:] ]", "",
-                                 post_names[!match_tmp])
-  match_tmp <- post_names %in% target_names
-  ## 3) Remove spaces
-  post_names[!match_tmp] <- gsub(" ", "",
-                                 post_names[!match_tmp])
-  match_tmp <- post_names %in% target_names
-  ## 4) Remove suffixes
-  suffix <- c("JUNIOR", "SENIOR", "THIRD", "III", "JR", " II", " J R", " S R", " IV")
-  for (i in 1:length(suffix)) {
-    post_names[!match_tmp] <- ifelse(substr(post_names[!match_tmp],
-                                       nchar(post_names[!match_tmp]) - (nchar(suffix)[i] - 1),
-                                       nchar(post_names[!match_tmp])) == suffix[i], 
-                                substr(post_names[!match_tmp], 1, nchar(post_names[!match_tmp]) - nchar(suffix)[i]), 
-                                post_names[!match_tmp])
-  }
-  post_names[!match_tmp] <- ifelse(nchar(post_names[!match_tmp]) >= 7, 
-                                   ifelse(substr(post_names[!match_tmp], 
-                                                 nchar(post_names[!match_tmp]) - 1,
-                                                 nchar(post_names[!match_tmp])) == "SR", 
-                                          substr(post_names[!match_tmp], 1, nchar(post_names[!match_tmp]) - 2), 
-                                          post_names[!match_tmp]), 
-                                   post_names[!match_tmp]) #Remove "SR" only if name has at least 7 characters
-  match_tmp <- post_names %in% target_names
-  ## 5) Split double-barreled names, match on first part
-  post_names_tmp <- post_names
-  post_names_tmp[(!match_tmp) & grep("[-, ]", post_names[(!match_tmp)])] <- sapply(strsplit(grep("[-, ]",
-                                                                                                 post_names[(!match_tmp)],
-                                                                                                 value = TRUE),
-                                                                                            "[-, ]"),
-                                                                                   "[", 1) 
-  match_tmp_2 <- post_names_tmp %in% target_names
-  post_names[(match_tmp_2 == TRUE) & (match_tmp == FALSE)] <- post_names_tmp[(match_tmp_2 == TRUE) & (match_tmp == FALSE)]
-  match_tmp <- post_names %in% target_names
-  ## 6) Split double-barreled names, match on second part
-  post_names_tmp <- post_names
-  post_names_tmp[(!match_tmp) & grep("[-, ]", post_names[(!match_tmp)])] <- sapply(strsplit(grep("[-, ]",
-                                                                                                 post_names[(!match_tmp)],
-                                                                                                 value = TRUE),
-                                                                                            "[-, ]"),
-                                                                                   "[", 2) 
-  match_tmp_2 <- post_names_tmp %in% target_names
-  post_names[(match_tmp_2 == TRUE) & (match_tmp == FALSE)] <- post_names_tmp[(match_tmp_2 == TRUE) & (match_tmp == FALSE)]
-  return(unlist(post_names))
-}
\ No newline at end of file
diff --git a/R/predict_race.R b/R/predict_race.R
index 30f127b..c44fef6 100644
--- a/R/predict_race.R
+++ b/R/predict_race.R
@@ -70,7 +70,6 @@
 #' Whatever the name of the party registration field in \code{\var{voter.file}}, 
 #' it should be coded as 1 for Democrat, 2 for Republican, and 0 for Other.
 #' @param retry The number of retries at the census website if network interruption occurs.
-#' @param impute.missing See \code{\link[wru]{merge_surnames}}. 
 #' @return Output will be an object of class \code{data.frame}. It will 
 #'  consist of the original user-input data with additional columns with 
 #'  predicted probabilities for each of the five major racial categories: 
@@ -98,7 +97,7 @@
 ## Race Prediction Function
 predict_race <- function(voter.file, 
                            census.surname = TRUE, surname.only = FALSE, surname.year = 2010, 
-                           census.geo, census.key, census.data = NA, age = FALSE, sex = FALSE, party, impute.missing = TRUE, retry = 0) {
+                           census.geo, census.key, census.data = NA, age = FALSE, sex = FALSE, party, retry = 0) {
   
   if (!missing(census.geo) && (census.geo == "precinct")) {
     # geo <- "precinct"
@@ -146,17 +145,16 @@ predict_race <- function(voter.file,
   ## Merge in Pr(Race | Surname) if necessary
   if (census.surname) {
     if (surname.year == 2010) {
-      voter.file <- merge_surnames(voter.file, impute.missing = impute.missing)
+      voter.file <- merge_surnames(voter.file)
     } else {
       if (surname.year == 2000) {
-        voter.file <- merge_surnames(voter.file, surname.year = surname.year, 
-                                     impute.missing = impute.missing)
+        voter.file <- merge_surnames(voter.file, surname.year = surname.year)
       } else {
         stop(paste(surname.year, "is not a valid surname.year. It should be either 2000 or 2010 (default)."))
       }
     }
   } else {
-    # Check if voter.file has the necessary data
+    # Check if voter.file has the nessary data
     for (k in 1:length(eth)) {
       if (paste("p", eth[k], sep = "_") %in% names(voter.file) == F) {
         stop(paste("voter.file object needs to have columns named ", paste(paste("p", eth, sep = "_"), collapse = " and "), ".", sep = ""))
diff --git a/R/predict_race_v2.R b/R/predict_race_v2.R
deleted file mode 100644
index e4db216..0000000
--- a/R/predict_race_v2.R
+++ /dev/null
@@ -1,194 +0,0 @@
-#' Race prediction function.
-#'
-#' \code{predict_race_new} makes probabilistic estimates of individual-level race/ethnicity.
-#'
-#' This function implements the Bayesian race prediction methods outlined in
-#' Imai, Olivella, and Rosenman (2021). The function produces probabilistic estimates of
-#' individual-level race/ethnicity, based on name and geolocation.
-#' @param voter.file An object of class \code{data.frame}.
-#' Must contain a row for each individual being predicted,
-#' as well as a field named \code{\var{last}} containing each individual's surname.
-#' If first name is also being used for prediction, the file must also contain a field
-#' named \code{\var{first}}. If middle name is also being used for prediction, the field
-#' must also contain a field named \code{\var{middle}}.
-#' Moreover, \code{\var{voter.file}} must contain a field named
-#' \code{\var{state}}, which contains the two-character abbreviation for each individual's
-#' state of residence (e.g., \code{"nj"} for New Jersey).
-#' If using Census geographic data in race/ethnicity predictions,
-#' \code{\var{voter.file}} must also contain at least one of the following fields:
-#' \code{\var{county}}, \code{\var{tract}}, \code{\var{block}}, and/or \code{\var{place}}.
-#' These fields should contain character strings matching U.S. Census categories.
-#' County is three characters (e.g., \code{"031"} not \code{"31"}),
-#' tract is six characters, and block is four characters. Place is five characters.
-#' See below for other optional fields.
-#' @param namesToUse A character vector identifying which names to use for the prediction.
-#' The default value is \code{"last"}, indicating that only the last name will be used.
-#' Other options are \code{"last, first"}, indicating that both last and first names will be
-#' used, and \code{"last, first, middle"}, indicating that last, first, and middle names will all
-#' be used.
-#' @param census.geo An optional character vector specifying what level of
-#' geography to use to merge in U.S. Census 2010 geographic data. Currently
-#' \code{"county"}, \code{"tract"}, \code{"block"}, and \code{"place"} are supported.
-#' Note: sufficient information must be in user-defined \code{\var{voter.file}} object.
-#' If \code{\var{census.geo} = "county"}, then \code{\var{voter.file}}
-#' must have column named \code{county}.
-#' If \code{\var{census.geo} = "tract"}, then \code{\var{voter.file}}
-#' must have columns named \code{county} and \code{tract}.
-#' And if \code{\var{census.geo} = "block"}, then \code{\var{voter.file}}
-#' must have columns named \code{county}, \code{tract}, and \code{block}.
-#' If \code{\var{census.geo} = "place"}, then \code{\var{voter.file}}
-#' must have column named \code{place}.
-#' Specifying \code{\var{census.geo}} will call \code{census_helper} function
-#' to merge Census geographic data at specified level of geography.
-#' @param census.key A character object specifying user's Census API
-#'  key. Required if \code{\var{census.geo}} is specified, because
-#'  a valid Census API key is required to download Census geographic data.
-#' @param census.data A list indexed by two-letter state abbreviations,
-#' which contains pre-saved Census geographic data.
-#' Can be generated using \code{get_census_data} function.
-#' @param retry The number of retries at the census website if network interruption occurs.
-#' @return Output will be an object of class \code{data.frame}. It will
-#'  consist of the original user-input data with additional columns with
-#'  predicted probabilities for each of the five major racial categories:
-#'  \code{\var{pred.whi}} for White,
-#'  \code{\var{pred.bla}} for Black,
-#'  \code{\var{pred.his}} for Hispanic/Latino,
-#'  \code{\var{pred.asi}} for Asian/Pacific Islander, and
-#'  \code{\var{pred.oth}} for Other/Mixed.
-#'
-#' @export
-#'
-#' @examples
-#' data(voters)
-#' predict_race(voters, surname.only = TRUE)
-#' predict_race(voter.file = voters, surname.only = TRUE)
-#' \dontrun{predict_race(voter.file = voters, census.geo = "tract", census.key = "...")}
-#' \dontrun{predict_race(voter.file = voters, census.geo = "tract", census.key = "...", age = T)}
-#' \dontrun{predict_race(voter.file = voters, census.geo = "place", census.key = "...", sex = T)}
-#' \dontrun{CensusObj <- get_census_data("...", state = c("NY", "DC", "NJ"));
-#' predict_race(voter.file = voters, census.geo = "tract", census.data = CensusObj, party = "PID")}
-#' \dontrun{CensusObj2 <- get_census_data(key = "...", state = c("NY", "DC", "NJ"), age = T, sex = T);
-#' predict_race(voter.file = voters, census.geo = "tract", census.data = CensusObj2, age = T, sex = T)}
-#' \dontrun{CensusObj3 <- get_census_data(key = "...", state = c("NY", "DC", "NJ"), census.geo = "place");
-#' predict_race(voter.file = voters, census.geo = "place", census.data = CensusObj3)}
-predict_race_new <- function(voter.file, namesToUse = 'last', census.geo, census.key,
-                         census.data = NA, retry = 0) {
-
-  # check the geography
-  if (!missing(census.geo) && (census.geo == "precinct")) {
-    stop('Error: census_helper function does not currently support merging precinct-level data.')
-  }
-
-  vars.orig <- names(voter.file)
-
-  # check the names
-  if(namesToUse == 'last') {
-    print("Proceeding with last name-only predictions...")
-    if(!("last" %in% names(voter.file)))
-      stop("Voter data frame needs to have a column named 'last'.")
-
-  } else if(namesToUse == 'last, first') {
-    print("Proceeding with first and last name-only predictions...")
-    if(!("last" %in% names(voter.file)) || !("first" %in% names(voter.file)))
-      stop("Voter data frame needs to have a column named 'last' and a column called 'first'.")
-
-  } else if(namesToUse == 'last, first, middle') {
-    print("Proceeding with first, last, and middle name predictions...")
-    if(!("last" %in% names(voter.file)) || !("first" %in% names(voter.file))
-       || !("middle" %in% names(voter.file)))
-      stop("Voter data frame needs to have a column named 'last', a column called 'first', and a column called 'middle'.")
-  }
-
-  # check the geographies
-  if (missing(census.geo) || is.null(census.geo) || is.na(census.geo) || census.geo %in% c("county", "tract", "block", "place") == F) {
-    stop("census.geo must be either 'county', 'tract', 'block', or 'place'")
-  } else {
-    print(paste("Proceeding with Census geographic data at", census.geo, "level..."))
-  }
-  if (missing(census.data) || is.null(census.data) || is.na(census.data)) {
-    if (missing(census.key) || is.null(census.key) || is.na(census.key)) {
-      stop("Please provide a valid Census API key using census.key option.")
-      } else {
-        print("Downloading Census geographic data using provided API key...")
-      }
-    } else {
-      if (!("state" %in% names(voter.file))) {
-        stop("voter.file object needs to have a column named state.")
-      }
-      if (sum(toupper(unique(as.character(voter.file$state))) %in% toupper(names(census.data)) == FALSE) > 0) {
-        print("census.data object does not include all states in voter.file object.")
-        if (missing(census.key) || is.null(census.key) || is.na(census.key)) {
-          stop("Please provide either a valid Census API key or valid census.data object that covers all states in voter.file object.")
-        } else {
-          print("Downloading Census geographic data for states not included in census.data object...")
-        }
-      } else {
-        print("Using Census geographic data from provided census.data object...")
-      }
-    }
-
-  eth <- c("whi", "bla", "his", "asi", "oth")
-
-  ## Merge in Pr(Name | Race)
-  voter.file <- merge_names(voter.file, namesToUse)
-
-  if (census.geo == "place") {
-    if (!("place" %in% names(voter.file))) {
-      stop("voter.file object needs to have a column named place.")
-    }
-    voter.file <- census_helper_new(key = census.key,
-                                    voter.file = voter.file,
-                                    states = "all",
-                                    geo = "place",
-                                    census.data = census.data, retry = retry)
-  }
-
-  if (census.geo == "block") {
-    if (!("tract" %in% names(voter.file)) || !("county" %in% names(voter.file)) || !("block" %in% names(voter.file))) {
-      stop("voter.file object needs to have columns named block, tract, and county.")
-    }
-    voter.file <- census_helper_new(key = census.key,
-                                    voter.file = voter.file,
-                                    states = "all",
-                                    geo = "block",
-                                    census.data = census.data, retry = retry)
-  }
-
-  if (census.geo == "precinct") {
-    geo <- "precinct"
-    stop('Error: census_helper function does not currently support precinct-level data.')
-  }
-
-  if (census.geo == "tract") {
-    if (!("tract" %in% names(voter.file)) || !("county" %in% names(voter.file))) {
-      stop("voter.file object needs to have columns named tract and county.")
-    }
-    voter.file <- census_helper_new(key = census.key,
-                                    voter.file = voter.file,
-                                    states = "all",
-                                    geo = "tract",
-                                    census.data = census.data, retry = retry)
-  }
-
-  if (census.geo == "county") {
-    if (!("county" %in% names(voter.file))) {
-      stop("voter.file object needs to have a column named county.")
-    }
-    voter.file <- census_helper_new(key = census.key,
-                                    voter.file = voter.file,
-                                    states = "all",
-                                    geo = "county",
-                                    census.data = census.data, retry = retry)
-  }
-  
-  # Pr(Race | Surname, Geolocation)
-  preds <- voter.file[,grep("_last", names(voter.file))]*voter.file[,grep("r_", names(voter.file))]
-  if(grepl('first', namesToUse))
-    preds <- preds*voter.file[,grep("_first", names(voter.file))]
-  if(grepl('middle', namesToUse))
-    preds <- preds*voter.file[,grep("_middle", names(voter.file))]
-  preds <- apply(preds, 2, FUN = function(x) {x/rowSums(preds)})
-  colnames(preds) <- paste("pred", eth, sep = "_")
-
-  return(data.frame(cbind(voter.file[c(vars.orig)], preds)))
-}
diff --git a/R/voters.R b/R/voters.R
index b71ef63..c8189bd 100644
--- a/R/voters.R
+++ b/R/voters.R
@@ -17,12 +17,10 @@
 #'   \item{sex}{0=male, 1=female}
 #'   \item{party}{Party registration (character)}
 #'   \item{PID}{Party registration (numeric)}
-#'   \item{first}{First name}
-#'   \item{last}{Last name}
-#'   }
-#'
+#'   #' }
+#' @docType data
 #' @keywords datasets
 #' @name voters
 #' @examples
 #' data(voters)
-NULL
+"voters"
diff --git a/R/wru.R b/R/wru.R
deleted file mode 100644
index a1f2b0c..0000000
--- a/R/wru.R
+++ /dev/null
@@ -1,3 +0,0 @@
-#' @useDynLib wru, .registration=TRUE
-#' @importFrom Rcpp evalCpp
-NULL
diff --git a/R/zzz.R b/R/zzz.R
deleted file mode 100644
index 9ca9a17..0000000
--- a/R/zzz.R
+++ /dev/null
@@ -1,3 +0,0 @@
-.onUnload <- function (libpath) {
-  library.dynam.unload("wru", libpath)
-}
\ No newline at end of file
diff --git a/README.md b/README.md
index 8f139f9..122b134 100644
--- a/README.md
+++ b/README.md
@@ -118,4 +118,4 @@ predict_race(voter.file = voters.dc.nj, census.geo = "county", census.data = cen
 predict_race(voter.file = voters.dc.nj, census.geo = "tract", census.data = censusObj2, party = "PID", age = TRUE, sex = FALSE)  # Pr(Race | Surname, Tract, Party)
 ```
 ### A related song 
-Watch [this](https://www.youtube.com/watch?v=r5kmCgVhADY)!
+Watch [this](https://www.youtube.com/watch?v=LYb_nqU_43w)!
diff --git a/data/firstNameDict.rda b/data/firstNameDict.rda
deleted file mode 100644
index 20ea91b..0000000
Binary files a/data/firstNameDict.rda and /dev/null differ
diff --git a/data/lastNameDict.rda b/data/lastNameDict.rda
deleted file mode 100644
index 6f8ad11..0000000
Binary files a/data/lastNameDict.rda and /dev/null differ
diff --git a/data/middleNameDict.rda b/data/middleNameDict.rda
deleted file mode 100644
index 56d1aa8..0000000
Binary files a/data/middleNameDict.rda and /dev/null differ
diff --git a/data/surnames2000.RData b/data/surnames2000.RData
new file mode 100644
index 0000000..8029711
Binary files /dev/null and b/data/surnames2000.RData differ
diff --git a/data/surnames2000.rda b/data/surnames2000.rda
deleted file mode 100644
index 431e1b1..0000000
Binary files a/data/surnames2000.rda and /dev/null differ
diff --git a/data/surnames2010.RData b/data/surnames2010.RData
new file mode 100644
index 0000000..61e6886
Binary files /dev/null and b/data/surnames2010.RData differ
diff --git a/data/surnames2010.rda b/data/surnames2010.rda
deleted file mode 100644
index 6f5b591..0000000
Binary files a/data/surnames2010.rda and /dev/null differ
diff --git a/data/voters.RData b/data/voters.RData
new file mode 100644
index 0000000..e7cfd64
Binary files /dev/null and b/data/voters.RData differ
diff --git a/data/voters.rda b/data/voters.rda
deleted file mode 100644
index 560fb20..0000000
Binary files a/data/voters.rda and /dev/null differ
diff --git a/man/census_helper_new.Rd b/man/census_helper_new.Rd
deleted file mode 100644
index db79d9f..0000000
--- a/man/census_helper_new.Rd
+++ /dev/null
@@ -1,77 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/census_helper_v2.R
-\name{census_helper_new}
-\alias{census_helper_new}
-\title{Census helper function.}
-\usage{
-census_helper_new(
-  key,
-  voter.file,
-  states = "all",
-  geo = "tract",
-  age = FALSE,
-  sex = FALSE,
-  census.data = NA,
-  retry = 0
-)
-}
-\arguments{
-\item{key}{A required character object. Must contain user's Census API
-key, which can be requested \href{https://api.census.gov/data/key_signup.html}{here}.}
-
-\item{voter.file}{An object of class \code{data.frame}. Must contain field(s) named 
-\code{\var{county}}, \code{\var{tract}}, \code{\var{block}}, and/or \code{\var{place}}
-specifying geolocation. These should be character variables that match up with 
-U.S. Census categories. County should be three characters (e.g., "031" not "31"), 
-tract should be six characters, and block should be four characters. 
-Place should be five characters if it is included.}
-
-\item{states}{A character vector specifying which states to extract 
-Census data for, e.g. \code{c("NJ", "NY")}. Default is \code{"all"}, which extracts 
-Census data for all states contained in user-input data.}
-
-\item{geo}{A character object specifying what aggregation level to use. 
-Use \code{"county"}, \code{"tract"}, or \code{"block"}. Default is \code{"tract"}. 
-Warning: extracting block-level data takes very long.}
-
-\item{age}{A \code{TRUE}/\code{FALSE} object indicating whether to condition on 
-age or not. If \code{FALSE} (default), function will return Pr(Geolocation | Race).
-If \code{TRUE}, function will return Pr(Geolocation, Age | Race). 
-If \code{\var{sex}} is also \code{TRUE}, function will return Pr(Geolocation, Age, Sex | Race).}
-
-\item{sex}{A \code{TRUE}/\code{FALSE} object indicating whether to condition on 
-sex or not. If \code{FALSE} (default), function will return Pr(Geolocation | Race). 
-If \code{TRUE}, function will return Pr(Geolocation, Sex | Race). 
-If \code{\var{age}} is also \code{TRUE}, function will return Pr(Geolocation, Age, Sex | Race).}
-
-\item{census.data}{A optional census object of class \code{list} containing 
-pre-saved Census geographic data. Can be created using \code{get_census_data} function.
-If \code{\var{census.data}} is provided, the \code{\var{age}} element must have the same value
-as the \code{\var{age}} option specified in this function (i.e., \code{TRUE} in both or 
-\code{FALSE} in both). Similarly, the \code{\var{sex}} element in the object provided in 
-\code{\var{census.data}} must have the same value as the \code{\var{sex}} option here.
-If \code{\var{census.data}} is missing, Census geographic data will be obtained via Census API.}
-
-\item{retry}{The number of retries at the census website if network interruption occurs.}
-}
-\value{
-Output will be an object of class \code{data.frame}. It will 
- consist of the original user-input data with additional columns of 
- Census data.
-}
-\description{
-\code{census_helper_v2} links user-input dataset with Census geographic data.
-}
-\details{
-This function allows users to link their geocoded dataset (e.g., voter file) 
-with U.S. Census 2010 data. The function extracts Census Summary File data 
-at the county, tract, or block level using the 'UScensus2010' package. Census data 
-calculated are Pr(Geolocation | Race) where geolocation is county, tract, or block.
-}
-\examples{
-\dontshow{data(voters)}
-\dontrun{census_helper(key = "...", voter.file = voters, states = "nj", geo = "block")}
-\dontrun{census_helper(key = "...", voter.file = voters, states = "all", geo = "tract", 
-age = TRUE, sex = TRUE)}
-
-}
diff --git a/man/co_cluster.Rd b/man/co_cluster.Rd
deleted file mode 100644
index 8bcbccc..0000000
--- a/man/co_cluster.Rd
+++ /dev/null
@@ -1,69 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/co_cluster.R
-\name{co_cluster}
-\alias{co_cluster}
-\title{Name-Race Co-clustering Using Keynames}
-\usage{
-co_cluster(
-  voter.file,
-  name_types,
-  name_race_tables,
-  census.geo,
-  ...,
-  control = NULL
-)
-}
-\arguments{
-\item{voter.file}{An object of class data.frame. Must contain a row for each individual being predicted, 
-as well as fields named "surname", and/or"first", and/or "middle", containing each 
-individual's corresponding names. It must also contain a field named state, which 
-contains the two-character lower-case abbreviation for each individual's state of residence (e.g., "nj" for New Jersey).
-If using Census geographic data in race/ethnicity
-predictions, voter.file must also contain at least one of the following fields: county, tract, block,
-and/or place. These fields should contain character strings matching U.S. Census categories. County is three
-characters (e.g., "031" not "31"), tract is six characters, and block is four characters. Place is
-five characters. See \code{\link{predict_race}} for other optional fields.}
-
-\item{name_types}{Character vector. Must have elements in "surname", "first", and "middle".}
-
-\item{name_race_tables}{Named list, with as many elements as there are names in \code{name_types}, and names matching
-elements in \code{name_types}. Each list element should be a data.frame of unique names (first column) by race (remaining columns),
-with conditional probabilities p(Name|Race).}
-
-\item{census.geo}{Required character vector. One of "county", "tract", "block" or "place". See \code{\link{predict_race}}.}
-
-\item{...}{Arguments passed to \code{\link{predict_race}}.}
-
-\item{control}{List of control arguments, including 
-\itemize{
-\item{race_init}{ Initial race for each observation in voter.file. Must be an integer, with
-                 0=white, 1=black, 2=hispanic, 3=asian, and 4=other.}
- \item{fit_insample}{ Boolean. Should model check in-sample fit of race prediction for each
-                     record? If \code{TRUE}, \code{race_obs} cannot be \code{NULL}. Defaults to \code{FALSE}.}
- \item{race_obs}{ Observed race for each record in \code{voter.file}. Must be an integer, with
-                 0=white, 1=black, 2=hispanic, 3=asian, and 4=other.}
- \item{iter}{ Number of MCMC iterations. Defaults to 1000.}
- \item{burnin}{ Number of iterations discarded as burnin. Defaults to half of \code{iter}.}
- \item{thin}{ Thinning interval for MCMC. Defaults to 1.}
- \item{log_post_interval}{ Interval for storing the log_posterior. Defaults to 10.}
- \item{beta_prior}{ Parameter for symmetric Dirichlet prior over names for each race. Defaults to 5.}
- \item{gamma_prior}{ Parameter for Beta prior over keyname/non-keyname mixture components. Defaults to c(5, 5).}
- \item{verbose}{ Print progress information. Defaults to \code{TRUE}.}
- \item{seed}{ RNG seed. If \code{NULL}, a seed is generated and stored for reproducibility.}
-}}
-}
-\value{
-A named list:
-\itemize{
-\item{name_by_race}{ Named list of predicted distributions of name by race for each name type.}
-\item{race_by_record}{ A copy of \code{voter.file}, with additional columns of predicted
-                      race probabilities, names \code{pred.<race>}. }
-\item{loglik}{ Values of log likelihood, evaluated every \code{log_post_interval}.}
-\item{fit_insample}{ When \code{fit_insample=TRUE}, a probability of correct in_sample prediction 
-                     for each record in \code{vote.file}.} 
-}
-}
-\description{
-Estimate a Bayesian mixed-membership, keyword-assisted race classification model for geo-locations
-in a voterfile.
-}
diff --git a/man/dot-name_preproc.Rd b/man/dot-name_preproc.Rd
deleted file mode 100644
index 93d20b1..0000000
--- a/man/dot-name_preproc.Rd
+++ /dev/null
@@ -1,20 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/name_preproc.R
-\name{.name_preproc}
-\alias{.name_preproc}
-\title{Pre-process vector of names to match census style. Internal function}
-\usage{
-.name_preproc(voter_names, target_names)
-}
-\arguments{
-\item{voter_names}{Character vector to be pre-processed.}
-
-\item{target_names}{Character vector of census names to be matched.}
-}
-\value{
-A character vector of pre-processed named
-}
-\description{
-Pre-process vector of names to match census style. Internal function
-}
-\keyword{internal}
diff --git a/man/firstNameDict.Rd b/man/firstNameDict.Rd
deleted file mode 100644
index 9b4927e..0000000
--- a/man/firstNameDict.Rd
+++ /dev/null
@@ -1,23 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/data.R
-\name{firstNameDict}
-\alias{firstNameDict}
-\title{Dictionary of First Names}
-\format{
-A data frame with 1,043,742 rows and 6 columns:
-\describe{
-  \item{first_name}{Voter identifier (numeric)}
-  \item{p_whi_first}{Probability of white}
-  \item{p_bla_first}{Probability of black}
-  \item{p_his_first}{Probability of Hispanic}
-  \item{p_asi_first}{Probability of asian},
-  \item{p_oth_first}{Probability of other}
-  }
-}
-\description{
-An example dataset containing voter file information.
-}
-\examples{
-data(firstNameDict)
-}
-\keyword{datasets}
diff --git a/man/format_legacy_data.Rd b/man/format_legacy_data.Rd
deleted file mode 100644
index 8cd2266..0000000
--- a/man/format_legacy_data.Rd
+++ /dev/null
@@ -1,35 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/format_legacy_data.R
-\name{format_legacy_data}
-\alias{format_legacy_data}
-\title{Legacy data formatting function.}
-\usage{
-format_legacy_data(legacyFilePath, outFile = NULL)
-}
-\arguments{
-\item{legacyFilePath}{A character vector giving the location of a legacy census data folder,
-sourced from https://www2.census.gov/programs-surveys/decennial/2020/data/01-Redistricting_File--PL_94-171/. 
-These file names should end in ".pl".}
-
-\item{outFile}{Optional character vector determining whether the formatted RData object should be saved. The
-filepath should end in ".RData".}
-}
-\description{
-\code{format_legacy_data} formats legacy data from the U.S. census to allow
-for Bayesian name geocoding.
-}
-\details{
-This function allows users to construct datasets for analysis using the census legacy data format. 
-These data are available for the 2020 census at 
-https://www2.census.gov/programs-surveys/decennial/2020/data/01-Redistricting_File--PL_94-171/. 
-This function returns data structured analogously to data from the Census API, which is not yet 
-available for the 2020 Census as of September 2021.
-}
-\examples{
-\dontrun{
-gaCensusData <- format_legacy_data(PL94171::pl_url('GA', 2020))
-predict_race_new(ga.voter.file, namesToUse = 'last, first, mid', census.geo = 'block',
-     census.data = gaCensusData)
-}
-
-}
diff --git a/man/keyWRU_fit.Rd b/man/keyWRU_fit.Rd
deleted file mode 100644
index 2edb983..0000000
--- a/man/keyWRU_fit.Rd
+++ /dev/null
@@ -1,35 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/RcppExports.R
-\name{keyWRU_fit}
-\alias{keyWRU_fit}
-\title{Collapsed Gibbs sampler for keyWRU. Internal function}
-\usage{
-keyWRU_fit(data, ctrl)
-}
-\arguments{
-\item{data}{A list with the following elements
-\itemize{
- \item{name_type_n}{Number of name types}
- \item{race_n}{Number of races}
- \item{geo_n}{Number of geolocations}
- \item{geo_race_table}{Matrix of conditional probabilities Pr(Race | Geolocation), with geolocations in the rows}
- \item{voters_per_geo}{Number of voterfile records per geolocation}
- \item{race_inits}{Table of initial race assignments per voterfile record}
- \item{name_data}{
-                   \itemize{
-                    \item{n_unique_names}{Number of unique names}
-                    \item{record_name_id}{Name id corresponding to each voterfile record}
-                    \item{keynames}{Integer matrix of name id's used as keynames for each race (race in the columns)}
-                    \item{census_table}{Matrix of Pr(Name | Race), with races in the columns}
-                    \item{beta_prior}{Scalar prior for name-race symmetric Dirichlet distribution}
-                    \item{gamma_prior}{Vector prior shapes for keyname/non-keyname Beta mixture}
-                   }
-                 }
-}}
-
-\item{ctrl}{A list of control arguments; see \code{co_cluster} function for details.}
-}
-\description{
-Collapsed Gibbs sampler for keyWRU. Internal function
-}
-\keyword{internal}
diff --git a/man/lastNameDict.Rd b/man/lastNameDict.Rd
deleted file mode 100644
index b267232..0000000
--- a/man/lastNameDict.Rd
+++ /dev/null
@@ -1,23 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/data.R
-\name{lastNameDict}
-\alias{lastNameDict}
-\title{Dictionary of Last Names}
-\format{
-A data frame with 1,502,541 rows and 6 columns:
-\describe{
-  \item{last_name}{Voter identifier (numeric)}
-  \item{p_whi_last}{Probability of white}
-  \item{p_bla_last}{Probability of black}
-  \item{p_his_last}{Probability of Hispanic}
-  \item{p_asi_last}{Probability of asian},
-  \item{p_oth_last}{Probability of other}
-  }
-}
-\description{
-An example dataset containing voter file information.
-}
-\examples{
-data(lastNameDict)
-}
-\keyword{datasets}
diff --git a/man/merge_names.Rd b/man/merge_names.Rd
deleted file mode 100644
index 727c666..0000000
--- a/man/merge_names.Rd
+++ /dev/null
@@ -1,67 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/merge_names.R
-\name{merge_names}
-\alias{merge_names}
-\title{Surname probability merging function.}
-\usage{
-merge_names(voter.file, namesToUse, clean.names = TRUE)
-}
-\arguments{
-\item{voter.file}{An object of class \code{data.frame}.  Must contain a row for each individual being predicted,
-as well as a field named \code{\var{last}} containing each individual's surname.
-If first name is also being used for prediction, the file must also contain a field
-named \code{\var{first}}. If middle name is also being used for prediction, the field
-must also contain a field named \code{\var{middle}}.}
-
-\item{namesToUse}{A character vector identifying which names to use for the prediction.
-The default value is \code{"last"}, indicating that only the last name will be used.
-Other options are \code{"last, first"}, indicating that both last and first names will be
-used, and \code{"last, first, middle"}, indicating that last, first, and middle names will all
-be used.}
-
-\item{clean.names}{A \code{TRUE}/\code{FALSE} object. If \code{TRUE},
-any surnames in \code{\var{voter.file}} that cannot initially be matched
-to the database will be cleaned, according to U.S. Census specifications,
-in order to increase the chance of finding a match. Default is \code{TRUE}.}
-}
-\value{
-Output will be an object of class \code{data.frame}. It will
- consist of the original user-input data with additional columns that
- specify the part of the name matched with Census data (\code{\var{surname.match}}),
- and the probabilities Pr(Race | Surname) for each racial group
- (\code{\var{p_whi}} for White, \code{\var{p_bla}} for Black,
- \code{\var{p_his}} for Hispanic/Latino,
- \code{\var{p_asi}} for Asian and Pacific Islander, and
- \code{\var{p_oth}} for Other/Mixed).
-}
-\description{
-\code{merge_names} merges names in a user-input dataset with corresponding
- race/ethnicity probabilities derived from both the U.S. Census Surname List
- and Spanish Surname List and voter files from states in the Southern U.S.
-}
-\details{
-This function allows users to match names in their dataset with database entries
- estimating P(name | ethnicity) for each of the five major racial groups for each
- name. The database probabilities are derived from both the U.S. Census Surname List
- and Spanish Surname List and voter files from states in the Southern U.S.
-
- By default, the function matches names as follows:
- 1) Search raw surnames in the database;
- 2) Remove any punctuation and search again;
- 3) Remove any spaces and search again;
- 4) Remove suffixes (e.g., "Jr") and search again (last names only)
- 5) Split double-barreled names into two parts and search first part of name;
- 6) Split double-barreled names into two parts and search second part of name;
-
- Each step only applies to names not matched in a previous step.
- Steps 2 through 6 are not applied if \code{clean.surname} is FALSE.
-
- Note: Any name appearing only on the Spanish Surname List is assigned a
- probability of 1 for Hispanics/Latinos and 0 for all other racial groups.
-}
-\examples{
-\donttest{
-data(voters)
-merge_names(voters, 'last, first')
-}
-}
diff --git a/man/merge_surnames.Rd b/man/merge_surnames.Rd
index e87b9fa..cb8f4b0 100644
--- a/man/merge_surnames.Rd
+++ b/man/merge_surnames.Rd
@@ -12,56 +12,56 @@ merge_surnames(
 )
 }
 \arguments{
-\item{voter.file}{An object of class \code{data.frame}. Must contain a field
+\item{voter.file}{An object of class \code{data.frame}. Must contain a field 
 named 'surname' containing list of surnames to be merged with Census lists.}
 
-\item{surname.year}{An object of class \code{numeric} indicating which year
-Census Surname List is from. Accepted values are \code{2010} and \code{2000}.
+\item{surname.year}{An object of class \code{numeric} indicating which year 
+Census Surname List is from. Accepted values are \code{2010} and \code{2000}. 
 Default is \code{2010}.}
 
-\item{clean.surname}{A \code{TRUE}/\code{FALSE} object. If \code{TRUE},
-any surnames in \code{\var{voter.file}} that cannot initially be matched
-to surname lists will be cleaned, according to U.S. Census specifications,
+\item{clean.surname}{A \code{TRUE}/\code{FALSE} object. If \code{TRUE}, 
+any surnames in \code{\var{voter.file}} that cannot initially be matched 
+to surname lists will be cleaned, according to U.S. Census specifications, 
 in order to increase the chance of finding a match. Default is \code{TRUE}.}
 
-\item{impute.missing}{A \code{TRUE}/\code{FALSE} object. If \code{TRUE},
-race/ethnicity probabilities will be imputed for unmatched names using
+\item{impute.missing}{A \code{TRUE}/\code{FALSE} object. If \code{TRUE}, 
+race/ethnicity probabilities will be imputed for unmatched names using  
 race/ethnicity distribution for all other names (i.e., not on Census List).
 Default is \code{TRUE}.}
 }
 \value{
-Output will be an object of class \code{data.frame}. It will
- consist of the original user-input data with additional columns that
- specify the part of the name matched with Census data (\code{\var{surname.match}}),
- and the probabilities Pr(Race | Surname) for each racial group
- (\code{\var{p_whi}} for White, \code{\var{p_bla}} for Black,
- \code{\var{p_his}} for Hispanic/Latino,
- \code{\var{p_asi}} for Asian and Pacific Islander, and
+Output will be an object of class \code{data.frame}. It will 
+ consist of the original user-input data with additional columns that 
+ specify the part of the name matched with Census data (\code{\var{surname.match}}), 
+ and the probabilities Pr(Race | Surname) for each racial group 
+ (\code{\var{p_whi}} for White, \code{\var{p_bla}} for Black, 
+ \code{\var{p_his}} for Hispanic/Latino, 
+ \code{\var{p_asi}} for Asian and Pacific Islander, and 
  \code{\var{p_oth}} for Other/Mixed).
 }
 \description{
-\code{merge_surnames} merges surnames in user-input dataset with corresponding
+\code{merge_surnames} merges surnames in user-input dataset with corresponding 
  race/ethnicity probabilities from U.S. Census Surname List and Spanish Surname List.
 }
 \details{
-This function allows users to match surnames in their dataset with the U.S.
- Census Surname List (from 2000 or 2010) and Spanish Surname List to obtain
+This function allows users to match surnames in their dataset with the U.S. 
+ Census Surname List (from 2000 or 2010) and Spanish Surname List to obtain 
  Pr(Race | Surname) for each of the five major racial groups.
-
- By default, the function matches surnames to the Census list as follows:
- 1) Search raw surnames in Census surname list;
- 2) Remove any punctuation and search again;
- 3) Remove any spaces and search again;
- 4) Remove suffixes (e.g., Jr) and search again;
- 5) Split double-barreled surnames into two parts and search first part of name;
- 6) Split double-barreled surnames into two parts and search second part of name;
- 7) For any remaining names, impute probabilities using distribution
+ 
+ By default, the function matches surnames to the Census list as follows: 
+ 1) Search raw surnames in Census surname list; 
+ 2) Remove any punctuation and search again; 
+ 3) Remove any spaces and search again; 
+ 4) Remove suffixes (e.g., Jr) and search again; 
+ 5) Split double-barreled surnames into two parts and search first part of name; 
+ 6) Split double-barreled surnames into two parts and search second part of name; 
+ 7) For any remaining names, impute probabilities using distribution 
  for all names not appearing on Census list.
-
- Each step only applies to surnames not matched in a previous ste.
+ 
+ Each step only applies to surnames not matched in a previous ste. 
  Steps 2 through 7 are not applied if \code{clean.surname} is FALSE.
-
- Note: Any name appearing only on the Spanish Surname List is assigned a
+ 
+ Note: Any name appearing only on the Spanish Surname List is assigned a 
  probability of 1 for Hispanics/Latinos and 0 for all other racial groups.
 }
 \examples{
diff --git a/man/middleNameDict.Rd b/man/middleNameDict.Rd
deleted file mode 100644
index d7efef8..0000000
--- a/man/middleNameDict.Rd
+++ /dev/null
@@ -1,23 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/data.R
-\name{middleNameDict}
-\alias{middleNameDict}
-\title{Dictionary of Middle Names}
-\format{
-A data frame with 1,182,133 rows and 6 columns:
-\describe{
-  \item{middle_name}{Voter identifier (numeric)}
-  \item{p_whi_middle}{Probability of white}
-  \item{p_bla_middle}{Probability of black}
-  \item{p_his_middle}{Probability of Hispanic}
-  \item{p_asi_middle}{Probability of asian},
-  \item{p_oth_middle}{Probability of other}
-  }
-}
-\description{
-An example dataset containing voter file information.
-}
-\examples{
-data(middleNameDict)
-}
-\keyword{datasets}
diff --git a/man/predict_race.Rd b/man/predict_race.Rd
index d5080ca..67b1b6d 100644
--- a/man/predict_race.Rd
+++ b/man/predict_race.Rd
@@ -15,7 +15,6 @@ predict_race(
   age = FALSE,
   sex = FALSE,
   party,
-  impute.missing = TRUE,
   retry = 0
 )
 }
@@ -94,8 +93,6 @@ on individual's party registration (in addition to geolocation).
 Whatever the name of the party registration field in \code{\var{voter.file}}, 
 it should be coded as 1 for Democrat, 2 for Republican, and 0 for Other.}
 
-\item{impute.missing}{See \code{\link[wru]{merge_surnames}}.}
-
 \item{retry}{The number of retries at the census website if network interruption occurs.}
 }
 \value{
diff --git a/man/predict_race_new.Rd b/man/predict_race_new.Rd
deleted file mode 100644
index d607d26..0000000
--- a/man/predict_race_new.Rd
+++ /dev/null
@@ -1,96 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/predict_race_v2.R
-\name{predict_race_new}
-\alias{predict_race_new}
-\title{Race prediction function.}
-\usage{
-predict_race_new(
-  voter.file,
-  namesToUse = "last",
-  census.geo,
-  census.key,
-  census.data = NA,
-  retry = 0
-)
-}
-\arguments{
-\item{voter.file}{An object of class \code{data.frame}.
-Must contain a row for each individual being predicted,
-as well as a field named \code{\var{last}} containing each individual's surname.
-If first name is also being used for prediction, the file must also contain a field
-named \code{\var{first}}. If middle name is also being used for prediction, the field
-must also contain a field named \code{\var{middle}}.
-Moreover, \code{\var{voter.file}} must contain a field named
-\code{\var{state}}, which contains the two-character abbreviation for each individual's
-state of residence (e.g., \code{"nj"} for New Jersey).
-If using Census geographic data in race/ethnicity predictions,
-\code{\var{voter.file}} must also contain at least one of the following fields:
-\code{\var{county}}, \code{\var{tract}}, \code{\var{block}}, and/or \code{\var{place}}.
-These fields should contain character strings matching U.S. Census categories.
-County is three characters (e.g., \code{"031"} not \code{"31"}),
-tract is six characters, and block is four characters. Place is five characters.
-See below for other optional fields.}
-
-\item{namesToUse}{A character vector identifying which names to use for the prediction.
-The default value is \code{"last"}, indicating that only the last name will be used.
-Other options are \code{"last, first"}, indicating that both last and first names will be
-used, and \code{"last, first, middle"}, indicating that last, first, and middle names will all
-be used.}
-
-\item{census.geo}{An optional character vector specifying what level of
-geography to use to merge in U.S. Census 2010 geographic data. Currently
-\code{"county"}, \code{"tract"}, \code{"block"}, and \code{"place"} are supported.
-Note: sufficient information must be in user-defined \code{\var{voter.file}} object.
-If \code{\var{census.geo} = "county"}, then \code{\var{voter.file}}
-must have column named \code{county}.
-If \code{\var{census.geo} = "tract"}, then \code{\var{voter.file}}
-must have columns named \code{county} and \code{tract}.
-And if \code{\var{census.geo} = "block"}, then \code{\var{voter.file}}
-must have columns named \code{county}, \code{tract}, and \code{block}.
-If \code{\var{census.geo} = "place"}, then \code{\var{voter.file}}
-must have column named \code{place}.
-Specifying \code{\var{census.geo}} will call \code{census_helper} function
-to merge Census geographic data at specified level of geography.}
-
-\item{census.key}{A character object specifying user's Census API
-key. Required if \code{\var{census.geo}} is specified, because
-a valid Census API key is required to download Census geographic data.}
-
-\item{census.data}{A list indexed by two-letter state abbreviations,
-which contains pre-saved Census geographic data.
-Can be generated using \code{get_census_data} function.}
-
-\item{retry}{The number of retries at the census website if network interruption occurs.}
-}
-\value{
-Output will be an object of class \code{data.frame}. It will
- consist of the original user-input data with additional columns with
- predicted probabilities for each of the five major racial categories:
- \code{\var{pred.whi}} for White,
- \code{\var{pred.bla}} for Black,
- \code{\var{pred.his}} for Hispanic/Latino,
- \code{\var{pred.asi}} for Asian/Pacific Islander, and
- \code{\var{pred.oth}} for Other/Mixed.
-}
-\description{
-\code{predict_race_new} makes probabilistic estimates of individual-level race/ethnicity.
-}
-\details{
-This function implements the Bayesian race prediction methods outlined in
-Imai, Olivella, and Rosenman (2021). The function produces probabilistic estimates of
-individual-level race/ethnicity, based on name and geolocation.
-}
-\examples{
-data(voters)
-predict_race(voters, surname.only = TRUE)
-predict_race(voter.file = voters, surname.only = TRUE)
-\dontrun{predict_race(voter.file = voters, census.geo = "tract", census.key = "...")}
-\dontrun{predict_race(voter.file = voters, census.geo = "tract", census.key = "...", age = T)}
-\dontrun{predict_race(voter.file = voters, census.geo = "place", census.key = "...", sex = T)}
-\dontrun{CensusObj <- get_census_data("...", state = c("NY", "DC", "NJ"));
-predict_race(voter.file = voters, census.geo = "tract", census.data = CensusObj, party = "PID")}
-\dontrun{CensusObj2 <- get_census_data(key = "...", state = c("NY", "DC", "NJ"), age = T, sex = T);
-predict_race(voter.file = voters, census.geo = "tract", census.data = CensusObj2, age = T, sex = T)}
-\dontrun{CensusObj3 <- get_census_data(key = "...", state = c("NY", "DC", "NJ"), census.geo = "place");
-predict_race(voter.file = voters, census.geo = "place", census.data = CensusObj3)}
-}
diff --git a/man/voters.Rd b/man/voters.Rd
index aeb352c..7a58eff 100644
--- a/man/voters.Rd
+++ b/man/voters.Rd
@@ -1,5 +1,6 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/voters.R
+\docType{data}
 \name{voters}
 \alias{voters}
 \title{Example voter file.}
@@ -19,9 +20,10 @@ A data frame with 10 rows and 12 variables:
   \item{sex}{0=male, 1=female}
   \item{party}{Party registration (character)}
   \item{PID}{Party registration (numeric)}
-  \item{first}{First name}
-  \item{last}{Last name}
-  }
+  #' }
+}
+\usage{
+voters
 }
 \description{
 An example dataset containing voter file information.
diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp
deleted file mode 100644
index 00bded1..0000000
--- a/src/RcppExports.cpp
+++ /dev/null
@@ -1,35 +0,0 @@
-// Generated by using Rcpp::compileAttributes() -> do not edit by hand
-// Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
-
-#include <RcppEigen.h>
-#include <Rcpp.h>
-
-using namespace Rcpp;
-
-#ifdef RCPP_USE_GLOBAL_ROSTREAM
-Rcpp::Rostream<true>&  Rcpp::Rcout = Rcpp::Rcpp_cout_get();
-Rcpp::Rostream<false>& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get();
-#endif
-
-// keyWRU_fit
-Rcpp::List keyWRU_fit(Rcpp::List data, Rcpp::List ctrl);
-RcppExport SEXP _wru_keyWRU_fit(SEXP dataSEXP, SEXP ctrlSEXP) {
-BEGIN_RCPP
-    Rcpp::RObject rcpp_result_gen;
-    Rcpp::RNGScope rcpp_rngScope_gen;
-    Rcpp::traits::input_parameter< Rcpp::List >::type data(dataSEXP);
-    Rcpp::traits::input_parameter< Rcpp::List >::type ctrl(ctrlSEXP);
-    rcpp_result_gen = Rcpp::wrap(keyWRU_fit(data, ctrl));
-    return rcpp_result_gen;
-END_RCPP
-}
-
-static const R_CallMethodDef CallEntries[] = {
-    {"_wru_keyWRU_fit", (DL_FUNC) &_wru_keyWRU_fit, 2},
-    {NULL, NULL, 0}
-};
-
-RcppExport void R_init_wru(DllInfo *dll) {
-    R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
-    R_useDynamicSymbols(dll, FALSE);
-}
diff --git a/src/XName.cpp b/src/XName.cpp
deleted file mode 100644
index 9c8c36e..0000000
--- a/src/XName.cpp
+++ /dev/null
@@ -1,123 +0,0 @@
-#include "XName.h"
-
-using namespace Eigen;
-using namespace Rcpp;
-using namespace std;
-
-
-XName::XName(const List& data,
-             const List& ctrl,
-             const int G_,
-             const int R_,
-             const VectorXd& n_r,
-             const std::vector<IntegerVector>& Races,
-             const String type
-               
-) :
-  W(as< std::vector<IntegerVector> >(data["record_name_id"])),
-  phi_tilde(as<MatrixXd>(data["census_table"])),
-  gamma_prior(as<VectorXd>(ctrl["gamma_prior"])),
-  beta_w(as<double>(ctrl["beta_prior"])),
-  N_(as<int>(data["n_unique_names"])),
-  R_(R_),
-  n_r(n_r),
-  type(type)
-{
-  //Initialize containers
-  max_kw = as<int>(data["largest_keyword"]);
-  n_rc.resize(R_, 2);
-  n_rc.setZero();
-  n_wr.resize(N_, R_);
-  n_wr.setZero();
-  int geo_size = 0, r_ = 0, w_ = 0;
-
-  
-  for(int ii = 0; ii < G_ ; ++ii){ //iterate over geos
-    geo_size =  W[ii].size();
-    C.push_back(VectorXi(geo_size));
-    C[ii].setZero(geo_size);
-  }
-  
-  
-  e_phi.resize(N_, R_);
-  e_phi.setZero();
-  
-  //Initialize placeholders
-  c1_prob = 1.0; numerator = 0.0;
-  denominator = 1.0; c0_prob = 1.0;
-  sum_c = 1.0; pi_0 = 1.0; pi_1 = 1.0;
-  c = 0; new_c = 0; r_ = 0; w_ = 0; 
-  
-}
-
-void XName::sample_c(int r,
-                     int voter,
-                     int geo_id)
-{
-  //Get unique names...
-  w_ = W[geo_id][voter];
-  //(Check if name is in keyword list, and skip update if not)
-//if(!found_in(keywords[r], w_)){	
-  if(w_ >= max_kw){	
-    return;
-  }
-  
-  // ... and current mixture component
-  c = C[geo_id][voter];
-  
-  // remove data
-  if(c == 0){
-    n_wr(w_, r)--;
-  }
-  n_rc(r, c)--;
-  
-  // newprob_c0
-  c1_prob = phi_tilde(w_, r) * (n_rc(r, 1) + gamma_prior[0]);
-  
-  // newprob_c0
-  numerator = (n_wr(w_, r) + beta_w)
-    * (n_rc(r, 0) + gamma_prior[1]);
-  denominator = n_rc(r, 0) + ((double)N_ * beta_w);
-  c0_prob = numerator / denominator;
-  
-  
-  // Normalize
-  sum_c = c0_prob + c1_prob;
-  
-  c1_prob = c1_prob / sum_c;
-  new_c = (R::runif(0,1) <= c1_prob);  //new_c = Bern(c1_prob);
-  
-  // add back data counts
-  if (new_c == 0) {
-    n_wr(w_, r)++;
-  }
-  n_rc(r, new_c)++;
-  
-  //Update mixture component
-  C[geo_id][voter] = new_c;
-}
-
-void XName::phihat_store(){
-  // keyword component
-  for(int r = 0; r < R_; ++r){
-    denominator =  n_rc(r, 1) + n_rc(r, 0) + gamma_prior.sum();
-    denominator_phi = n_r(r) + ((double)N_ * beta_w);
-    pi_0 = (n_rc(r, 1) + gamma_prior[0]) / denominator;
-    pi_1 = (n_rc(r, 0) + gamma_prior[1]) / denominator;
-    for(int w = 0; w < N_; ++w){
-      if(w < max_kw){
-      e_phi(w, r) += (pi_0 * phi_tilde(w, r)
-                        + pi_1 * ((n_wr(w, r) + beta_w) / denominator_phi));
-      } else {
-        e_phi(w, r) += ((n_wr(w, r) + beta_w) / denominator_phi);
-        }
-    }
-  }
-}
-
-MatrixXd XName::getPhiHat(){
-  return e_phi;
-}
-
-
-
diff --git a/src/XName.h b/src/XName.h
deleted file mode 100644
index a3d4d0b..0000000
--- a/src/XName.h
+++ /dev/null
@@ -1,70 +0,0 @@
-#ifndef __XName__
-#define __XName__
-
-#include <RcppEigen.h>
-#include <vector>
-#include "aux_funs.h"
-
-using namespace Eigen;
-using namespace Rcpp;
-
-class XName
-{
-public:
-  //Constructor
-  XName(const List& data,
-        const List& ctrl,
-        const int G_,
-        const int R_,
-        const VectorXd& n_r,
-        const std::vector<IntegerVector>& Races,
-        const String type
-  );
-  
-  //getters
-  MatrixXd getPhiHat();
-  
-private:
-  //Data
-  const std::vector<IntegerVector> W; // List of vectors of unique name id (row 0) and counts (row 1), for each geo
-  const std::vector<IntegerVector> keywords; //List of vectors of keyword name id (rows) by race (columns).
-  const MatrixXd phi_tilde; // census table, name by race
-  const VectorXd gamma_prior;
-  const double beta_w;
-  const int N_,
-  R_
-  ;
-  const VectorXd& n_r;
-  const String type;
-  
-  int max_kw;
-  
-  std::vector<VectorXi> C; // Vector of vectors of mixture component for each record, in each geo
-  MatrixXd n_rc, // Suff. stat, race by mixture component
-  e_phi, //Expected distribution over races for each name
-  n_wr; //Suff stat, name by race for names in non-keyword components
-  
-  
-  double c1_prob,
-  numerator,
-  denominator,
-  denominator_phi,
-  c0_prob,
-  sum_c, 
-  pi_0, pi_1;
-  
-  int c,
-  new_c,
-    r_,
-    w_;
-  //Methods
-  void sample_c(int r,
-                int voter,
-                int geo_id);
-  void phihat_store();
-
-  
-  
-  friend class keyWRU;
-};
-#endif
diff --git a/src/aux_funs.cpp b/src/aux_funs.cpp
deleted file mode 100644
index 33790fc..0000000
--- a/src/aux_funs.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-#include "aux_funs.h"
-
-inline int rand_wrapper(const int n) 
-  {
-  return floor(R::unif_rand() * n); 
-  }
-
-
-int rcat_without_normalize(Eigen::VectorXd &prob,
-			   const double total,
-			   const int size)
-  { 
-    // Draw from a categorical distribution
-    // This function does not require a normalized probability vector.
-    double u = R::unif_rand() * total;
-    double temp = 0.0;
-    int index = 0;
-    for (int ii = 0; ii < size; ii++) {
-      temp += prob(ii);
-      if (u < temp) {
-        index = ii;
-        break;
-      }
-    }
-    return index;
-  }
-
-
-double mylgamma(const double x){
-    // gammaln_val = 0.0;
-    // gammaln_val = lgamma(x);
-    
-    // Good approximation when x > 1
-    //    x > 1: max abs err: 2.272e-03
-    //    x > 0.5: 0.012
-    //    x > 0.6: 0.008
-    // Abramowitz and Stegun p.257
-    
-    if(x < 0.6)
-      return (lgamma(x));
-    else
-      return ((x-0.5)*log(x) - x + 0.91893853320467 + 1/(12*x));
-  };
diff --git a/src/aux_funs.h b/src/aux_funs.h
deleted file mode 100644
index 2786951..0000000
--- a/src/aux_funs.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef __aux_funs__
-#define __aux_funs__
-
-#include <RcppEigen.h>
-inline int rand_wrapper(const int n); 
-int rcat_without_normalize(Eigen::VectorXd& prob,
-		       const double total,
-		       const int size);
-
-double mylgamma(const double x);
-
-
-#endif
diff --git a/src/fit.cpp b/src/fit.cpp
deleted file mode 100644
index 268890f..0000000
--- a/src/fit.cpp
+++ /dev/null
@@ -1,42 +0,0 @@
-#include <RcppEigen.h>
-#include "XName.h"
-#include "keyWRU.h"
-
-// [[Rcpp::plugins(cpp11)]]
-// [[Rcpp::depends(RcppEigen)]]
-// [[Rcpp::depends(RcppProgress)]]
-
-//' Collapsed Gibbs sampler for keyWRU. Internal function
-//'
-//' @param data A list with the following elements
-//' \itemize{
-//'  \item{name_type_n}{Number of name types}
-//'  \item{race_n}{Number of races}
-//'  \item{geo_n}{Number of geolocations}
-//'  \item{geo_race_table}{Matrix of conditional probabilities Pr(Race | Geolocation), with geolocations in the rows}
-//'  \item{voters_per_geo}{Number of voterfile records per geolocation}
-//'  \item{race_inits}{Table of initial race assignments per voterfile record}
-//'  \item{name_data}{
-//'                    \itemize{
-//'                     \item{n_unique_names}{Number of unique names}
-//'                     \item{record_name_id}{Name id corresponding to each voterfile record}
-//'                     \item{keynames}{Integer matrix of name id's used as keynames for each race (race in the columns)}
-//'                     \item{census_table}{Matrix of Pr(Name | Race), with races in the columns}
-//'                     \item{beta_prior}{Scalar prior for name-race symmetric Dirichlet distribution}
-//'                     \item{gamma_prior}{Vector prior shapes for keyname/non-keyname Beta mixture}
-//'                    }
-//'                  }
-//' }
-//' @param ctrl A list of control arguments; see \code{co_cluster} function for details.
-//'
-//' @keywords internal
-// [[Rcpp::export]]
-Rcpp::List keyWRU_fit(Rcpp::List data,
-                      Rcpp::List ctrl)
-{
-  keyWRU model(data, ctrl);
-  model.sample();
-  Rcpp::List res = model.return_obj();
-  return res;
-}
-
diff --git a/src/keyWRU.cpp b/src/keyWRU.cpp
deleted file mode 100644
index f12efa2..0000000
--- a/src/keyWRU.cpp
+++ /dev/null
@@ -1,267 +0,0 @@
-#include "keyWRU.h"
-
-using namespace Eigen;
-using namespace Rcpp;
-
-keyWRU::keyWRU(const List data,
-               const List ctrl) :
-  M_(as<int>(data["name_type_n"])),
-  R_(as<int>(data["race_n"])),
-  G_(as<int>(data["geo_n"])),
-  max_iter(as<int>(ctrl["iter"])),
-  thin(as<int>(ctrl["thin"])),
-  burnin(as<int>(ctrl["burnin"])),
-  llk_per(as<int>(ctrl["log_post_interval"])),
-  verbose(as<bool>(ctrl["verbose"])),
-  theta(as<MatrixXd>(data["geo_race_table"])),
-  geo_each_size(as<VectorXi>(data["voters_per_geo"])),
-  Races(as< std::vector<IntegerVector> >(data["race_inits"])),
-  check_in_sample(as<bool>(ctrl["fit_insample"]))
-{
-
-  //Initialize suff. stat for race count and race sample storage
-  n_r.resize(R_);
-  n_r.setZero();
-  RaceSamples.resize(G_);
-  for (int ii = 0; ii < G_; ++ii) { //iterate over geos
-    (RaceSamples[ii]).resize(geo_each_size[ii], R_);
-    (RaceSamples[ii]).setZero();
-    for (int jj = 0; jj < geo_each_size[ii]; ++jj) { //iterate over names
-      n_r(Races[ii][jj])++;
-    }
-  }
-  //Construct list of name objects
-  const List& all_name_data = as<List>(data["name_data"]);
-  CharacterVector all_types = all_name_data.names();
-  //XName name;
-  for(int m = 0; m < M_; ++m){
-    //name = XName(all_name_data[m], ctrl, G_, R_, n_r, Races, all_types[m]);
-    names.push_back(XName(all_name_data[m], ctrl, G_, R_, n_r, Races, all_types[m]));
-  }
-
-  int c_val = 0, r_, w_;
-  for (int ii = 0; ii < G_; ++ii) {
-    geo_id_ = ii;
-    geo_size = geo_each_size[geo_id_];
-    // Iterate over each record in the geographic loc.
-    for (int jj = 0; jj < geo_size; ++jj) {
-      Rcpp::checkUserInterrupt();
-      voter_ = jj;
-      //Init name-specific suff stats
-      for(int m = 0; m < M_; ++m){
-        r_ = Races[ii][jj]; w_ = names[m].W[ii][jj];
-        //initialize C (0 for non-keyword names, bern(0.7) otherwise)
-        if(w_ < names[m].max_kw){
-          c_val =  R::rbinom(1, 0.7);
-          names[m].C[ii][jj] = c_val;
-        }
-        //Init n_rc suff. stat  matrix
-        names[m].n_rc(r_, c_val)++;
-        //Init n_wr suff. stat matrix
-        if(!c_val){
-          names[m].n_wr(w_, r_)++;
-        }
-      }
-    }
-  }
-
-
-
-  // Initialize all placeholders
-  geo_id_ = 0; geo_size = 0; w = 0; r = 0;
-  c = 0; new_r = 0; voter_ = 0; N_ = 0; n_samp = 0;
-  numerator = 0.0; denominator = 1.0;
-  sum_r = 1.0; n_rc = 0.0;
-  r_prob_vec.resize(R_);
-
-
-
-
-  //If testing in sample fit
-  if(check_in_sample){
-    race_match.resize(G_);
-    for (int ii = 0; ii < G_; ++ii) { //iterate over geos
-      (race_match[ii]).resize(geo_each_size[ii]);
-      for (int jj = 0; jj < geo_each_size[ii]; ++jj) { //iterate over names
-        race_match[ii][jj] = 0;
-      }
-    }
-    obs_race = as< std::vector<IntegerVector> >(data["race_obs"]);
-  }
-
-}
-
-int keyWRU::sample_r(int voter,
-                     int geo_id)
-{
-  // Extract current race
-  r = Races[geo_id][voter];
-
-  //remove data
-  n_r(r)--;
-  for(int m = 0; m < M_; ++m){
-    names[m].n_rc(r, (names[m].C[geo_id])[voter])--;
-    if(!(names[m].C[geo_id][voter])){
-      names[m].n_wr((names[m].W[geo_id])[voter], r)--;
-    }
-  }
-
-  for(int k = 0; k < R_; ++k){
-    numerator = 0.0;
-    denominator = 0.0;
-    for(int m = 0; m < M_; ++m){
-      const XName& name = names[m];
-      w = (name.W[geo_id])[voter];
-      c = (name.C[geo_id])[voter];
-      n_rc = name.n_rc(k, c);
-      numerator += log(n_rc + name.gamma_prior[c])
-        + (c ? log(name.phi_tilde(w, k)) : log(name.n_wr(w, k) + name.beta_w));
-      denominator += log(n_r(k) + name.gamma_prior.sum())
-        + (c ? 0.0 : log(n_rc + ((double)(name.N_) * name.beta_w)));
-    }
-    r_prob_vec(k) = theta(geo_id, k) * exp(numerator - denominator);
-  }
-  sum_r = r_prob_vec.sum();
-  new_r = rcat_without_normalize(r_prob_vec,
-                                 sum_r,
-                                 R_); // Cat(r_prob_vec/sum_r)
-
-
-  //Add counts back in
-  n_r(new_r)++ ;
-  for(int m = 0; m < M_; ++m){
-    names[m].n_rc(new_r, (names[m].C[geo_id])[voter])++ ;
-    if(!(names[m].C[geo_id][voter])){
-      names[m].n_wr((names[m].W[geo_id])[voter], new_r)++ ;
-    }
-  }
-
-
-  return new_r;
-}
-
-void keyWRU::iteration_single(int it)
-{
-  //geo_indeces = shuffle_indeces(G_); // shuffle geo locs
-  for (int ii = 0; ii < G_; ++ii) {
-    geo_id_ = ii;//geo_indeces[ii];
-    geo_size = geo_each_size[geo_id_];
-    //record_indeces = shuffle_indeces(geo_size); //shuffle records
-    // Iterate over each record in the geographic loc.
-    for (int jj = 0; jj < geo_size; ++jj) {
-      Rcpp::checkUserInterrupt();
-      voter_ = jj;//record_indeces[jj];
-      //Sample race
-      Races[geo_id_][voter_] = sample_r(voter_, geo_id_);
-      if((it+1) > burnin){
-        //Store sampled race
-        RaceSamples[geo_id_](voter_, Races[geo_id_][voter_])++;
-        if(check_in_sample){
-          race_match[geo_id_][voter_] += (Races[geo_id_][voter_] == obs_race[geo_id_][voter_]);
-        }
-      }
-      //Sample mixture component
-      for(int m = 0; m < M_; ++m){
-        names[m].sample_c(Races[geo_id_][voter_], voter_, geo_id_);
-      }
-    }
-  }
-}
-
-
-List keyWRU::return_obj()
-{
-  List phi_hat;
-  MatrixXd phi_mat;
-  for(int m = 0; m < M_; ++m){
-    phi_mat = names[m].getPhiHat();
-    phi_mat /= n_samp;
-    phi_hat.push_back(phi_mat, names[m].type);
-  }
-  List res;
-  res["phi"] = phi_hat;
-  res["ll"] = model_fit;
-  if(check_in_sample){
-    res["r_insample"] = race_match;
-  }
-  res["predict_race"] = getRHat();
-  return res;
-}
-
-void keyWRU::sample()
-{
-  // Set progress bar up
-  Progress progress_bar(max_iter, verbose);
-
-  for (int iter = 0; iter < max_iter; ++iter) {
-    // Run iteration
-    iteration_single(iter);
-
-    // Store samples and measures of model fit
-    int r_index = iter + 1;
-    if(r_index > burnin){
-      if ((r_index % llk_per) == 0 || r_index == 1 || r_index == max_iter) {
-        mfit_store();
-      }
-      if ((r_index % thin) == 0 || r_index == 1 || r_index == max_iter) {
-        phihat_store();
-        n_samp++;
-      }
-    }
-
-    // Progress bar
-    progress_bar.increment();
-
-    // Check keybord interruption to cancel the iteration
-    checkUserInterrupt();
-  }
-}
-
-double keyWRU::loglik_total()
-{
-  double loglik = 0.0, beta_w;
-  int N_;
-  for (int r = 0; r < R_; ++r) {
-    for( int m = 0; m < M_; ++m){
-      loglik += mylgamma(names[m].n_rc(r, 1) + names[m].gamma_prior[0])
-      + mylgamma(names[m].n_rc(r, 0) + names[m].gamma_prior[1]);
-      loglik -= mylgamma(n_r[r] + names[m].gamma_prior.sum());
-      N_ = names[m].N_;
-      beta_w = names[m].beta_w;
-      // Possible omp pragma here
-      for(int w = 0; w < N_; ++w){
-        loglik+=mylgamma(names[m].n_wr(w, r) + beta_w);
-      }
-      loglik -= mylgamma(names[m].n_rc(r, 0)
-                           + ((double)(names[m].N_) * beta_w));
-    }
-  }
-  return loglik;
-}
-
-
-
-void keyWRU::mfit_store()
-{
-  // Store likelihood during the sampling
-  double loglik = loglik_total();
-  model_fit.push_back(loglik);
-}
-
-void keyWRU::phihat_store()
-{
-  // Store expected distribution over races for given name
-  for(int m = 0; m < M_; ++m){
-    names[m].phihat_store();
-  }
-}
-
-List keyWRU::getRHat()
-{
-  return(wrap(RaceSamples));
-}
-
-
-
-
-
diff --git a/src/keyWRU.h b/src/keyWRU.h
deleted file mode 100644
index 00aab12..0000000
--- a/src/keyWRU.h
+++ /dev/null
@@ -1,108 +0,0 @@
-#ifndef __keyWRU__
-#define __keyWRU__
-
-#include <RcppEigen.h>
-#include <vector>
-#include <progress.hpp>
-#include <progress_bar.hpp>
-#include "XName.h"
-#include "aux_funs.h"
-
-
-using namespace Eigen;
-using namespace Rcpp;
-
-class keyWRU
-{
-  // This is a header file for "WRU" class.
-  // keyWRU class includes variables and functions
-  // that appear throughout the keyWRU models.
-  // Potential extensions inherit keyWRU and add model specific
-  // functions.
-  //
-  // keyWRU uses virtual inheritance.
-  // keyWRU is friends with the XName class,
-  // which contains First-, Last-, etc. name
-  // specific data and sampling methods
-
-public:
- 
-  //Constructor/destructor    
-  keyWRU(const List data,
-	 const List ctrl
-	 );
-
-   //Methods
-  void sample();
-  List return_obj();  
-  
-private:
-   //Data
-
-  const int M_ // number of name types 
-     , R_ // number of races
-    , G_ // number of locations
-    , max_iter // number of MCMC iterations
-    , thin // thinning interval
-    , burnin
-    , llk_per // log_posterior store interval
-    ;
- 
-  const bool verbose;
-  const MatrixXd theta;
-  const VectorXi geo_each_size; //
-  std::vector<IntegerVector> Races;
-  std::vector<MatrixXi> RaceSamples;
-  
-  bool check_in_sample;
-
-  Rcpp::NumericVector model_fit;
-
-  //Suff. Stat, race count
-  VectorXd n_r;
-
-
-  //All name components
-  std::vector<XName> names;
-
-  int geo_id_ // location id placeholder
-    , geo_size // Nr. of voters in location placeholder
-    , w // name placeholder
-    , r //race placeholder
-    , c // mixture component placeholder
-    , new_r // new race placeholder
-    , voter_ // voter id placeholder
-    , N_ // number of unique names placeholder
-    , n_samp
-    ;
-
-  //Intermediate computation placeholders 
-  double numerator
-    , denominator 
-    , sum_r
-    , n_rc // race-component suff. stat. placeholder 
-    ;
-  VectorXd r_prob_vec;
-
-  
-  
-  std::vector<int> geo_indeces // vector of shuffled locations
-    , record_indeces // vector of shuffl
-    ;
-  
-  std::vector< VectorXi > race_match;
-  std::vector<IntegerVector> obs_race;
-   // Methods
-  int sample_r(int voter,
-	       int geo_id);
-  void iteration_single(int it);
-  double loglik_total();
-  void mfit_store();
-  void phihat_store();
-  List getRHat();
-  
-};
-  
-  
-#endif
-  
diff --git a/tests/testthat/test-all.R b/tests/testthat/test-all.R
index 4522132..77e1d73 100644
--- a/tests/testthat/test-all.R
+++ b/tests/testthat/test-all.R
@@ -8,11 +8,11 @@ data(voters)
 
 test_that("Tests surname only predictions", {
   set.seed(12345)
-
+  
   # Prediction using surname only
   x <- suppressWarnings(predict_race(voter.file = voters, surname.only = TRUE))
   # Test and confirm prediction output is as expected
-  expect_equal(dim(x), c(10, 20))
+  expect_equal(dim(x), c(10, 18))
   expect_equal(sum(is.na(x)), 0)
   expect_equal(round(x[x$surname == "Khanna", "pred.whi"], 4), 0.0676)
   expect_equal(round(x[x$surname == "Johnson", "pred.his"], 4), 0.0236)
@@ -25,10 +25,10 @@ test_that("Tests predictions using the Census object", {
   if (!is.null(k)) {
     # Remove two NY cases from dataset to reduce the amount of the computation in the following test
     voters.dc.nj <- voters[c(-3, -7), ]  
-  
+    
     # Create Census data object covering DC and NJ
     census.dc.nj <- get_census_data(key = k, state = c("DC", "NJ"), census.geo = "tract", age = TRUE, sex = FALSE)  
-  
+    
     # Prediction using the Census object created in the previous step; tract-level statistics used in prediction
     x = predict_race(voter.file = voters.dc.nj, census.geo = "tract", census.data = census.dc.nj, age = TRUE, sex = FALSE, party = "PID")
     # test and comfirm the prediction output as expected
@@ -55,7 +55,7 @@ test_that("Tests predictions using the Census object", {
     expect_equal(sum(x$surname == "Johnson"), 0)
     expect_equal(round(x[x$surname == "Khanna", "pred.whi"], 4), 0.0441)
     expect_equal(round(x[x$surname == "Morse", "pred.his"], 4), 0.0163)
-  
+    
     # Prediction using the Census object built in the previous step; tract-level statistics used in prediction
     x = predict_race(voter.file = voters.dc.nj, census.geo = "tract", census.data = censusObj2, party = "PID", age = TRUE, sex = FALSE)  # Pr(Race | Surname, Tract, Party)
     # Test and confirm prediction output is as expected
@@ -79,7 +79,7 @@ test_that("Tests predictions using Census API key", {
     expect_equal(sum(x$surname == "Johnson"), 1)
     expect_equal(round(x[x$surname == "Khanna", "pred.whi"], 4), 0.0819)
     expect_equal(round(x[x$surname == "Morse", "pred.his"], 4), 0.0034)
-
+    
     # Prediction using a valid Census API key; place-level statistics used
     x = predict_race(voter.file = voters, census.geo = "place", census.key = k, sex = TRUE)
     # Test and confirm prediction output is as expected
@@ -103,7 +103,7 @@ test_that("Tests predictions using Census API key", {
     expect_equal(round(x[x$surname == "Morse", "pred.his"], 4), 0.0123)
   }
 }) 
-  
+
 test_that("Tests predictions using Census data from a different year", {
   set.seed(12345)
   
@@ -118,4 +118,3 @@ test_that("Tests predictions using Census data from a different year", {
     expect_equal(round(x[x$surname == "Morse", "pred.his"], 4), 0.0023)
   }
 })
-