diff --git a/DESCRIPTION b/DESCRIPTION index 583b255..e1a6646 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: mrgda Title: Tools for Data Assembly -Version: 0.13.0 +Version: 0.13.0.9000 Authors@R: c( person(given = "Eric", family = "Anderson", email = "andersone@metrumrg.com", role = c("aut", "cre")), @@ -16,6 +16,7 @@ Encoding: UTF-8 Roxygen: list(markdown = TRUE) RoxygenNote: 7.3.3 Imports: + bslib, cli (>= 3.3.0), dplyr, glue (>= 1.6.2), @@ -36,7 +37,10 @@ Imports: stringr, knitr, readr, - tidyselect + shiny, + tidyselect, + ggplot2, + plotly Suggests: testthat (>= 3.1.10), withr (>= 2.5.0), diff --git a/NAMESPACE b/NAMESPACE index f4ec63e..a1e343d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -7,6 +7,7 @@ export(explain) export(query_src_list) export(read_csv_dots) export(read_src_dir) +export(visualize_data) export(write_csv_dots) export(write_derived) import(rlang) diff --git a/NEWS.md b/NEWS.md index 9c74e7d..0d89eab 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,6 +2,7 @@ ## New features and changes +- Added `visualize_data` function to allow users to investigate data sets. (#253) - `query_src_list` now searches across values in addition to column and label names. (#251) # mrgda 0.13.0 diff --git a/R/app_exit_hint.R b/R/app_exit_hint.R new file mode 100644 index 0000000..481d716 --- /dev/null +++ b/R/app_exit_hint.R @@ -0,0 +1,12 @@ +#' @noRd +show_app_exit_hint <- function(app_name) { + cli::cli_rule(left = paste("Launching", app_name)) + cli::cli_alert_info("{.strong How to close the app and free your R console:}") + cli::cli_ul() + cli::cli_li("EITHER: close the browser tab/window running {.emph {app_name}}") + cli::cli_li( + "OR: click the {.strong Stop} button (red stop sign) above the R Console in RStudio" + ) + cli::cli_end() + invisible() +} diff --git a/R/visualize-data.R b/R/visualize-data.R new file mode 100644 index 0000000..0c66816 --- /dev/null +++ b/R/visualize-data.R @@ -0,0 +1,385 @@ +#' Load the mrgda Shiny app +#' +#' Launch an interactive Shiny app for exploring a CSV data set. The Visualizer view +#' provides a scatter plot with selectable X/Y axes, optional color grouping, hover +#' fields based on the active axis/color/facet/filter selections, and ad hoc filters +#' for numeric ranges or categorical values. +#' +#' CSV files are read with [read_csv_dots()], so "." values are interpreted as `NA`. +#' If a YAML specification is supplied, it is loaded with [yspec::ys_load()] and +#' applied via [yspec::ys_factors()] to coerce labeled factors before rendering. +#' +#' @param .csv_path Path to a CSV data file. +#' @param .spec_path Optional path to a YAML specification file for factor metadata. +#' @return A Shiny app object. +#' @export +#' +#' @examples +#' \dontrun{ +#' visualize_data("analysis/adsl.csv") +#' visualize_data("analysis/adsl.csv", "analysis/adsl.yaml") +#' } +visualize_data <- function(.csv_path, .spec_path = NULL) { + if (!file.exists(.csv_path)) { + stop("`.csv_path` does not exist: ", .csv_path, call. = FALSE) + } + + data <- read_csv_dots(.csv_path) + + # Apply factors to data + if (!is.null(.spec_path)) { + + # Confirm spec path exists + if (!file.exists(.spec_path)) { + stop("`.spec_path` does not exist: ", .spec_path, call. = FALSE) + } + + # Load in spec + spec <- yspec::ys_load(.spec_path) + + data <- yspec::ys_add_factors(data, spec, .suffix = "") + } + + data_vars <- names(data) + default_x <- if (length(data_vars) > 0) data_vars[[1]] else "" + default_y <- if (length(data_vars) > 1) data_vars[[2]] else default_x + default_hover <- unique(c(default_x, default_y)) + + ui <- bslib::page_fluid( + theme = bslib::bs_theme(version = 5), + shiny::tabsetPanel( + shiny::tabPanel( + title = "General visualizer", + bslib::layout_sidebar( + sidebar = bslib::sidebar( + shiny::selectInput( + inputId = "x_var", + label = "X-axis", + choices = data_vars, + selected = default_x + ), + shiny::selectInput( + inputId = "y_var", + label = "Y-axis", + choices = data_vars, + selected = default_y + ), + shiny::selectInput( + inputId = "color_var", + label = "Color by", + choices = c("None" = "", data_vars), + selected = "" + ), + shiny::selectInput( + inputId = "facet_var", + label = "Facet by", + choices = c("None" = "", data_vars), + selected = "" + ), + shiny::conditionalPanel( + condition = "input.facet_var !== ''", + shiny::checkboxInput( + inputId = "facet_free_scales", + label = "Free facet axes", + value = FALSE + ) + ), + shiny::selectizeInput( + inputId = "hover_vars", + label = "Hover variables", + choices = data_vars, + selected = default_hover, + multiple = TRUE + ), + shiny::selectizeInput( + inputId = "filter_vars", + label = "Filter variables", + choices = data_vars, + multiple = TRUE + ), + shiny::uiOutput("filter_ui") + ), + shiny::uiOutput("scatter_plot_ui") + ) + ) + ) + ) + + server <- function(input, output, session) { + session$onSessionEnded(function() shiny::stopApp()) + + # show hint once the server has started + show_app_exit_hint("diffDashboard") + + filter_map <- shiny::reactive({ + vars <- input$filter_vars + if (is.null(vars) || length(vars) == 0) { + return(NULL) + } + ids <- make.names(vars, unique = TRUE) + stats::setNames(ids, vars) + }) + + output$filter_ui <- shiny::renderUI({ + vars <- input$filter_vars + if (is.null(vars) || length(vars) == 0) { + return(NULL) + } + mapping <- filter_map() + ui_list <- lapply(vars, function(var) { + id_base <- mapping[[var]] + filter_data <- data[[var]] + if (is.numeric(filter_data)) { + rng <- range(filter_data, na.rm = TRUE) + if (!all(is.finite(rng))) { + return(NULL) + } + shiny::sliderInput( + inputId = paste0("filter_num__", id_base), + label = paste0(var, " range"), + min = rng[1], + max = rng[2], + value = rng + ) + } else { + choices <- sort(unique(as.character(filter_data))) + shiny::selectizeInput( + inputId = paste0("filter_cat__", id_base), + label = paste0(var, " values"), + choices = choices, + selected = choices, + multiple = TRUE + ) + } + }) + do.call(shiny::tagList, ui_list) + }) + + auto_hover_vars <- shiny::reactive({ + vars <- c( + input$x_var, + input$y_var, + if (!is.null(input$color_var) && nzchar(input$color_var)) input$color_var else NULL, + if (!is.null(input$facet_var) && nzchar(input$facet_var)) input$facet_var else NULL, + input$filter_vars + ) + vars <- vars[!is.na(vars) & nzchar(vars)] + unique(vars) + }) + + shiny::observeEvent( + list( + input$x_var, + input$y_var, + input$color_var, + input$facet_var, + input$filter_vars + ), + { + selected <- unique(c(auto_hover_vars(), input$hover_vars)) + shiny::updateSelectizeInput( + session, + "hover_vars", + choices = data_vars, + selected = selected + ) + }, + ignoreInit = FALSE + ) + + filtered_data <- shiny::reactive({ + vars <- input$filter_vars + if (is.null(vars) || length(vars) == 0) { + return(data) + } + mapping <- filter_map() + plot_data <- data + for (var in vars) { + id_base <- mapping[[var]] + filter_data <- plot_data[[var]] + if (is.numeric(filter_data)) { + range_id <- paste0("filter_num__", id_base) + rng <- input[[range_id]] + if (is.null(rng) || length(rng) != 2) { + next + } + keep <- filter_data >= rng[1] & filter_data <= rng[2] + plot_data <- plot_data[keep, , drop = FALSE] + } else { + cat_id <- paste0("filter_cat__", id_base) + vals <- input[[cat_id]] + if (is.null(vals) || length(vals) == 0) { + plot_data <- plot_data[FALSE, , drop = FALSE] + next + } + keep <- as.character(filter_data) %in% vals + plot_data <- plot_data[keep, , drop = FALSE] + } + } + plot_data + }) + + plot_spec <- shiny::reactive({ + plot_data <- filtered_data() + shiny::req(input$x_var, input$y_var) + x <- plot_data[[input$x_var]] + y <- plot_data[[input$y_var]] + keep_xy <- !is.na(x) & !is.na(y) + plot_data <- plot_data[keep_xy, , drop = FALSE] + x <- x[keep_xy] + y <- y[keep_xy] + if (nrow(plot_data) == 0) { + return(list(empty = TRUE)) + } + hover_vars <- unique(c(auto_hover_vars(), input$hover_vars)) + hover_vars <- hover_vars[hover_vars %in% data_vars] + hover_text <- NULL + if (length(hover_vars) > 0) { + hover_df <- plot_data[, hover_vars, drop = FALSE] + hover_df[] <- lapply(hover_df, function(col) { + if (is.factor(col)) { + as.character(col) + } else { + col + } + }) + hover_text <- apply( + hover_df, + 1, + function(row) paste(paste0(hover_vars, ": ", row), collapse = "
") + ) + } + + color_data <- NULL + if (!is.null(input$color_var) && nzchar(input$color_var)) { + color_data <- plot_data[[input$color_var]] + } + + facet_data <- NULL + if (!is.null(input$facet_var) && nzchar(input$facet_var)) { + facet_data <- plot_data[[input$facet_var]] + } + + base_df <- data.frame( + x = x, + y = y, + hover_text = if (is.null(hover_text)) NA_character_ else hover_text, + stringsAsFactors = FALSE + ) + if (!is.null(color_data)) { + base_df$color <- if (is.factor(color_data)) as.character(color_data) else color_data + } + if (!is.null(facet_data)) { + facet_vals <- if (is.factor(facet_data)) as.character(facet_data) else facet_data + facet_vals[is.na(facet_vals)] <- "(Missing)" + base_df$facet <- facet_vals + } + + list( + empty = FALSE, + base_df = base_df, + has_color = !is.null(color_data), + has_facet = !is.null(facet_data), + tooltip = if (is.null(hover_text)) "x+y" else "text", + x_label = input$x_var, + y_label = input$y_var, + color_label = if (!is.null(color_data)) input$color_var else NULL, + facet_free_scales = isTRUE(input$facet_free_scales) + ) + }) + + build_plot <- function(plot_df, spec, facets_per_plot = NULL) { + gg_args <- list( + data = plot_df, + mapping = ggplot2::aes( + x = .data$x, + y = .data$y, + text = .data$hover_text + ) + ) + if (isTRUE(spec$has_color)) { + gg_args$mapping <- ggplot2::aes( + x = .data$x, + y = .data$y, + text = .data$hover_text, + color = .data$color + ) + } + + gg_plot <- do.call(ggplot2::ggplot, gg_args) + + ggplot2::geom_point() + + ggplot2::labs( + x = spec$x_label, + y = spec$y_label, + color = spec$color_label + ) + + ggplot2::theme_bw() + + ggplot2::theme( + panel.grid.minor = ggplot2::element_blank(), + panel.grid.major = ggplot2::element_line(color = "grey85"), + panel.background = ggplot2::element_rect(fill = "white"), + plot.background = ggplot2::element_rect(fill = "white") + ) + + if (isTRUE(spec$has_facet)) { + facet_scales <- if (isTRUE(spec$facet_free_scales)) "free" else "fixed" + gg_plot <- gg_plot + ggplot2::facet_wrap(~facet, ncol = 3, scales = facet_scales) + } + + plotly::ggplotly(gg_plot, tooltip = spec$tooltip) + } + + output$scatter_plot_ui <- shiny::renderUI({ + spec <- plot_spec() + if (isTRUE(spec$empty)) { + return( + shiny::div( + class = "text-muted", + "No rows remain after filtering for non-missing X/Y values." + ) + ) + } + if (!isTRUE(spec$has_facet)) { + output$scatter_plot_1 <- plotly::renderPlotly({ + build_plot(spec$base_df, spec) + }) + return(plotly::plotlyOutput("scatter_plot_1", height = "800px")) + } + + facet_levels <- unique(spec$base_df$facet) + facet_groups <- split( + facet_levels, + ceiling(seq_along(facet_levels) / 9) + ) + + ui_list <- lapply(seq_along(facet_groups), function(i) { + output_id <- paste0("scatter_plot_", i) + facet_subset <- facet_groups[[i]] + output[[output_id]] <- plotly::renderPlotly({ + plot_df <- spec$base_df[spec$base_df$facet %in% facet_subset, , drop = FALSE] + build_plot(plot_df, spec) + }) + plotly::plotlyOutput(output_id, height = "800px") + }) + + if (length(ui_list) > 1) { + tabs <- lapply(seq_along(ui_list), function(i) { + shiny::tabPanel( + title = paste0("Facets ", i), + ui_list[[i]] + ) + }) + do.call(shiny::tabsetPanel, tabs) + } else { + do.call(shiny::tagList, ui_list) + } + }) + } + + shiny::shinyApp( + ui = ui, + server = server, + options = list(.csv_path = .csv_path, .spec_path = .spec_path, + launch.browser = TRUE, quiet = TRUE) + ) +} diff --git a/_pkgdown.yml b/_pkgdown.yml index 0528b02..bb41a48 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -28,3 +28,4 @@ reference: - explain - read_csv_dots - write_csv_dots + - visualize_data diff --git a/man/visualize_data.Rd b/man/visualize_data.Rd new file mode 100644 index 0000000..a1281ee --- /dev/null +++ b/man/visualize_data.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/visualize-data.R +\name{visualize_data} +\alias{visualize_data} +\title{Load the mrgda Shiny app} +\usage{ +visualize_data(.csv_path, .spec_path = NULL) +} +\arguments{ +\item{.csv_path}{Path to a CSV data file.} + +\item{.spec_path}{Optional path to a YAML specification file for factor metadata.} +} +\value{ +A Shiny app object. +} +\description{ +Launch an interactive Shiny app for exploring a CSV data set. The Visualizer view +provides a scatter plot with selectable X/Y axes, optional color grouping, hover +fields based on the active axis/color/facet/filter selections, and ad hoc filters +for numeric ranges or categorical values. +} +\details{ +CSV files are read with \code{\link[=read_csv_dots]{read_csv_dots()}}, so "." values are interpreted as \code{NA}. +If a YAML specification is supplied, it is loaded with \code{\link[yspec:ys_load]{yspec::ys_load()}} and +applied via \code{\link[yspec:ys_factors]{yspec::ys_factors()}} to coerce labeled factors before rendering. +} +\examples{ +\dontrun{ +visualize_data("analysis/adsl.csv") +visualize_data("analysis/adsl.csv", "analysis/adsl.yaml") +} +} diff --git a/tests/testthat/test-visualize-data.R b/tests/testthat/test-visualize-data.R new file mode 100644 index 0000000..2332f7a --- /dev/null +++ b/tests/testthat/test-visualize-data.R @@ -0,0 +1,29 @@ +test_that("visualize_data validates input paths", { + + expect_error( + visualize_data("does-not-exist.csv"), + "`.csv_path` does not exist" + ) + + csv_path <- withr::local_tempfile(fileext = ".csv") + writeLines(c("a,b", "1,2"), csv_path) + + expect_error( + visualize_data(csv_path, "missing-spec.yaml"), + "`.spec_path` does not exist" + ) +}) + +test_that("visualize_data returns a shiny app with options", { + + csv_path <- withr::local_tempfile(fileext = ".csv") + writeLines(c("a,b", "1,2"), csv_path) + + app <- visualize_data(csv_path) + + expect_true(inherits(app, "shiny.appobj")) + expect_equal(app$options$.csv_path, csv_path) + expect_null(app$options$.spec_path) + expect_true(isTRUE(app$options$launch.browser)) + expect_true(isTRUE(app$options$quiet)) +})