Skip to content

Commit b29b831

Browse files
authored
Treatment options for zeroes in histograms (#6139)
* add `keep.zeroes` option * add test * document * add news bullet
1 parent 579e2d5 commit b29b831

File tree

4 files changed

+49
-1
lines changed

4 files changed

+49
-1
lines changed

NEWS.md

+1
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,7 @@
188188
* The ellipsis argument is now checked in `fortify()`, `get_alt_text()`,
189189
`labs()` and several guides (@teunbrand, #3196).
190190
* `stat_summary_bin()` no longer ignores `width` parameter (@teunbrand, #4647).
191+
* Added `keep.zeroes` argument to `stat_bin()` (@teunbrand, #3449)
191192

192193
# ggplot2 3.5.1
193194

R/stat-bin.R

+28-1
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,10 @@
2626
#' or left edges of bins are included in the bin.
2727
#' @param pad If `TRUE`, adds empty bins at either end of x. This ensures
2828
#' frequency polygons touch 0. Defaults to `FALSE`.
29+
#' @param keep.zeroes Treatment of zero count bins. If `"all"` (default), such
30+
#' bins are kept as-is. If `"none"`, all zero count bins are filtered out.
31+
#' If `"inner"` only zero count bins at the flanks are filtered out, but not
32+
#' in the middle.
2933
#' @eval rd_computed_vars(
3034
#' count = "number of points in bin.",
3135
#' density = "density of points in bin, scaled to integrate to 1.",
@@ -55,6 +59,7 @@ stat_bin <- function(mapping = NULL, data = NULL,
5559
closed = c("right", "left"),
5660
pad = FALSE,
5761
na.rm = FALSE,
62+
keep.zeroes = "all",
5863
orientation = NA,
5964
show.legend = NA,
6065
inherit.aes = TRUE) {
@@ -77,6 +82,7 @@ stat_bin <- function(mapping = NULL, data = NULL,
7782
pad = pad,
7883
na.rm = na.rm,
7984
orientation = orientation,
85+
keep.zeroes = keep.zeroes,
8086
...
8187
)
8288
)
@@ -89,6 +95,10 @@ stat_bin <- function(mapping = NULL, data = NULL,
8995
StatBin <- ggproto("StatBin", Stat,
9096
setup_params = function(self, data, params) {
9197
params$flipped_aes <- has_flipped_aes(data, params, main_is_orthogonal = FALSE)
98+
params$keep.zeroes <- arg_match0(
99+
params$keep.zeroes %||% "all",
100+
c("all", "none", "inner"), arg_nm = "keep.zeroes"
101+
)
92102

93103
has_x <- !(is.null(data$x) && is.null(params$x))
94104
has_y <- !(is.null(data$y) && is.null(params$y))
@@ -139,7 +149,7 @@ StatBin <- ggproto("StatBin", Stat,
139149
compute_group = function(data, scales, binwidth = NULL, bins = NULL,
140150
center = NULL, boundary = NULL,
141151
closed = c("right", "left"), pad = FALSE,
142-
breaks = NULL, flipped_aes = FALSE,
152+
breaks = NULL, flipped_aes = FALSE, keep.zeroes = "all",
143153
# The following arguments are not used, but must
144154
# be listed so parameters are computed correctly
145155
origin = NULL, right = NULL, drop = NULL) {
@@ -163,6 +173,14 @@ StatBin <- ggproto("StatBin", Stat,
163173
boundary = boundary, closed = closed)
164174
}
165175
bins <- bin_vector(data[[x]], bins, weight = data$weight, pad = pad)
176+
177+
keep <- switch(
178+
keep.zeroes,
179+
none = bins$count != 0,
180+
inner = inner_runs(bins$count != 0),
181+
TRUE
182+
)
183+
bins <- vec_slice(bins, keep)
166184
bins$flipped_aes <- flipped_aes
167185
flip_data(bins, flipped_aes)
168186
},
@@ -174,3 +192,12 @@ StatBin <- ggproto("StatBin", Stat,
174192
dropped_aes = "weight" # after statistical transformation, weights are no longer available
175193
)
176194

195+
inner_runs <- function(x) {
196+
rle <- vec_unrep(x)
197+
nruns <- nrow(rle)
198+
inner <- rep(TRUE, nruns)
199+
i <- unique(c(1, nruns))
200+
inner[i] <- inner[i] & rle$key[i]
201+
rep(inner, rle$times)
202+
}
203+

man/geom_histogram.Rd

+6
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/test-stat-bin.R

+14
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,20 @@ test_that("stat_bin() provides width (#3522)", {
118118
expect_equal(out$xmax - out$xmin, rep(binwidth, 10))
119119
})
120120

121+
test_that("stat_bin(keep.zeroes) options work as intended", {
122+
p <- ggplot(data.frame(x = c(1, 2, 2, 3, 5, 6, 6, 7)), aes(x)) +
123+
scale_x_continuous(limits = c(-1, 9))
124+
125+
ld <- layer_data(p + geom_histogram(binwidth = 1, keep.zeroes = "all"))
126+
expect_equal(ld$x, -1:9)
127+
128+
ld <- layer_data(p + geom_histogram(binwidth = 1, keep.zeroes = "inner"))
129+
expect_equal(ld$x, c(1:7))
130+
131+
ld <- layer_data(p + geom_histogram(binwidth = 1, keep.zeroes = "none"))
132+
expect_equal(ld$x, c(1:3, 5:7))
133+
})
134+
121135
# Underlying binning algorithm --------------------------------------------
122136

123137
test_that("bins() computes fuzz with non-finite breaks", {

0 commit comments

Comments
 (0)