Skip to content

Commit 51c08fb

Browse files
committed
CFVariable::summarise() method, and same forCFArray. Multiple other improvements.
1 parent 772f743 commit 51c08fb

39 files changed

+899
-466
lines changed

DESCRIPTION

+2-1
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ Imports:
2424
Collate:
2525
'AOI.R'
2626
'AOImethod.R'
27+
'CFArray.R'
2728
'CFAuxiliaryLongLat.R'
2829
'CFAxis.R'
2930
'CFAxisCharacter.R'
@@ -35,14 +36,14 @@ Collate:
3536
'CFAxisTime.R'
3637
'CFAxisVertical.R'
3738
'CFBounds.R'
38-
'CFData.R'
3939
'CFDataset.R'
4040
'CFGridMapping.R'
4141
'CFLabel.R'
4242
'NCObject.R'
4343
'CFObject.R'
4444
'CFResource.R'
4545
'CFVariable.R'
46+
'CFVariableBase.R'
4647
'CFVariableL3b.R'
4748
'NCDimension.R'
4849
'NCGroup.R'

NAMESPACE

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ S3method(dimnames,CFVariable)
1212
S3method(groups,CFDataset)
1313
S3method(names,CFDataset)
1414
S3method(str,CFDataset)
15+
export(CFArray)
1516
export(CFAuxiliaryLongLat)
1617
export(CFAxisCharacter)
1718
export(CFAxisDiscrete)
@@ -22,7 +23,6 @@ export(CFAxisScalar)
2223
export(CFAxisTime)
2324
export(CFAxisVertical)
2425
export(CFBounds)
25-
export(CFData)
2626
export(CFDataset)
2727
export(CFGridMapping)
2828
export(CFLabel)

NEWS.md

+19-7
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,28 @@
11
# ncdfCF (development version)
22

3-
- `CFData` objects can now be written to a netCDF file.
4-
- Method `CFData$summarise()` summarises the temporal dimension of a data
5-
object to a lower resolution, using the specific calendar of the temporal
6-
dimension and returns a new `CFData` object with the summarised data.
7-
- Method `CFData$data.table()` exports a data object to a `data.table`.
8-
- `CFVariable` and `CFData` classes now have `time()` method to retrieve the
9-
`CFTime` instance of a "time" axis, if present.
3+
- `CFData` has been renamed `CFArray` to more accurately describe its contents.
4+
- `CFArray` objects can now be written to a netCDF file.
5+
- Methods `CFVariable$summarise()` and `CFArray$summarise()` summarise the
6+
temporal dimension of a data object to a lower resolution using a user-supplied
7+
function, using the specific calendar of the temporal dimension and returning
8+
a new `CFArray` object with the summarised data for every return value of a
9+
call to the function, i.e. the function may have multiple return values. The
10+
`CFArray` version is much faster (because all data has been read already),
11+
but the `CFVariable` version can also summarise data variables that are too
12+
big to fit into the available memory entirely. In either case, code is
13+
optimized compared to the R base version so an operation over the "time"
14+
dimension of a data array is about twice as fast as using the base R
15+
`apply(X, MARGIN, tapply, INDEX, FUN, ...)` call.
16+
- Method `CFArray$data.table()` exports a data object to a `data.table`.
17+
- `CFVariable` and `CFArray` classes now have `time()` method to retrieve the
18+
"time" axis or its `CFTime` instance, if present.
1019
- `CFAxis` has new `coordinates` field with which to retrieve the coordinates
1120
along the axis.
1221
- New attributes can be defined on any object that supports attributes, or
1322
deleted.
23+
- Fixed error on reading bounds for auxiliary coordinate variables. Various
24+
other minor code fixes.
25+
- `CFResource` fixed to conform to new `R6` version.
1426

1527
# ncdfCF 0.3.0
1628

R/CFData.R R/CFArray.R

+38-147
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
# FIXME: summarise() returns new CFData in same group
2-
31
#' Data extracted from a CF data variable
42
#'
53
#' @description This class holds the data that is extracted from a [CFVariable],
@@ -24,8 +22,7 @@
2422
#' must be installed for this to work.
2523
#'
2624
#' The temporal dimension of the data, if present, may be summarised using the
27-
#' `summarise()` method. The data is returned as an array in the standard R
28-
#' format.
25+
#' `summarise()` method. The data is returned as a new `CFArray` instance.
2926
#'
3027
#' In general, the metadata from the netCDF resource will be lost when
3128
#' exporting to a different format insofar as those metadata are not
@@ -34,31 +31,24 @@
3431
#' @docType class
3532
#'
3633
#' @export
37-
CFData <- R6::R6Class("CFData",
38-
inherit = CFObject,
34+
CFArray <- R6::R6Class("CFArray",
35+
inherit = CFVariableBase,
3936
private = list(
40-
# Return the order of dimensional axes that "receive special treatment".
41-
# Scalar axes are not considered here.
42-
YXZT = function() {
43-
orient <- sapply(1:length(dim(self$value)), function(x) self$axes[[x]]$orientation)
44-
match(c("Y", "X", "Z", "T"), orient, nomatch = 0L)
45-
},
46-
47-
# Orient self$value in such a way that it conforms to regular R arrays: axis
37+
# Orient self$values in such a way that it conforms to regular R arrays: axis
4838
# order will be Y-X-Z-T-others and Y values will go from the top to the bottom.
4939
# Returns a new array.
5040
orient = function() {
5141
order <- private$YXZT()
5242
if (sum(order) == 0L) {
5343
warning("Cannot orient data array because axis orientation has not been set")
54-
return(self$value)
44+
return(self$values)
5545
}
5646
if (all(order == 1L:4L))
57-
out <- self$value
47+
out <- self$values
5848
else {
59-
all_dims <- seq(length(dim(self$value)))
49+
all_dims <- seq(length(dim(self$values)))
6050
perm <- c(order[which(order > 0L)], all_dims[!(all_dims %in% order)])
61-
out <- aperm(self$value, perm)
51+
out <- aperm(self$values, perm)
6252
}
6353

6454
# Flip Y-axis, if necessary
@@ -74,31 +64,28 @@ CFData <- R6::R6Class("CFData",
7464
}
7565

7666
out
67+
},
68+
69+
# Get all the data values
70+
get_values = function() {
71+
self$values
72+
},
73+
74+
# Internal apply/tapply over the temporal dimension.
75+
process_data = function(tdim, fac, fun, ...) {
76+
.process.data(self$values, tdim, fac, fun, ...)
7777
}
7878
),
7979
public = list(
80-
#' @field value The data of this object. The structure of the data depends
81-
#' on the method that produced it. Typical structures are an array or a
82-
#' `data.table`.
83-
value = NULL,
84-
85-
#' @field axes List of instances of classes descending from [CFAxis] that
86-
#' are the axes of the data object. If there are any scalar axes, they are
87-
#' listed after the axes that associate with the dimensions of the data.
88-
#' (In other words, axes `1..n` describe the `1..n` data dimensions, while
89-
#' any axes `n+1..m` are scalar axes.)
90-
axes = list(),
91-
92-
#' @field crs An instance of [CFGridMapping] or `NULL` when no grid mapping
93-
#' is available.
94-
crs = NULL,
80+
#' @field values The data of this object.
81+
values = NULL,
9582

9683
#' @description Create an instance of this class.
9784
#' @param name The name of the object.
9885
#' @param group The group that this data should live in. This is usually an
9986
#' in-memory group, but it could be a regular group if the data is
10087
#' prepared for writing into a new netCDF file.
101-
#' @param value The data of this object. The structure of the data depends
88+
#' @param values The data of this object. The structure of the data depends
10289
#' on the method that produced it.
10390
#' @param axes A `list` of [CFAxis] descendant instances that describe the
10491
#' axes of the argument `value`.
@@ -107,33 +94,31 @@ CFData <- R6::R6Class("CFData",
10794
#' @param attributes A `data.frame` with the attributes associated with the
10895
#' data in argument `value`.
10996
#' @return An instance of this class.
110-
initialize = function(name, group, value, axes, crs, attributes) {
97+
initialize = function(name, group, values, axes, crs, attributes) {
11198
var <- NCVariable$new(-1L, name, group, "NC_FLOAT", 0L, NULL)
11299
var$attributes <- attributes
113-
super$initialize(var, group)
100+
super$initialize(var, group, axes, crs)
114101

115-
self$value <- value
116-
self$axes <- axes
117-
self$crs <- crs
102+
self$values <- values
118103
},
119104

120105
#' @description Print a summary of the data object to the console.
121106
print = function() {
122-
cat("<Data>", self$name, "\n")
107+
cat("<Data array>", self$name, "\n")
123108
longname <- self$attribute("long_name")
124109
if (!is.na(longname) && longname != self$name)
125110
cat("Long name:", longname, "\n")
126111

127-
if (all(is.na(self$value))) {
112+
if (all(is.na(self$values))) {
128113
cat("\nValues: -\n")
129-
cat(sprintf(" NA: %d (100%%)\n", length(self$value)))
114+
cat(sprintf(" NA: %d (100%%)\n", length(self$values)))
130115
} else {
131-
rng <- range(self$value, na.rm = TRUE)
116+
rng <- range(self$values, na.rm = TRUE)
132117
units <- self$attribute("units")
133118
if (is.na(units)) units <- ""
134119
cat("\nValues: [", rng[1L], " ... ", rng[2L], "] ", units, "\n", sep = "")
135-
NAs <- sum(is.na(self$value))
136-
cat(sprintf(" NA: %d (%.1f%%)\n", NAs, NAs * 100 / length(self$value)))
120+
NAs <- sum(is.na(self$values))
121+
cat(sprintf(" NA: %d (%.1f%%)\n", NAs, NAs * 100 / length(self$values)))
137122
}
138123

139124
cat("\nAxes:\n")
@@ -146,27 +131,13 @@ CFData <- R6::R6Class("CFData",
146131
self$print_attributes()
147132
},
148133

149-
#' @description Return the time object from the axis representing time.
150-
#' @param want Character string with value "axis" or "time", indicating
151-
#' what is to be returned.
152-
#' @return If `want = "axis"` the [CFAxisTime] axis; if `want = "time"` the
153-
#' `CFTime` instance of the axis, or `NULL` if the variable does not have a
154-
#' "time" dimension.
155-
time = function(want = "axis") {
156-
ndx <- sapply(self$axes, inherits, "CFAxisTime")
157-
if (any(ndx))
158-
if (want == "axis") self$axes[[which(ndx)]]
159-
else self$axes[[which(ndx)]]$time()
160-
else NULL
161-
},
162-
163134
#' @description Retrieve the data in the object exactly as it was produced
164135
#' by the operation on `CFVariable`.
165136
#' @return The data in the object. This is usually an `array` with the
166137
#' contents along axes varying.
167138
raw = function() {
168-
dimnames(self$value) <- self$dimnames
169-
self$value
139+
dimnames(self$values) <- self$dimnames
140+
self$values
170141
},
171142

172143
#' @description Retrieve the data in the object in the form of an R array,
@@ -176,7 +147,7 @@ CFData <- R6::R6Class("CFData",
176147
if (length(self$axes) < 2L)
177148
stop("Cannot create an array from data object with only one axis.", call. = FALSE)
178149

179-
dimnames(self$value) <- self$dimnames
150+
dimnames(self$values) <- self$dimnames
180151
private$orient()
181152
},
182153

@@ -215,7 +186,7 @@ CFData <- R6::R6Class("CFData",
215186
wkt <- if (is.null(self$crs)) .wkt2_crs_geo(4326L)
216187
else self$crs$wkt2(.wkt2_axis_info(self))
217188
arr <- self$array()
218-
numdims <- length(dim(self$value))
189+
numdims <- length(dim(self$values))
219190
dn <- dimnames(arr)
220191
if (numdims == 4L) {
221192
r <- terra::sds(arr, extent = ext, crs = wkt)
@@ -247,8 +218,7 @@ CFData <- R6::R6Class("CFData",
247218
exp <- expand.grid(lapply(self$axes, function(ax) ax$coordinates),
248219
KEEP.OUT.ATTRS = FALSE, stringsAsFactors = FALSE)
249220
dt <- as.data.table(exp)
250-
nm <- names(dt)
251-
dt[ , eval(self$name) := self$value]
221+
dt[ , eval(self$name) := self$values]
252222

253223
long_name <- self$attribute("long_name")
254224
if (is.na(long_name)) long_name <- ""
@@ -258,85 +228,6 @@ CFData <- R6::R6Class("CFData",
258228
dt
259229
},
260230

261-
#' @description Summarise the temporal dimension of the data, if present, to
262-
#' a lower resolution, using a user-supplied aggregation function.
263-
#' @param name Character string with the name for the summarised data.
264-
#' @param period The period to summarise to. Must be one of either "day",
265-
#' "dekad", "month", "quarter", "season", "year". A "quarter" is the
266-
#' standard calendar quarter such as January-March, April-June, etc. A
267-
#' "season" is a meteorological season, such as December-February,
268-
#' March-May, etc. (any December data is from the year preceding the
269-
#' January data). The period must be of lower resolution than the
270-
#' resolution of the time dimension.
271-
#' @param fun A function or a symbol or character string naming a function
272-
#' that will be applied to each grouping of data.
273-
#' @return A new `CFData` object in the same group as `self` with the
274-
#' summarised data.
275-
summarise = function(name, period, fun) {
276-
if (missing(name) || missing(period) || missing(fun))
277-
stop("Arguments 'name', 'period' and 'fun' are required.", call. = FALSE)
278-
if (!(period %in% c("day", "dekad", "month", "quarter", "season", "year")))
279-
stop("Argument 'period' has invalid value.", call. = FALSE)
280-
281-
# Find the time object, create the factor
282-
tax <- self$time("axis")
283-
if (is.null(tax))
284-
stop("No 'time' dimension found to summarise on.", call. = FALSE)
285-
fac <- try(tax$time()$factor(period), silent = TRUE)
286-
if (inherits(fac, "try-error"))
287-
stop("The time dimension is too short to summarise on.", call. = FALSE)
288-
289-
# Make a new time axis for the result
290-
new_tm <- attr(fac, "CFTime")
291-
var <- NCVariable$new(-1L, tax$name, self$group, "NC_DOUBLE", 1L, NULL)
292-
len <- length(new_tm)
293-
new_ax <- if (len == 1L)
294-
CFAxisScalar$new(self$group, var, "T", new_tm)
295-
else {
296-
dim <- NCDimension$new(-1L, tax$name, len, FALSE)
297-
CFAxisTime$new(self$group, var, dim, new_tm)
298-
}
299-
300-
# Summarise
301-
num_dim_axes <- length(dim(self$value))
302-
if (num_dim_axes == 1L) {
303-
dt <- tapply(self$value, fac, fun, na.rm = TRUE)
304-
ax <- new_ax
305-
} else {
306-
tm <- sum(private$YXZT() > 0L) # Test which oriented axes are present, T is the last one
307-
perm <- seq(num_dim_axes)
308-
dt <- apply(self$array(), perm[-tm], tapply, fac, fun, na.rm = TRUE)
309-
perm <- c(perm[2L:tm], 1L, perm[-(1L:tm)])
310-
dt <- aperm(dt, perm)
311-
312-
# Organise the axes
313-
ax <- self$axes
314-
ax[[tm]] <- new_ax
315-
316-
# Fix name of time dimension in dimnames
317-
dn <- dimnames(dt)
318-
axn <- names(dn)
319-
axn[tm] <- tax$name
320-
names(dn) <- axn
321-
dimnames(dt) <- dn
322-
}
323-
324-
# Attributes
325-
atts <- self$attributes
326-
# FIXME: set cell_methods
327-
328-
# Create the output
329-
out <- CFData$new(name, self$group, dt, ax, self$crs, atts)
330-
},
331-
332-
#' @description Plot a 2D slice of data to the display.
333-
#' @param ... Arguments passed to the `base::plot()` function.
334-
plot = function(...) {
335-
image(self$axes[["lon"]]$values, self$axes[["lat"]]$values, self$raw(),
336-
xlab = "Longitude", ylab = "Latitude", useRaster = T,
337-
xaxp = c(0, 360, 8), yaxp = c(-90, 90, 4))
338-
},
339-
340231
#' @description Save the data object to a netCDF file.
341232
#' @param fn The name of the netCDF file to create.
342233
#' @return Self, invisibly.
@@ -360,13 +251,13 @@ CFData <- R6::R6Class("CFData",
360251

361252
# Data variable
362253
# FIXME: Pack data
363-
dim_axes <- length(dim(self$value))
254+
dim_axes <- length(dim(self$values))
364255
axis_names <- sapply(self$axes, function(ax) ax$name)
365256
RNetCDF::var.def.nc(nc, self$name, self$NCvar$vtype, axis_names[1L:dim_axes])
366257
if (length(self$axes) > dim_axes)
367258
self$set_attribute("coordinates", "NC_CHAR", paste(axis_names[-(1L:dim_axes)]))
368259
self$write_attributes(nc, self$name)
369-
RNetCDF::var.put.nc(nc, self$name, self$value)
260+
RNetCDF::var.put.nc(nc, self$name, self$values)
370261

371262
RNetCDF::close.nc(nc)
372263
invisible(self)
@@ -376,7 +267,7 @@ CFData <- R6::R6Class("CFData",
376267
#' @field dimnames (read-only) Retrieve dimnames of the data object.
377268
dimnames = function(value) {
378269
if (missing(value)) {
379-
len <- length(dim(self$value))
270+
len <- length(dim(self$values))
380271
dn <- lapply(1:len, function(ax) dimnames(self$axes[[ax]]))
381272
names(dn) <- sapply(1:len, function(ax) self$axes[[ax]]$name)
382273
dn

R/CFAxis.R

+2-1
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,8 @@ CFAxis <- R6::R6Class("CFAxis",
5151

5252
#' @description Prints a summary of the axis to the console. This method is
5353
#' typically called by the `print()` method of descendant classes.
54-
#' @param ... Ignored.
54+
#' @param ... Arguments passed on to other functions. Of particular interest
55+
#' is `width = ` to indicate a maximum width of attribute columns.
5556
#' @return `self`, invisibly.
5657
print = function(...) {
5758
cat("<", self$friendlyClassName, "> [", self$dimid, "] ", self$name, "\n", sep = "")

0 commit comments

Comments
 (0)