diff --git a/DESCRIPTION b/DESCRIPTION index e2cce4c..bedd454 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -80,7 +80,7 @@ SystemRequirements: quarto VignetteBuilder: quarto Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.2 +RoxygenNote: 7.3.3 Collate: 'conversion.R' 'MsBackendPython.R' diff --git a/NAMESPACE b/NAMESPACE index 29c6f31..32383f7 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -50,6 +50,9 @@ importFrom(reticulate,py_run_string) importFrom(reticulate,py_set_attr) importFrom(reticulate,py_to_r) importFrom(reticulate,r_to_py) +importMethodsFrom(ProtGenerics,"intensity<-") +importMethodsFrom(ProtGenerics,"mz<-") +importMethodsFrom(ProtGenerics,"peaksData<-") importMethodsFrom(ProtGenerics,"spectraData<-") importMethodsFrom(ProtGenerics,acquisitionNum) importMethodsFrom(ProtGenerics,backendInitialize) diff --git a/NEWS.md b/NEWS.md index 38b1647..f1620c9 100644 --- a/NEWS.md +++ b/NEWS.md @@ -11,6 +11,8 @@ - `backendInitialize()` with defined parameter `data` drops core spectra variables that contain only missing values. - Add `$<-` method for `MsBackendPy`. +- Add `peaksData()<-` for `MsBackendPy`. +- Add `intensity()<-` and `mz()<-` methods for `MsBackendPy`. ## Changes in 0.99.11 diff --git a/R/MsBackendPython.R b/R/MsBackendPython.R index 226557a..09518b4 100644 --- a/R/MsBackendPython.R +++ b/R/MsBackendPython.R @@ -26,8 +26,8 @@ #' the `setBackend()` method. Special care should also be given to parameter #' `spectraVariableMapping`, that defines which spectra variables should be #' considered/translated and how their names should or have to be converted -#' between R and Python. See the description for `backendInitialize()` for -#' details. +#' between R and Python. See the description for `backendInitialize()` and the +#' package vignette for details and examples. #' #' @details #' @@ -55,6 +55,12 @@ #' If for example data was transformed or metadata added or removed in the #' Python object, it immediately affects the `Spectra`/backend. 
#' +#' Any replacement operation uses internally the `spectraData()<-` method, +#' thus replacing/updating values for individual spectra variables or peaks +#' variables will first load the current data from Python to R, update or +#' replace the values and then store the full MS data again to the +#' referenced Python attribute. +#' #' @section `MsBackendPy` methods: #' #' The `MsBackendPy` supports all methods defined by the [Spectra::MsBackend()] @@ -91,6 +97,28 @@ #' instance of `MsBackendPy`. See examples below for different settings #' and conversion of spectra variables. #' +#' - `intensity()`, `intensity()<-`: get or replace the intensity values. +#' `intensity()` returns a `NumericList` of length equal to the number of +#' spectra with each element being the intensity values of the individual +#' mass peaks per spectrum. `intensity()<-` takes the same list-like +#' structure as input parameter. Both the number of spectra and the number of +#' peaks must match the length of the spectra and the number of existing mass +#' peaks. To change the number of peaks use the `peaksData()<-` method +#' instead that replaces the *m/z* and intensity values at the same time. +#' Calling `intensity()<-` will replace the full MS data (spectra variables +#' as well as peaks variables) of the associated Python variable. +#' +#' - `mz()`, `mz()<-`: get or replace the *m/z* values. `mz()` returns a +#' `NumericList` of length equal to the number of spectra with each element +#' being the *m/z* values of the individual mass peaks per spectrum. +#' `mz()<-` takes the same list-like structure as input parameter. Both the +#' number of spectra and the number of peaks must match the length of the +#' spectra and the number of existing mass peaks. To change the number of +#' peaks use the `peaksData()<-` method instead that replaces the *m/z* and +#' intensity values at the same time. 
+#' Calling `mz()<-` will replace the full MS data (spectra variables +#' as well as peaks variables) of the associated Python variable. +#' #' - `peaksData()`: extracts the peaks data matrices from the backend. Python #' code is applied to the data structure in Python to #' extract the *m/z* and intensity values as a list of (numpy) arrays. These @@ -98,6 +126,14 @@ #' Because Python does not allow to name columns of an array, an additional #' loop in R is required to set the column names to `"mz"` and `"intensity"`. #' +#' - `peaksData()<-`: replaces the full peaks data (i.e., *m/z* and intensity +#' values) for all spectra. Parameter `value` has to be a `list`-like +#' structure with each element being a `numeric` matrix with one column +#' (named `"mz"`) containing the spectrum's *m/z* and one column (named +#' `"intensity"`) with the intensity values. This method will replace the +#' full data of the associated Python variable (i.e., both the spectra as +#' well as the peaks data). +#' #' - `spectraData()`: extracts the spectra data from the backend. Which spectra #' variables are translated and retrieved from the Python objects depends on #' the backend's `spectraVariableMapping()`. 
All metadata names defined are @@ -520,9 +556,9 @@ setReplaceMethod("spectraData", "MsBackendPy", function(object, value) { #' #' @rdname MsBackendPy setMethod( - "peaksData", "MsBackendPy", function(object, - columns = c("mz", "intensity"), - drop = FALSE) { + "peaksData", + "MsBackendPy", + function(object, columns = c("mz", "intensity"), drop = FALSE) { if (length(object@py_var)) { res <- switch( object@py_lib, @@ -545,6 +581,24 @@ setMethod( } else list() }) +#' @importMethodsFrom ProtGenerics peaksData<- +#' +#' @rdname MsBackendPy +setReplaceMethod("peaksData", "MsBackendPy", function(object, value) { + Spectra:::.check_peaks_data_value(value, length(object)) + spd <- spectraData(object, union(names(spectraVariableMapping(object)), + peaksVariables(object))) + cns <- colnames(value[[1L]]) + for (cn in cns) { + vals <- lapply(value, "[", , cn) + if (cn %in% c("mz", "intensity")) + vals <- NumericList(vals, compress = FALSE) + spd[[cn]] <- vals + } + spectraData(object) <- spd + object +}) + #' @importMethodsFrom ProtGenerics extractByIndex setMethod("extractByIndex", c("MsBackendPy", "ANY"), function(object, i) { object@i <- object@i[i] @@ -602,6 +656,18 @@ setMethod("intensity", "MsBackendPy", function(object) { NumericList(peaksData(object, "intensity", drop = TRUE), compress = FALSE) }) +#' @importMethodsFrom ProtGenerics intensity<- +#' +#' @rdname MsBackendPy +setReplaceMethod("intensity", "MsBackendPy", function(object, value) { + .check_mz_intensity(value, length(object), lengths(object)) + spd <- spectraData(object, union(names(spectraVariableMapping(object)), + peaksVariables(object))) + spd[["intensity"]] <- value + spectraData(object) <- spd + object +}) + #' @importMethodsFrom ProtGenerics isolationWindowLowerMz setMethod("isolationWindowLowerMz", "MsBackendPy", function(object) { spectraData(object, "isolationWindowLowerMz", drop = TRUE) @@ -627,6 +693,18 @@ setMethod("mz", "MsBackendPy", function(object) { NumericList(peaksData(object, "mz", drop 
= TRUE), compress = FALSE) }) +#' @importMethodsFrom ProtGenerics mz<- +#' +#' @rdname MsBackendPy +setReplaceMethod("mz", "MsBackendPy", function(object, value) { + .check_mz_intensity(value, length(object), lengths(object)) + spd <- spectraData(object, union(names(spectraVariableMapping(object)), + peaksVariables(object))) + spd[["mz"]] <- value + spectraData(object) <- spd + object +}) + #' @importMethodsFrom ProtGenerics polarity setMethod("polarity", "MsBackendPy", function(object) { spectraData(object, "polarity", drop = TRUE) @@ -851,3 +929,25 @@ reindex <- function(object) { keep <- c(setdiff(colnames(x), svs), svs[keep]) x[, colnames(x) %in% keep, drop = FALSE] } + +#' helper to check input/validity of intensity or mz: has to be a list-like +#' structure with numeric vectors. +#' +#' @param x `list` or `NumericList`. +#' +#' @param l `integer(1)` with the number of spectra/expected elements in `x`. +#' +#' @param ls `integer` with the number of peaks, i.e., the lengths of the +#' numeric vectors. 
+#' +#' @noRd +.check_mz_intensity <- function(x, l = length(x), ls = lengths(x)) { + if (!(is.list(x) || inherits(x, "SimpleList"))) + stop("'value' has to be a list-like data structure.") + if (length(x) != l) + stop("length of 'value' has to match the number of spectra") + if (!all(lengths(x) == ls)) + stop("lengths(value) has to match the number of peaks per spectrum") + if (!all(vapply1l(x, is.numeric))) + stop("elements of 'value' are expected to be numeric vectors.") +} diff --git a/man/MsBackendPy.Rd b/man/MsBackendPy.Rd index bb5cac6..7b27669 100644 --- a/man/MsBackendPy.Rd +++ b/man/MsBackendPy.Rd @@ -8,8 +8,11 @@ \alias{spectraData,MsBackendPy-method} \alias{spectraData<-,MsBackendPy-method} \alias{peaksData,MsBackendPy-method} +\alias{peaksData<-,MsBackendPy-method} \alias{$,MsBackendPy-method} \alias{$<-,MsBackendPy-method} +\alias{intensity<-,MsBackendPy-method} +\alias{mz<-,MsBackendPy-method} \alias{spectraVariableMapping<-,MsBackendPy-method} \alias{spectraVariableMapping,MsBackendPy-method} \alias{reindex} @@ -36,10 +39,16 @@ \S4method{peaksData}{MsBackendPy}(object, columns = c("mz", "intensity"), drop = FALSE) +\S4method{peaksData}{MsBackendPy}(object) <- value + \S4method{$}{MsBackendPy}(x, name) \S4method{$}{MsBackendPy}(x, name) <- value +\S4method{intensity}{MsBackendPy}(object) <- value + +\S4method{mz}{MsBackendPy}(object) <- value + \S4method{spectraVariableMapping}{MsBackendPy}(object) <- value \S4method{spectraVariableMapping}{MsBackendPy}(object, value) @@ -118,8 +127,8 @@ if the backend of a \code{Spectra} object is changed to \code{MsBackendPy} using the \code{setBackend()} method. Special care should also be given to parameter \code{spectraVariableMapping}, that defines which spectra variables should be considered/translated and how their names should or have to be converted -between R and Python. See the description for \code{backendInitialize()} for -details. +between R and Python. 
See the description for \code{backendInitialize()} and the +package vignette for details and examples. } \details{ The \code{MsBackendPy} keeps only a reference to the MS data in Python (i.e. the @@ -144,6 +153,12 @@ the variable in Python. Thus, each time MS data is requested from the backend, it is retrieved in its \strong{current} state. If for example data was transformed or metadata added or removed in the Python object, it immediately affects the \code{Spectra}/backend. + +Any replacement operation uses internally the \verb{spectraData()<-} method, +thus replacing/updating values for individual spectra variables or peaks +variables will first load the current data from Python to R, update or +replace the values and then store the full MS data again to the +referenced Python attribute. } \section{\code{MsBackendPy} methods}{ @@ -181,12 +196,39 @@ representing the MS data in Python. It can be either \code{pythonLibrary = "spectrum_utils"}. The function returns an initialized instance of \code{MsBackendPy}. See examples below for different settings and conversion of spectra variables. +\item \code{intensity()}, \verb{intensity()<-}: get or replace the intensity values. +\code{intensity()} returns a \code{NumericList} of length equal to the number of +spectra with each element being the intensity values of the individual +mass peaks per spectrum. \verb{intensity()<-} takes the same list-like +structure as input parameter. Both the number of spectra and the number of +peaks must match the length of the spectra and the number of existing mass +peaks. To change the number of peaks use the \verb{peaksData()<-} method +instead that replaces the \emph{m/z} and intensity values at the same time. +Calling \verb{intensity()<-} will replace the full MS data (spectra variables +as well as peaks variables) of the associated Python variable. +\item \code{mz()}, \verb{mz()<-}: get or replace the \emph{m/z} values. 
\code{mz()} returns a +\code{NumericList} of length equal to the number of spectra with each element +being the \emph{m/z} values of the individual mass peaks per spectrum. +\verb{mz()<-} takes the same list-like structure as input parameter. Both the +number of spectra and the number of peaks must match the length of the +spectra and the number of existing mass peaks. To change the number of +peaks use the \verb{peaksData()<-} method instead that replaces the \emph{m/z} and +intensity values at the same time. +Calling \verb{mz()<-} will replace the full MS data (spectra variables +as well as peaks variables) of the associated Python variable. \item \code{peaksData()}: extracts the peaks data matrices from the backend. Python code is applied to the data structure in Python to extract the \emph{m/z} and intensity values as a list of (numpy) arrays. These are then translated into an R \code{list} of two-column \code{numeric} matrices. Because Python does not allow to name columns of an array, an additional loop in R is required to set the column names to \code{"mz"} and \code{"intensity"}. +\item \verb{peaksData()<-}: replaces the full peaks data (i.e., \emph{m/z} and intensity +values) for all spectra. Parameter \code{value} has to be a \code{list}-like +structure with each element being a \code{numeric} matrix with one column +(named \code{"mz"}) containing the spectrum's \emph{m/z} and one column (named +\code{"intensity"}) with the intensity values. This method will replace the +full data of the associated Python variable (i.e., both the spectra as +well as the peaks data). \item \code{spectraData()}: extracts the spectra data from the backend. Which spectra variables are translated and retrieved from the Python objects depends on the backend's \code{spectraVariableMapping()}. 
All metadata names defined are diff --git a/tests/testthat/test_MsBackendPython.R b/tests/testthat/test_MsBackendPython.R index 65fcf09..8ceab82 100644 --- a/tests/testthat/test_MsBackendPython.R +++ b/tests/testthat/test_MsBackendPython.R @@ -683,6 +683,86 @@ test_that(".drop_na_core_spectra_variables works", { expect_true(any(colnames(res) == "rtime")) }) +test_that("peaksData<-,MsBackendPy works", { + a <- setBackend( + s, MsBackendPy(), pythonVariableName = "pd_test", + spectraVariableMapping = c(INCHI = "inchi", + defaultSpectraVariableMapping()))@backend + spd <- spectraData(a) + svm <- a@spectraVariableMapping + pd <- peaksData(s) + expect_error(peaksData(a) <- pd[1:3], "match length") + pd <- lapply(pd, function(z) { + z[, 2L] <- z[, 2L] / 2 + z + }) + peaksData(a) <- pd + expect_equal(svm, a@spectraVariableMapping) + pd_2 <- peaksData(a) + expect_equal(pd, pd_2) + spd_2 <- spectraData(a) + expect_equal(colnames(spd), colnames(spd_2)) + expect_equal(lengths(spd$intensity), lengths(spd_2$intensity)) + expect_equal(spd$intensity / 2, spd_2$intensity) + spd$intensity <- NULL + spd_2$intensity <- NULL + expect_equal(spd, spd_2) +}) + +test_that(".check_mz_intensity works", { + mzs <- mz(s) + expect_silent(.check_mz_intensity(mzs, length(mzs), lengths(mzs))) + expect_silent(.check_mz_intensity(as.list(mzs), length(mzs), lengths(mzs))) + expect_error(.check_mz_intensity(3, 1, 1), "list-like") + expect_error(.check_mz_intensity(mzs, 1, 1), "number of spectra") + expect_error(.check_mz_intensity(mzs[1:3], 3, c(3, 3, 3)), + "number of peaks") + a <- list(c(1:4), c(1.1, 2.2, 3), c("a", "b")) + expect_error(.check_mz_intensity(a, 4, c(4, 3, 2)), "number of spectra") + expect_error(.check_mz_intensity(a, 3, c(4, 1, 2)), "number of peaks") + expect_error(.check_mz_intensity(a, 3, c(4, 3, 2)), "numeric vectors") +}) + +test_that("mz<-,MsBackendPy works", { + a <- setBackend( + s, MsBackendPy(), pythonVariableName = "mz_test", + spectraVariableMapping = c(INCHI = "inchi", 
+ defaultSpectraVariableMapping()))@backend + spd <- spectraData(a) + mzs <- mz(a) + expect_equal(spd$mz, mzs) + mzs <- mzs / 3 + mz(a) <- mzs + expect_equal(mz(a), mzs) + expect_equal(a$INCHI, spd$INCHI) + mzs <- as.list(mzs) + mzs[[1L]] <- mzs[[1L]] * 2 + mz(a) <- mzs + expect_equal(as.list(mz(a)), mzs) + ## errors/issues + expect_error(mz(a) <- mzs[1:3], "match the number") +}) + +test_that("intensity<-,MsBackendPy works", { + a <- setBackend( + s, MsBackendPy(), pythonVariableName = "intensity_test", + spectraVariableMapping = c(INCHI = "inchi", + defaultSpectraVariableMapping()))@backend + spd <- spectraData(a) + ints <- intensity(a) + expect_equal(spd$intensity, ints) + ints <- ints / 3 + intensity(a) <- ints + expect_equal(intensity(a), ints) + expect_equal(a$INCHI, spd$INCHI) + ints <- as.list(ints) + ints[[1L]] <- ints[[1L]] * 2 + intensity(a) <- ints + expect_equal(as.list(intensity(a)), ints) + ## errors/issues + expect_error(intensity(a) <- ints[1:3], "match the number") +}) + ## Comments, thoughts TODO ## DONE spectraData()<-: replaces the full data and allows adding/removing ## spectra variables. number of spectra has to match. @@ -690,9 +770,11 @@ test_that(".drop_na_core_spectra_variables works", { ## spectraVariableMapping: we can use spectraDataMapping now to add ## additional columns - missing data will be dropped - and not registered ## in @spectraVariableMapping -## TODO $<- : replace the full data? -## TODO peaksData()<-: replace the full data? +## DONE $<- : replace the full data? +## DONE peaksData()<-: replace the full data? +## DONE mz()<- +## DONE intensity()<- ## TODO applyProcessing(): replace the full data? is that needed? should ## internally call peaksData()<- -## TODO all other replacement methods. +## DONE all other replacement methods. 
## TODO support the Spectra unit test suite diff --git a/vignettes/SpectriPy.qmd b/vignettes/SpectriPy.qmd index 977b8aa..53799d2 100644 --- a/vignettes/SpectriPy.qmd +++ b/vignettes/SpectriPy.qmd @@ -631,6 +631,17 @@ sps$SMILES |> head() See also the next section for more information on the mapping between `Spectra`'s spectra variables and *matchms* metadata. +The `MsBackendPy` has full *read/write* support, i.e., it allows adding new +spectra variables or changing existing spectra and/or peaks variables through the +available replacement methods `spectraData()<-`, `peaksData()<-`, +`intensity()<-`, `mz()<-` and `$<-`. Currently, however, these operations replace +the full MS data of the associated Python variable. Thus, even if, for example, +only the retention times are replaced using `$rtime<-`, the (full!) data is +first loaded from Python to R, the retention time values are then replaced and +finally the full data (spectra and peaks variables) is stored again to the +associated Python variable. Replacement operations are thus quite memory +demanding. + ## Conversion of spectra variables Conversion of the MS peaks data (i.e. the *m/z* and intensity values) is always