Skip to content

Commit

Permalink
Fix johannes feedback
Browse files Browse the repository at this point in the history
  • Loading branch information
philouail committed Mar 15, 2024
1 parent 3b1bea1 commit 95bd882
Show file tree
Hide file tree
Showing 9 changed files with 315 additions and 203 deletions.
2 changes: 1 addition & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,7 @@ export(
"loadXcmsData",
"matchLamasChromPeaks",
"summarizeLamaMatch",
"rtMap"
"matchedRtimes"
)

## New analysis methods
Expand Down
34 changes: 7 additions & 27 deletions R/AllGenerics.R
Original file line number Diff line number Diff line change
Expand Up @@ -61,33 +61,13 @@ setGeneric("addProcessHistory", function(object, ...)
#' function.
#'
#' - `LamaParama`: This function performs retention time correction by aligning
#' chromatographic data to an external reference dataset (method by Carl
#' Brunius).The process involves identifying and aligning peaks within the
#' experimental chromatographic data, represented as an `XcmsExperiment`
#' object, to a predefined set of landmark features called "lamas". These
#' landmark features are characterized by their mass-to-charge ratio (m/z)
#' and retention time.
#'
#' The alignment algorithm matches chromatographic peaks from the experimental
#' data to the lamas, fitting a model based on this match to adjust their
#' retention times and minimize discrepancies. This adjustment is performed
#' file by file. Adjustable parameters such as `ppm`, `tolerance`, and
#' `toleranceRt` define acceptable deviations during the matching process.
#' It's crucial to note that only lamas and chromatographic peaks exhibiting a
#' one-to-one mapping are considered when estimating retention time shifts. If
#' a file has no peaks matching with lamas, no adjustment will be performed,
#' and the file will be returned as-is. Users can evaluate this matching, for
#' example, by checking the number of matches and ranges of the matching
#' peaks, by first running `[matchLamasChromPeaks()]`.
#'
#' Different warping methods are available; users can choose to fit a *loess*
#' (`method = "loess"`, the default) or a *gam* (`method = "gam"`) between the
#' reference data points and observed matching ChromPeaks. Additional
#' parameters such as `span`, `weight`, `outlierTolerance`, `zeroWeight`,
#' and `bs` are specific to these models. These parameters offer flexibility
#' in fine-tuning how the matching chromatographic peaks are fitted to the
#' lamas, thereby generating a model to align the overall retention time for
#' a single file.
#' chromatographic data to an external reference dataset (concept and initial
#' implementation by Carl Brunius). The process involves identifying and
#' aligning peaks within the experimental chromatographic data, represented
#' as an `XcmsExperiment` object, to a predefined set of landmark features
#' called "lamas". These landmark features are characterized by their
#' mass-to-charge ratio (m/z) and retention time. see [LamaParama()] for more
#' information on the method.
#'
#' @section Subset-based alignment:
#'
Expand Down
2 changes: 0 additions & 2 deletions R/DataClasses.R
Original file line number Diff line number Diff line change
Expand Up @@ -1467,8 +1467,6 @@ setClass("PeakGroupsParam",
else TRUE
})

#' @slot rtMap,nChrompeaks slots that should not be accessed by the user.
#' @rdname adjustRtime
setClass("LamaParama",
slots = c(lamas = "matrix",
method = "character",
Expand Down
8 changes: 3 additions & 5 deletions R/XcmsExperiment.R
Original file line number Diff line number Diff line change
Expand Up @@ -1361,7 +1361,7 @@ setMethod(
setMethod(
"adjustRtime", signature(object = "XcmsExperiment", param = "LamaParama"),
function(object, param, BPPARAM = bpparam(), ...) {
if(!hasChromPeaks(object))
if (!hasChromPeaks(object))
stop("'object' needs to have detected chromPeaks. ",
"Run 'findChromPeaks()' first")
if (hasAdjustedRtime(object))
Expand All @@ -1372,15 +1372,13 @@ setMethod(
"alignment.")
fidx <- as.factor(fromFile(object))
rt_raw <- split(rtime(object), fidx)
cp_raw <- split.data.frame(chromPeaks(object)[, c("mz", "rt")],
chromPeaks(object)[, "sample"])
idx <- seq_along(object)

# Check if user as ran matching lama vs chrompeaks beforehand
if(length(param@rtMap) == 0)
if (length(param@rtMap) == 0)
param <- matchLamasChromPeaks(object, param)
rtMap <- param@rtMap
if(length(rtMap) != length(object))
if (length(rtMap) != length(object))
stop("Mismatch between the number of files matched to lamas: ",
length(rtMap), "and files in the object: ", length(object))

Expand Down
216 changes: 151 additions & 65 deletions R/do_adjustRtime-functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -583,6 +583,133 @@ adjustRtimeSubset <- function(rtraw, rtadj, subset,
rtadj
}

###########################################################
###### LamaParama

#' @title Landmark-based alignment: aligning a dataset against an external
#' reference
#'
#' @aliases LamaParama-class
#'
#' @description
#' Alignment is achieved using the ['adjustRtime()'] method with a `param` of
#' class `LamaParama`. This method #' corrects retention time by aligning
#' chromatographic data with an external reference dataset. Peaks in the
#' experimental chromatographic data are aligned to predefined landmark
#' features called "lamas" based on their mass-to-charge ratio (m/z) and
#' retention time.
#'
#' This alignment algorithm matches chromatographic peaks from the experimental
#' data to the lamas, fitting a model based on this match to adjust their
#' retention times and minimize discrepancies. This adjustment is performed
#' file by file. Adjustable parameters such as `ppm`, `tolerance`, and
#' `toleranceRt` define acceptable deviations during the matching process.
#' It's crucial to note that only lamas and chromatographic peaks exhibiting a
#' one-to-one mapping are considered when estimating retention time shifts. If
#' a file has no peaks matching with lamas, no adjustment will be performed,
#' and the file will be returned as-is. Users can evaluate this matching, for
#' example, by checking the number of matches and ranges of the matching
#' peaks, by first running `[matchLamasChromPeaks()]`.
#'
#' Different warping methods are available; users can choose to fit a *loess*
#' (`method = "loess"`, the default) or a *gam* (`method = "gam"`) between the
#' reference data points and observed matching ChromPeaks. Additional
#' parameters such as `span`, `weight`, `outlierTolerance`, `zeroWeight`,
#' and `bs` are specific to these models. These parameters offer flexibility
#' in fine-tuning how the matching chromatographic peaks are fitted to the
#' lamas, thereby generating a model to align the overall retention time for
#' a single file.
#'
#' Other functions related to this method:
#'
#' - `LamaParama()`: create the parameter object for alignment using
#' `adjustRtime()` function. Is also the input for functions listed below.
#'
#' - `matchLamasChromPeaks()`: quickly matches each file's ChromPeaks
#' to Lamas, allowing the user to evaluate the matches for each file.
#'
#' - `summarizeLamaMatch()`: generates a summary of the `LamaParama` method.
#' See below for the details of the return object.
#'
#' - `matchedRtimes()`: Access the list of `data.frame` saved in the
#' `LamaParama` object, generated by the `matchLamasChromPeaks()` function.
#'
#'
#' @param BPPARAM For `matchLamasChromPeaks()`: parallel processing setup.
#' Defaults to `BPPARAM = bpparam()`. See [bpparam()] for more information.
#'
#' @param object An object of class `XcmsExperiment` with defined ChromPeaks.
#'
#' @param param An object of class `LamaParama` that will later be used for
#' adjustment using the `[adjustRtime()]` function.
#'
#' @param LamaParama same object that will be passed to the `adjustRtime()`
#' function. To run this function the `matchLamasChromPeaks()` need to be run
#' on this first.
#'
#' @return
#' For `matchLamasChromPeaks()`: A `LamaParama` object with new slot `rtMap`
#' composed of a list of matrices representing the 1:1 matches between Lamas
#' (ref) and ChromPeaks (obs). To access this, `matchedRtimes()` can be used.
#'
#' For `matchedRtimes()`: A list of `data.frame` representing matches
#' between chromPeaks and `lamas` for each files.
#'
#' For `summarizeLamaMatch()`:A `data.frame` with:
#'
#' - "Total_peaks": total number of chromatographic peaks in the file.
#'
#' - "Matched_peak": The number of matched peaks to Lamas.
#'
#' - "Total_Lamas": Total number of Lamas.
#'
#' - "Model_summary": `summary.loess` or `summary.gam` object for each file.
#'
#' @examples
#' ## load test and reference datasets
#' ref <- loadXcmsData("xmse")
#' tst <- loadXcmsData("faahko_sub2")
#'
#' ## create lamas input from the reference dataset
#' f <- sampleData(ref)$sample_type
#' f[f == "QC"] <- NA
#' ref <- filterFeatures(ref, PercentMissingFilter(threshold = 0, f = f))
#' ref_mz_rt <- featureDefinitions(ref)[, c("mzmed","rtmed")]
#'
#' ## Set up the LamaParama object
#' param <- LamaParama(lamas = ref_mz_rt, method = "loess", span = 0.5,
#' outlierTolerance = 3, zeroWeight = 10, ppm = 20,
#' tolerance = 0, toleranceRt = 20, bs = "tp")
#'
#' ## input into `adjustRtime()`
#' tst_adjusted <- adjustRtime(tst, param = param)
#'
#' ## run diagnostic functions to pre-evaluate alignment
#' param <- matchLamasChromPeaks(tst, param = param)
#' mtch <- matchedRtimes(param)
#'
#' ## Access summary of matches and model information
#' summary <- summarizeLamaMatch(param)
#'
#' ##coverage for each file
#' summary$Matched_peaks / summary$Total_peaks * 100
#'
#' ## Access the information on the model of for the first file
#' summary$model_summary[[1]]
#'
#' @note
#' If there are no matches when using `matchLamasChromPeaks()`, the file
#' retention will not be adjusted when calling [adjustRtime()] with the same
#' `LamaParama` and `XcmsExperiment` object.
#'
#' To see examples on how to utilize this methods and its functionality,
#' see the vignette.
#'
#' @author Carl Brunius, Philippine Louail
#'
#' @name LamaParama
NULL

#' @description
#'
#' Match anchor (reference) peaks to chrompeaks based on rt and m/z. Peaks with
Expand Down Expand Up @@ -635,7 +762,7 @@ adjustRtimeSubset <- function(rtraw, rtadj, subset,
#' *observed* retention times of matching peaks in the same sample from
#' which the retention times in `rt_raw` are.
#'
#' @author Carl Brunius
#' @author Carl Brunius, Philippine Louail
#'
#' @noRd
.adjust_rt_model <- function(rt_raw,
Expand Down Expand Up @@ -684,7 +811,7 @@ adjustRtimeSubset <- function(rtraw, rtadj, subset,
rt_map, span = 0.5,
resid_ratio = 3,
zero_weight = 10,
bs = "tp", warnings = FALSE){
bs = "tp"){
rt_map <- rt_map[order(rt_map$obs), ]
# add first row of c(0,0) to set a fix timepoint.
rt_map <- rbind(c(0,0), rt_map)
Expand All @@ -695,35 +822,25 @@ adjustRtimeSubset <- function(rtraw, rtadj, subset,
.check_gam_library()
model <- mgcv::gam(ref ~ s(obs, bs = bs), weights = weights,
data = rt_map)
} else {
cw <- tryCatch(
} else
model <- loess(ref ~ obs, data = rt_map, span = span,
weights = weights) , warning=function(w) w)
if(!inherits(cw, "loess")){
warning("Warnings arose when fitting the loess model, this file ",
"might have too few datapoints we advise to relax the ",
"matching parameters, to see the warnings run ",
"'warnings = TRUE'")
if (warnings == TRUE)
message("The Loess Warnings are: ", cw)
}
}
weights = weights)
## compute outliers
SSq <- resid(model)^2
meanSSq <- mean(SSq)
not_outlier <- (SSq / meanSSq) < resid_ratio

## re-run only if there is outliers and keep the zero.
if (sum(!not_outlier)){
if (any(!not_outlier)){
not_outlier[1] <- TRUE
rt_map <- rt_map[not_outlier, , drop = FALSE]
weights <- weights[not_outlier]
if (method == "gam") {
model <- mgcv::gam(ref ~ s(obs, bs = "tp"), weights = weights,
data = rt_map)
} else {
suppressWarnings(model <- loess(ref ~ obs, data = rt_map, span = span,
weights = weights)) # should also check warnings here ?
model <- loess(ref ~ obs, data = rt_map, span = span,
weights = weights)
}
}
model
Expand All @@ -738,34 +855,14 @@ adjustRtimeSubset <- function(rtraw, rtadj, subset,
"install with 'BiocInstaller::install(\"mgcv\")'")
}

#' @title Match reference Lamas to ChromPeaks for evaluation prior to alignment
#'
#' @description
#' The `matchLamasChromPeaks()` function quickly matches each file's ChromPeaks
#' to Lamas, allowing the user to evaluate the matches for each file.
#'
#' @param object An object of class `XcmsExperiment` with defined ChromPeaks.
#'
#' @param param An object of class `LamaParama` that will later be used for
#' adjustment using the `[adjustRtime()]` function.
#'
#' @return A `LamaParama` object with new slot rtMap composed of a list of
#' matrices representing the 1:1 matches between Lamas (ref) and ChromPeaks
#' (obs).
#'
#' @note If there are no matches, the file retention will not be adjusted when
#' calling [adjustRtime()] with the same `LamaParama` and `XcmsExperiment`
#' object.
#'
#' @author Philippine Louail, Carl Brunius
#'
#' @rdname adjustRtime
#' @export
#' @rdname LamaParama
matchLamasChromPeaks <- function(object, param, BPPARAM = bpparam()){
if (!hasChromPeaks(object))
stop("'object' needs to have detected ChromPeaks. ",
"Run 'findChromPeaks()' first.")
cp_raw <- split.data.frame(chromPeaks(object)[, c("mz", "rt")],
chromPeaks(object)[, "sample"])
f <- as.factor(chromPeaks(object)[, "sample"], levels = seq_along(object))
cp_raw <- split.data.frame(chromPeaks(object)[, c("mz", "rt")], f)
param@nChromPeaks <- vapply(cp_raw, nrow, numeric(1))
param@rtMap <- bplapply(cp_raw, FUN = function(x) {
.match_reference_anchors(obs_peaks = x, ref_anchors = param@lamas,
Expand All @@ -775,28 +872,8 @@ matchLamasChromPeaks <- function(object, param, BPPARAM = bpparam()){
param
}

#' @title Summary of LamaParama retention time alignment
#'
#' @description
#' The `summarizeLamaMatch()` generates a summary of the LamaParama method.
#' Composed of coverage % of the chrompeaks match over the total chrompeaks of
#' the object. as well as a summary of the model that will be applied to the
#' file to adjust the retention times
#'
#' @param `LamaParama` same object that will be passed to the `adjustRtime()`
#' function. To run this function the `matchLamasChromPeaks()` need to be run
#' on this first.
#'
#' @return A `data.frame` with:
#'
#' - Total_peaks: total number of chromatographic peaks in the file
#' - Matched_peak: The number of matched peaks to Lamas
#' - Total_Lamas: Total number of Lamas
#' - Model_summary: `summary.loess` or `summary.gam` object for each file.
#'
#' @author Philippine Louail, Carl Brunius
#'
#' @rdname adjustRtime
#' @export
#' @rdname LamaParama
summarizeLamaMatch <- function(param){
if (!inherits(param, "LamaParama"))
stop("The input needs to be of class 'LamaParama'")
Expand Down Expand Up @@ -843,9 +920,9 @@ summarizeLamaMatch <- function(param){
# had to change because which.max gives 1 if the vector is all FALSE..
idx <- which.max(diff(vec_temp) < 0)
# Find next biggest value
next_idx <- suppressWarnings(min(which(vec_temp > vec_temp[idx])))
next_idx <- which(vec_temp > vec_temp[idx])[1L]

if(next_idx == Inf){ # no very happy about that..
if (is.na(next_idx)){
l <- idx:length(vec_temp)
vec_temp[l] <- seq(vec_temp[idx], by = 0.000001,
length.out = length(l))
Expand All @@ -862,3 +939,12 @@ summarizeLamaMatch <- function(param){
x[nna_idx] <- vec_temp
x
}

#' @export
#' @rdname LamaParama
matchedRtimes <- function(param){
if(!inherits(param, "LamaParama"))
stop("The inputs need to be of class 'LamaParama'")
rtMap <- param@rtMap
rtMap
}
11 changes: 0 additions & 11 deletions R/functions-Params.R
Original file line number Diff line number Diff line change
Expand Up @@ -309,17 +309,6 @@ LamaParama <- function(lamas = matrix(ncol = 2, nrow = 0,
nChromPeaks = nChromPeaks)
}

#' Function to access rtMap from `LamaParama` object
#' @export
#'
#' @rdname adjustRtime
rtMap <- function(param){
if(!inherits(param, "LamaParama"))
stop("The inputs need to be of class LamaParama")
rtMap <- param@rtMap
rtMap
}

#' @rdname adjustRtime
ObiwarpParam <- function(binSize = 1, centerSample = integer(), response = 1L,
distFun = "cor_opt", gapInit = numeric(),
Expand Down
Loading

0 comments on commit 95bd882

Please sign in to comment.