Skip to content

Commit

Permalink
Merge pull request #82 from ipb-halle/feature/qfeature
Browse files Browse the repository at this point in the history
Feature/qfeature
  • Loading branch information
sneumann authored Sep 4, 2024
2 parents 80aa01a + eb5311a commit 469ee81
Show file tree
Hide file tree
Showing 15 changed files with 499 additions and 24 deletions.
2 changes: 2 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
^.*\.Rproj$
^\.Rproj\.user$
10 changes: 7 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@ Package: MetFamily
Type: Package
Title: MetFamily: Discovering Regulated Metabolite Families in Untargeted Metabolomics Studies
Version: 0.99.4
Date: 2024-08-30
Date: 2024-08-22
Author: c( person("Hendrik", "Treutler, role = c("aut"), email = "[email protected]"),
person("Khabat", "Vahabi, role = c("aut"), email = "[email protected]"),
person("Norman", "Storz, role = c("aut"), email = "[email protected]"),
person("Khabat", "Vahabi", role = c("aut"), email = "[email protected]"),
person("Norman", "Storz", role = c("aut"), email = "[email protected]"),
person(given = "Steffen", family = "Neumann", email = "[email protected]",
role = c("aut", "cre"), comment = c(ORCID = "0000-0002-7899-7192")) )
Depends:
Expand Down Expand Up @@ -38,6 +38,7 @@ Imports:
graphics,
grDevices,
methods,
QFeatures,
stats,
utils
Remotes:
Expand Down Expand Up @@ -66,5 +67,8 @@ Collate:
'TreeAlgorithms.R'
'Plots.R'
'R_packages.R'
parsePeakAbundanceMatrixQF.R
readMSDial.R
readMetaboScape.R
RoxygenNote: 7.2.3
Encoding: UTF-8
Binary file added IntMedDf.Rdata
Binary file not shown.
17 changes: 17 additions & 0 deletions MetFamily.Rproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
Version: 1.0

RestoreWorkspace: Default
SaveWorkspace: Default
AlwaysSaveHistory: Default

EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8

RnwWeave: Sweave
LaTeX: pdfLaTeX

BuildType: Package
PackageUseDevtools: Yes
PackageInstallArgs: --no-multiarch --with-keep.source
39 changes: 26 additions & 13 deletions NAMESPACE
100755 → 100644
Original file line number Diff line number Diff line change
@@ -1,4 +1,26 @@
# Generated by roxyXXXgen2: do not edit by hand
# Generated by roxygen2: do not edit by hand

export(calcPlotDendrogram_plotly)
export(calcPlotHeatmapLegend)
export(castListEntries)
export(data.numericmatrix)
export(metaboliteFamilyVersusClass)
export(mzClustGeneric)
export(parsePeakAbundanceMatrixQF)
export(processMS1data)
export(readClusterDataFromProjectFile)
export(readMSDial)
export(readMetaboscape)
export(readProjectData)
export(runMetFamily)
importFrom(QFeatures,QFeatures)
importFrom(S4Vectors,DataFrame)
importFrom(SummarizedExperiment,SummarizedExperiment)
importFrom(grDevices,colorRampPalette)
importFrom(grDevices,rainbow)
importFrom(grDevices,rgb)
importFrom(openxlsx2,read_xlsx)


## Hardcoded:
exportPattern("^[^\\.]")
Expand All @@ -14,15 +36,6 @@ importFrom("stats", "as.dendrogram", "cor", "dendrapply", "dist",
importFrom("utils", "flush.console", "read.table")


## From roxygen:
export(calcPlotDendrogram_plotly)
export(calcPlotHeatmapLegend)
export(castListEntries)
export(data.numericmatrix)
export(metaboliteFamilyVersusClass)
export(mzClustGeneric)
export(processMS1data)
export(readClusterDataFromProjectFile)
export(readProjectData)
export(runMetFamily)
importFrom(grDevices,colorRampPalette)
importFrom(SummarizedExperiment,colData)
importFrom(SummarizedExperiment,rowData)
importFrom(SummarizedExperiment,assay)
5 changes: 3 additions & 2 deletions R/DataProcessing.R
Original file line number Diff line number Diff line change
Expand Up @@ -202,8 +202,9 @@ readProjectData <- function(fileLines, progress = FALSE)
#Start of importing annotation part1 from two
# Display the message and give the user the option to choose whether to upload the annotation file or not.
#If Y shows selection window for annotation file. if N ignores annotation process
message("Do you want to upload the annotation file? (Y/N)")
user_choice <- readline()
#message("Do you want to upload the annotation file? (Y/N)")
#user_choice <- readline()
user_choice <- "N"

if (toupper(user_choice) == "Y") {

Expand Down
19 changes: 13 additions & 6 deletions R/FragmentMatrixFunctions.R
Original file line number Diff line number Diff line change
Expand Up @@ -1396,7 +1396,7 @@ mzClustGeneric <- function(p,
return(list(mat=groupmat,idx=groupindex))
}

convertToProjectFile <- function(filePeakMatrix,
convertToProjectFile <- function(filePeakMatrixPath,
fileSpectra,
parameterSet,
progress = FALSE){
Expand Down Expand Up @@ -1434,8 +1434,11 @@ convertToProjectFile <- function(filePeakMatrix,

rm(returnObj)


filePeakMatrixQF <- readMSDial(filePeakMatrixPath)

returnObj <- convertToProjectFile2(
filePeakMatrix = filePeakMatrix,
filePeakMatrixQF = filePeakMatrixQF,
spectraList = spectraList, precursorMz = precursorMz, precursorRt = precursorRt,
metaboliteFamilies = rep(x = "", times = numberOfSpectra), uniqueMetaboliteFamilies = NULL, metaboliteFamilyColors = NULL,
parameterSet = parameterSet,
Expand All @@ -1454,7 +1457,7 @@ convertToProjectFile <- function(filePeakMatrix,
return(returnObj)
}

convertToProjectFile2 <- function(filePeakMatrix,
convertToProjectFile2 <- function(filePeakMatrixQF,
spectraList,
precursorMz,
precursorRt,
Expand All @@ -1471,9 +1474,9 @@ convertToProjectFile2 <- function(filePeakMatrix,
## metabolite profile
if(!is.na(progress)) if(progress) incProgress(amount = 0.1, detail = paste("Parsing MS1 file...", sep = "")) else print(paste("Parsing MS1 file...", sep = ""))

if(!is.null(filePeakMatrix)){
returnObj <- parsePeakAbundanceMatrix(
filePeakMatrix = filePeakMatrix,
if(!is.null(filePeakMatrixQF)){
returnObj <- parsePeakAbundanceMatrixQF(
qfeatures = filePeakMatrixQF,
doPrecursorDeisotoping = parameterSet$doPrecursorDeisotoping,
mzDeviationInPPM_precursorDeisotoping = parameterSet$mzDeviationInPPM_precursorDeisotoping,
mzDeviationAbsolute_precursorDeisotoping = parameterSet$mzDeviationAbsolute_precursorDeisotoping,
Expand Down Expand Up @@ -1582,6 +1585,10 @@ convertToProjectFile2 <- function(filePeakMatrix,
metaboliteFamilies <- metaboliteFamilies[orderMS1features]
furtherProperties <- lapply(X = furtherProperties, FUN = function(props){props[orderMS1features]})

#TODO: resolve ?
#temporary fix
#filePeakMatrix <- NULL

if(!is.null(filePeakMatrix)){
## allHits: dataFrame$"Average Mz" --> precursorMz; allHits indexes the spectraList
diffAll <- abs(outer(X = precursorMz, Y = dataFrame$"Average Mz", FUN = function(x, y){abs(x-y)}))
Expand Down
181 changes: 181 additions & 0 deletions R/parsePeakAbundanceMatrixQF.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
#' Title
#'
#' @param qfeatures Object of type QFeatures
#' @param doPrecursorDeisotoping
#' @param mzDeviationInPPM_precursorDeisotoping
#' @param mzDeviationAbsolute_precursorDeisotoping
#' @param maximumRtDifference
#' @param progress boolean whether or not to show a progress bar
#'
#' @return
#' @export
#'
#' @examples

parsePeakAbundanceMatrixQF <- function(qfeatures,
doPrecursorDeisotoping,
mzDeviationInPPM_precursorDeisotoping,
mzDeviationAbsolute_precursorDeisotoping,
maximumRtDifference,
progress=FALSE)
{
## read file
if(!is.na(progress)) {
if(progress) {
incProgress(amount = 0.1, detail = paste("Parsing MS1 file content...", sep = ""))
} else {
print(paste("Parsing MS1 file content...", sep = ""))
}
}




cols_to_exclude <- c("Reference RT","Reference m/z","Comment",
"Manually modified for quantification",
"Total score","RT similarity","Average","Stdev")

cols_to_keep <- which(!colnames(rowData(qfeatures))[[1]] %in% cols_to_exclude)

dataFrame <- cbind(rowData(qfeatures)[[1]][,cols_to_keep] ,assay(qfeatures))
#workaround for avoiding change in colnames during coercion
cnames <- colnames(dataFrame)
dataFrame <- as.data.frame(dataFrame)
colnames(dataFrame) <- cnames
oldFormat <- ncol(colData(qfeatures))==3
numRowDataCols <- ncol(rowData(qfeatures)[[1]])
dataColumnStartEndIndeces <- c(numRowDataCols+1,ncol(dataFrame))
numberOfPrecursors <- nrow(dataFrame)
numberOfPrecursorsPrior <- numberOfPrecursors

if(ncol(assay(qfeatures))>0){
numberOfDataColumns <- ncol(assay(qfeatures))
sampleClass <- colData(qfeatures)$Class
sampleType <- colData(qfeatures)$Type
sampleInjectionOrder <- colData(qfeatures)$"Injection order"
batchID <- NULL
if(! is.null(colData(qfeatures)$BatchID))
batchID <- colData(qfeatures)$BatchID

} else {
dataColumnStartEndIndeces <- NULL
numberOfDataColumns <- 0
sampleClass <- NULL
sampleType <- NULL
sampleInjectionOrder <- NULL
batchID <- NULL
}

commaNumbers <- sum(grepl(x = dataFrame$"Average Mz", pattern = "^(\\d+,\\d+$)|(^\\d+$)"))
decimalSeparatorIsComma <- commaNumbers == nrow(dataFrame)
if(decimalSeparatorIsComma){
if(!is.null(dataFrame$"Average Rt(min)")) dataFrame$"Average Rt(min)" <- gsub(x = gsub(x = dataFrame$"Average Rt(min)", pattern = "\\.", replacement = ""), pattern = ",", replacement = ".")
if(!is.null(dataFrame$"Average Mz")) dataFrame$"Average Mz" <- gsub(x = gsub(x = dataFrame$"Average Mz", pattern = "\\.", replacement = ""), pattern = ",", replacement = ".")
if(!is.null(dataFrame$"Fill %")) dataFrame$"Fill %" <- gsub(x = gsub(x = dataFrame$"Fill %", pattern = "\\.", replacement = ""), pattern = ",", replacement = ".")
if(!is.null(dataFrame$"Dot product")) dataFrame$"Dot product" <- gsub(x = gsub(x = dataFrame$"Dot product", pattern = "\\.", replacement = ""), pattern = ",", replacement = ".")
if(!is.null(dataFrame$"Reverse dot product")) dataFrame$"Reverse dot product" <- gsub(x = gsub(x = dataFrame$"Reverse dot product", pattern = "\\.", replacement = ""), pattern = ",", replacement = ".")
if(!is.null(dataFrame$"Fragment presence %")) dataFrame$"Fragment presence %" <- gsub(x = gsub(x = dataFrame$"Fragment presence %", pattern = "\\.", replacement = ""), pattern = ",", replacement = ".")

## replace -1 by 0
if(numberOfDataColumns > 0) {
for(colIdx in (numRowDataCols+1):ncol(dataFrame)){
dataFrame[ , colIdx] <- gsub(x = gsub(x = dataFrame[ , colIdx], pattern = "\\.", replacement = ""), pattern = ",", replacement = ".")
}
}
}

###################
## column formats
if(!is.null(dataFrame$"Average Rt(min)")) dataFrame$"Average Rt(min)" <- as.numeric(dataFrame$"Average Rt(min)")
if(!is.null(dataFrame$"Average Mz")) dataFrame$"Average Mz" <- as.numeric(dataFrame$"Average Mz")
if(!is.null(dataFrame$"Fill %")) dataFrame$"Fill %" <- as.numeric(dataFrame$"Fill %")
if(!is.null(dataFrame$"MS/MS included")) dataFrame$"MS/MS included" <- as.logical(dataFrame$"MS/MS included")
if(!is.null(dataFrame$"Dot product")) dataFrame$"Dot product" <- as.numeric(dataFrame$"Dot product")
if(!is.null(dataFrame$"Reverse dot product")) dataFrame$"Reverse dot product" <- as.numeric(dataFrame$"Reverse dot product")
if(!is.null(dataFrame$"Fragment presence %")) dataFrame$"Fragment presence %" <- as.numeric(dataFrame$"Fragment presence %")

#####################
## sorted by m/z (needed for deisotoping)
if(!is.null(dataFrame$"Average Mz"))
dataFrame <- dataFrame[order(dataFrame$"Average Mz"), ]

## replace -1 by 0
if(numberOfDataColumns > 0){
for(colIdx in (numRowDataCols+1):ncol(dataFrame)){
dataFrame[ , colIdx] <- as.numeric(dataFrame[ , colIdx])
if(!is.na(sum(dataFrame[,colIdx] == -1)))
dataFrame[(dataFrame[,colIdx] == -1),colIdx] <- 0
}
}
vals <- NULL
## deisotoping
numberOfRemovedIsotopePeaks <- 0
if(doPrecursorDeisotoping & !is.null(dataFrame$"Average Mz")){
if(!is.na(progress)) if(progress) incProgress(amount = 0, detail = paste("Precursor deisotoping...", sep = "")) else print(paste("Precursor deisotoping...", sep = ""))
distance13Cminus12C <- 1.0033548378

## mark isotope precursors
precursorsToRemove <- vector(mode = "logical", length = numberOfPrecursors)

if(numberOfDataColumns > 0){
intensities <- dataFrame[ , (numRowDataCols+1):ncol(dataFrame)]
medians <- apply(X = as.matrix(intensities), MARGIN = 1, FUN = median)
}

for(precursorIdx in seq_len(numberOfPrecursors)){
if((precursorIdx %% (as.integer(numberOfPrecursors/10))) == 0)
if(!is.na(progress)) if(progress) incProgress(amount = 0.0, detail = paste("Precursor deisotoping ", precursorIdx, " / ", numberOfPrecursors, sep = "")) else print(paste("Precursor deisotoping ", precursorIdx, " / ", numberOfPrecursors, sep = ""))

mzError <- dataFrame$"Average Mz"[[precursorIdx]] * mzDeviationInPPM_precursorDeisotoping / 1000000
mzError <- max(mzError, mzDeviationAbsolute_precursorDeisotoping)

## RT difference <= maximumRtDifference
validPrecursorsInRt <- abs(dataFrame$"Average Rt(min)"[[precursorIdx]] - dataFrame$"Average Rt(min)"[-precursorIdx]) <= maximumRtDifference

## MZ difference around 1.0033548378 (first isotope) or 1.0033548378 * 2 (second isotope)
validPrecursorsInMz1 <- abs((dataFrame$"Average Mz"[[precursorIdx]] - distance13Cminus12C * 1) - dataFrame$"Average Mz"[-precursorIdx]) <= mzError
validPrecursorsInMz2 <- abs((dataFrame$"Average Mz"[[precursorIdx]] - distance13Cminus12C * 2) - dataFrame$"Average Mz"[-precursorIdx]) <= mzError
validPrecursorsInMz <- validPrecursorsInMz1 | validPrecursorsInMz2

## intensity gets smaller in the isotope spectrum
if(numberOfDataColumns > 0){
validPrecursorsInIntensity <- (medians[-precursorIdx] - medians[[precursorIdx]]) > 0
} else {
validPrecursorsInIntensity <- TRUE
}

if(any(validPrecursorsInRt & validPrecursorsInMz & validPrecursorsInIntensity))
precursorsToRemove[[precursorIdx]] <- TRUE

}

## remove isotopes
dataFrame <- dataFrame[!precursorsToRemove, ]

numberOfRemovedIsotopePeaks <- sum(precursorsToRemove)
numberOfPrecursors <- nrow(dataFrame)
}

if(!is.na(progress)) if(progress) incProgress(amount = 0, detail = paste("Boxing...", sep = "")) else print(paste("Boxing...", sep = ""))
returnObj <- list()
returnObj$dataFrame <- dataFrame
returnObj$vals <- vals

## meta
returnObj$oldFormat <- oldFormat
returnObj$numberOfPrecursors <- numberOfPrecursors
returnObj$dataColumnStartEndIndeces <- dataColumnStartEndIndeces
returnObj$numberOfDataColumns <- numberOfDataColumns

## group anno
returnObj$sampleClass <- sampleClass
returnObj$sampleType <- sampleType
returnObj$sampleInjectionOrder <- sampleInjectionOrder
returnObj$batchID <- batchID

## misc
returnObj$numberOfPrecursorsPrior <- numberOfPrecursorsPrior
returnObj$numberOfRemovedIsotopePeaks <- numberOfRemovedIsotopePeaks

return (returnObj)
}
Loading

0 comments on commit 469ee81

Please sign in to comment.