Merge pull request #82 from ipb-halle/feature/qfeature

Feature/qfeature
ipb-halle · Sep 4, 2024 · 469ee81 · 469ee81
2 parents 80aa01a + eb5311a
commit 469ee81
Show file tree

Hide file tree

Showing 15 changed files with 499 additions and 24 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -0,0 +1,2 @@
+^.*\.Rproj$
+^\.Rproj\.user$
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -2,10 +2,10 @@ Package: MetFamily
 Type: Package
 Title: MetFamily: Discovering Regulated Metabolite Families in Untargeted Metabolomics Studies
 Version: 0.99.4
-Date: 2024-08-30
+Date: 2024-08-22
 Author: c( person("Hendrik", "Treutler, role = c("aut"), email = "[email protected]"),
-           person("Khabat", "Vahabi, role = c("aut"), email = "[email protected]"),
-           person("Norman", "Storz, role = c("aut"), email = "[email protected]"),
+           person("Khabat", "Vahabi", role = c("aut"), email = "[email protected]"),
+           person("Norman", "Storz", role = c("aut"), email = "[email protected]"),
            person(given = "Steffen", family = "Neumann", email = "[email protected]",
            role = c("aut", "cre"), comment = c(ORCID = "0000-0002-7899-7192")) )
 Depends:
@@ -38,6 +38,7 @@ Imports:
     graphics,
     grDevices,
     methods,
+    QFeatures,
     stats,
     utils
 Remotes: 
@@ -66,5 +67,8 @@ Collate:
     'TreeAlgorithms.R'
     'Plots.R'
     'R_packages.R'
+    parsePeakAbundanceMatrixQF.R
+    readMSDial.R
+    readMetaboScape.R
 RoxygenNote: 7.2.3
 Encoding: UTF-8
diff --git a/IntMedDf.Rdata b/IntMedDf.Rdata
diff --git a/MetFamily.Rproj b/MetFamily.Rproj
@@ -0,0 +1,17 @@
+Version: 1.0
+
+RestoreWorkspace: Default
+SaveWorkspace: Default
+AlwaysSaveHistory: Default
+
+EnableCodeIndexing: Yes
+UseSpacesForTab: Yes
+NumSpacesForTab: 2
+Encoding: UTF-8
+
+RnwWeave: Sweave
+LaTeX: pdfLaTeX
+
+BuildType: Package
+PackageUseDevtools: Yes
+PackageInstallArgs: --no-multiarch --with-keep.source
diff --git a/NAMESPACE b/NAMESPACE
@@ -1,4 +1,26 @@
-# Generated by roxyXXXgen2: do not edit by hand
+# Generated by roxygen2: do not edit by hand
+
+export(calcPlotDendrogram_plotly)
+export(calcPlotHeatmapLegend)
+export(castListEntries)
+export(data.numericmatrix)
+export(metaboliteFamilyVersusClass)
+export(mzClustGeneric)
+export(parsePeakAbundanceMatrixQF)
+export(processMS1data)
+export(readClusterDataFromProjectFile)
+export(readMSDial)
+export(readMetaboscape)
+export(readProjectData)
+export(runMetFamily)
+importFrom(QFeatures,QFeatures)
+importFrom(S4Vectors,DataFrame)
+importFrom(SummarizedExperiment,SummarizedExperiment)
+importFrom(grDevices,colorRampPalette)
+importFrom(grDevices,rainbow)
+importFrom(grDevices,rgb)
+importFrom(openxlsx2,read_xlsx)
+
 
 ## Hardcoded:
 exportPattern("^[^\\.]")
@@ -14,15 +36,6 @@ importFrom("stats", "as.dendrogram", "cor", "dendrapply", "dist",
 importFrom("utils", "flush.console", "read.table")
 
 
-## From roxygen:
-export(calcPlotDendrogram_plotly)
-export(calcPlotHeatmapLegend)
-export(castListEntries)
-export(data.numericmatrix)
-export(metaboliteFamilyVersusClass)
-export(mzClustGeneric)
-export(processMS1data)
-export(readClusterDataFromProjectFile)
-export(readProjectData)
-export(runMetFamily)
-importFrom(grDevices,colorRampPalette)
+importFrom(SummarizedExperiment,colData)
+importFrom(SummarizedExperiment,rowData)
+importFrom(SummarizedExperiment,assay)
diff --git a/R/DataProcessing.R b/R/DataProcessing.R
@@ -202,8 +202,9 @@ readProjectData <- function(fileLines, progress = FALSE)
   #Start of importing  annotation part1 from two
   # Display the message and give the user the option to choose whether to upload the annotation file or not. 
   #If Y shows selection window for annotation file. if N ignores annotation process
-  message("Do you want to upload the annotation file? (Y/N)")
-  user_choice <- readline()
+  #message("Do you want to upload the annotation file? (Y/N)")
+  #user_choice <- readline()
+  user_choice <- "N"
 
   if (toupper(user_choice) == "Y") {
 

diff --git a/R/FragmentMatrixFunctions.R b/R/FragmentMatrixFunctions.R
@@ -1396,7 +1396,7 @@ mzClustGeneric <- function(p,
   return(list(mat=groupmat,idx=groupindex))
 }
 
-convertToProjectFile <- function(filePeakMatrix, 
+convertToProjectFile <- function(filePeakMatrixPath, 
                                  fileSpectra, 
                                  parameterSet, 
                                  progress = FALSE){
@@ -1434,8 +1434,11 @@ convertToProjectFile <- function(filePeakMatrix,
 
   rm(returnObj)
 
+
+  filePeakMatrixQF <- readMSDial(filePeakMatrixPath)
+
   returnObj <- convertToProjectFile2(
-    filePeakMatrix = filePeakMatrix, 
+    filePeakMatrixQF = filePeakMatrixQF, 
     spectraList = spectraList, precursorMz = precursorMz, precursorRt = precursorRt, 
     metaboliteFamilies = rep(x = "", times = numberOfSpectra), uniqueMetaboliteFamilies = NULL, metaboliteFamilyColors = NULL, 
     parameterSet = parameterSet, 
@@ -1454,7 +1457,7 @@ convertToProjectFile <- function(filePeakMatrix,
   return(returnObj)
 }
 
-convertToProjectFile2 <- function(filePeakMatrix, 
+convertToProjectFile2 <- function(filePeakMatrixQF, 
                                   spectraList, 
                                   precursorMz, 
                                   precursorRt, 
@@ -1471,9 +1474,9 @@ convertToProjectFile2 <- function(filePeakMatrix,
   ## metabolite profile
   if(!is.na(progress))  if(progress)  incProgress(amount = 0.1, detail = paste("Parsing MS1 file...", sep = "")) else print(paste("Parsing MS1 file...", sep = ""))
 
-  if(!is.null(filePeakMatrix)){
-    returnObj <- parsePeakAbundanceMatrix(
-      filePeakMatrix = filePeakMatrix, 
+  if(!is.null(filePeakMatrixQF)){
+    returnObj <- parsePeakAbundanceMatrixQF(
+      qfeatures = filePeakMatrixQF, 
       doPrecursorDeisotoping = parameterSet$doPrecursorDeisotoping, 
       mzDeviationInPPM_precursorDeisotoping = parameterSet$mzDeviationInPPM_precursorDeisotoping, 
       mzDeviationAbsolute_precursorDeisotoping = parameterSet$mzDeviationAbsolute_precursorDeisotoping, 
@@ -1582,6 +1585,10 @@ convertToProjectFile2 <- function(filePeakMatrix,
   metaboliteFamilies <- metaboliteFamilies[orderMS1features]
   furtherProperties <- lapply(X = furtherProperties, FUN = function(props){props[orderMS1features]})
 
+  #TODO: resolve ?
+  #temporary fix
+  #filePeakMatrix <- NULL
+
   if(!is.null(filePeakMatrix)){
     ## allHits: dataFrame$"Average Mz" --> precursorMz; allHits indexes the spectraList
     diffAll <- abs(outer(X = precursorMz, Y = dataFrame$"Average Mz", FUN = function(x, y){abs(x-y)}))

diff --git a/R/parsePeakAbundanceMatrixQF.R b/R/parsePeakAbundanceMatrixQF.R
@@ -0,0 +1,181 @@
+#' Title
+#'
+#' @param qfeatures Object of type QFeatures 
+#' @param doPrecursorDeisotoping 
+#' @param mzDeviationInPPM_precursorDeisotoping 
+#' @param mzDeviationAbsolute_precursorDeisotoping 
+#' @param maximumRtDifference 
+#' @param progress boolean whether or not to show a progress bar
+#'
+#' @return
+#' @export
+#'
+#' @examples
+
+parsePeakAbundanceMatrixQF <- function(qfeatures, 
+                                     doPrecursorDeisotoping, 
+                                     mzDeviationInPPM_precursorDeisotoping, 
+                                     mzDeviationAbsolute_precursorDeisotoping, 
+                                     maximumRtDifference, 
+                                     progress=FALSE)
+{
+  ## read file
+  if(!is.na(progress)) {
+    if(progress) {
+      incProgress(amount = 0.1, detail = paste("Parsing MS1 file content...", sep = ""))
+    } else {
+      print(paste("Parsing MS1 file content...", sep = ""))
+    } 
+  }
+
+
+
+
+  cols_to_exclude <- c("Reference RT","Reference m/z","Comment",
+                       "Manually modified for quantification",
+                       "Total score","RT similarity","Average","Stdev") 
+
+  cols_to_keep <- which(!colnames(rowData(qfeatures))[[1]] %in% cols_to_exclude)
+
+  dataFrame <- cbind(rowData(qfeatures)[[1]][,cols_to_keep] ,assay(qfeatures))
+  #workaround for avoiding change in colnames during coercion
+  cnames <- colnames(dataFrame)
+  dataFrame <- as.data.frame(dataFrame)
+  colnames(dataFrame) <- cnames
+  oldFormat <- ncol(colData(qfeatures))==3
+  numRowDataCols <- ncol(rowData(qfeatures)[[1]])
+  dataColumnStartEndIndeces <- c(numRowDataCols+1,ncol(dataFrame))
+  numberOfPrecursors <- nrow(dataFrame)
+  numberOfPrecursorsPrior <- numberOfPrecursors 
+
+  if(ncol(assay(qfeatures))>0){
+    numberOfDataColumns   <- ncol(assay(qfeatures))
+    sampleClass           <- colData(qfeatures)$Class
+    sampleType            <- colData(qfeatures)$Type
+    sampleInjectionOrder  <- colData(qfeatures)$"Injection order"
+    batchID               <- NULL
+    if(! is.null(colData(qfeatures)$BatchID))
+      batchID            <- colData(qfeatures)$BatchID
+
+    }   else {
+    dataColumnStartEndIndeces <- NULL
+    numberOfDataColumns <- 0
+    sampleClass          <- NULL
+    sampleType           <- NULL
+    sampleInjectionOrder <- NULL
+    batchID              <- NULL
+  }
+
+  commaNumbers <- sum(grepl(x = dataFrame$"Average Mz", pattern = "^(\\d+,\\d+$)|(^\\d+$)"))
+  decimalSeparatorIsComma <- commaNumbers == nrow(dataFrame)
+  if(decimalSeparatorIsComma){
+    if(!is.null(dataFrame$"Average Rt(min)"))     dataFrame$"Average Rt(min)"     <- gsub(x = gsub(x = dataFrame$"Average Rt(min)", pattern = "\\.", replacement = ""), pattern = ",", replacement = ".")
+    if(!is.null(dataFrame$"Average Mz"))          dataFrame$"Average Mz"          <- gsub(x = gsub(x = dataFrame$"Average Mz", pattern = "\\.", replacement = ""), pattern = ",", replacement = ".")
+    if(!is.null(dataFrame$"Fill %"))              dataFrame$"Fill %"              <- gsub(x = gsub(x = dataFrame$"Fill %", pattern = "\\.", replacement = ""), pattern = ",", replacement = ".")
+    if(!is.null(dataFrame$"Dot product"))         dataFrame$"Dot product"         <- gsub(x = gsub(x = dataFrame$"Dot product", pattern = "\\.", replacement = ""), pattern = ",", replacement = ".")
+    if(!is.null(dataFrame$"Reverse dot product")) dataFrame$"Reverse dot product" <- gsub(x = gsub(x = dataFrame$"Reverse dot product", pattern = "\\.", replacement = ""), pattern = ",", replacement = ".")
+    if(!is.null(dataFrame$"Fragment presence %")) dataFrame$"Fragment presence %" <- gsub(x = gsub(x = dataFrame$"Fragment presence %", pattern = "\\.", replacement = ""), pattern = ",", replacement = ".")
+
+    ## replace -1 by 0
+    if(numberOfDataColumns > 0) {
+      for(colIdx in (numRowDataCols+1):ncol(dataFrame)){
+        dataFrame[ , colIdx] <- gsub(x = gsub(x = dataFrame[ , colIdx], pattern = "\\.", replacement = ""), pattern = ",", replacement = ".")
+      }
+    }
+  }
+
+  ###################
+  ## column formats
+  if(!is.null(dataFrame$"Average Rt(min)"))     dataFrame$"Average Rt(min)"     <- as.numeric(dataFrame$"Average Rt(min)")
+  if(!is.null(dataFrame$"Average Mz"))          dataFrame$"Average Mz"          <- as.numeric(dataFrame$"Average Mz")
+  if(!is.null(dataFrame$"Fill %"))              dataFrame$"Fill %"              <- as.numeric(dataFrame$"Fill %")
+  if(!is.null(dataFrame$"MS/MS included"))      dataFrame$"MS/MS included"      <- as.logical(dataFrame$"MS/MS included")
+  if(!is.null(dataFrame$"Dot product"))         dataFrame$"Dot product"         <- as.numeric(dataFrame$"Dot product")
+  if(!is.null(dataFrame$"Reverse dot product")) dataFrame$"Reverse dot product" <- as.numeric(dataFrame$"Reverse dot product")
+  if(!is.null(dataFrame$"Fragment presence %")) dataFrame$"Fragment presence %" <- as.numeric(dataFrame$"Fragment presence %")
+
+  #####################
+  ## sorted by m/z (needed for deisotoping)
+  if(!is.null(dataFrame$"Average Mz"))
+    dataFrame <- dataFrame[order(dataFrame$"Average Mz"), ]
+
+  ## replace -1 by 0
+  if(numberOfDataColumns > 0){
+    for(colIdx in (numRowDataCols+1):ncol(dataFrame)){
+      dataFrame[ , colIdx] <- as.numeric(dataFrame[ , colIdx])
+      if(!is.na(sum(dataFrame[,colIdx] == -1)))
+        dataFrame[(dataFrame[,colIdx] == -1),colIdx] <- 0
+    }
+  }
+  vals <- NULL
+  ## deisotoping
+  numberOfRemovedIsotopePeaks <- 0
+  if(doPrecursorDeisotoping & !is.null(dataFrame$"Average Mz")){
+    if(!is.na(progress))  if(progress)  incProgress(amount = 0, detail = paste("Precursor deisotoping...", sep = "")) else print(paste("Precursor deisotoping...", sep = ""))
+    distance13Cminus12C <- 1.0033548378
+
+    ## mark isotope precursors
+    precursorsToRemove <- vector(mode = "logical", length = numberOfPrecursors)
+
+    if(numberOfDataColumns > 0){
+      intensities <- dataFrame[ , (numRowDataCols+1):ncol(dataFrame)]
+      medians <- apply(X = as.matrix(intensities), MARGIN = 1, FUN = median)
+    }
+
+    for(precursorIdx in seq_len(numberOfPrecursors)){
+      if((precursorIdx %% (as.integer(numberOfPrecursors/10))) == 0)
+        if(!is.na(progress))  if(progress)  incProgress(amount = 0.0, detail = paste("Precursor deisotoping ", precursorIdx, " / ", numberOfPrecursors, sep = "")) else print(paste("Precursor deisotoping ", precursorIdx, " / ", numberOfPrecursors, sep = ""))
+
+      mzError <- dataFrame$"Average Mz"[[precursorIdx]] * mzDeviationInPPM_precursorDeisotoping / 1000000
+      mzError <- max(mzError, mzDeviationAbsolute_precursorDeisotoping)
+
+      ## RT difference <= maximumRtDifference
+      validPrecursorsInRt <- abs(dataFrame$"Average Rt(min)"[[precursorIdx]] - dataFrame$"Average Rt(min)"[-precursorIdx]) <= maximumRtDifference
+
+      ## MZ difference around 1.0033548378 (first isotope) or 1.0033548378 * 2 (second isotope)
+      validPrecursorsInMz1 <- abs((dataFrame$"Average Mz"[[precursorIdx]] - distance13Cminus12C * 1) - dataFrame$"Average Mz"[-precursorIdx]) <= mzError
+      validPrecursorsInMz2 <- abs((dataFrame$"Average Mz"[[precursorIdx]] - distance13Cminus12C * 2) - dataFrame$"Average Mz"[-precursorIdx]) <= mzError
+      validPrecursorsInMz <- validPrecursorsInMz1 | validPrecursorsInMz2
+
+      ## intensity gets smaller in the isotope spectrum
+      if(numberOfDataColumns > 0){
+        validPrecursorsInIntensity <- (medians[-precursorIdx] - medians[[precursorIdx]]) > 0
+      } else {
+        validPrecursorsInIntensity <- TRUE
+      }
+
+      if(any(validPrecursorsInRt & validPrecursorsInMz & validPrecursorsInIntensity))
+        precursorsToRemove[[precursorIdx]] <- TRUE
+
+    }
+
+    ## remove isotopes
+    dataFrame <- dataFrame[!precursorsToRemove, ]
+
+    numberOfRemovedIsotopePeaks <- sum(precursorsToRemove)
+    numberOfPrecursors <- nrow(dataFrame)
+  }
+
+  if(!is.na(progress))  if(progress)  incProgress(amount = 0, detail = paste("Boxing...", sep = "")) else print(paste("Boxing...", sep = ""))
+  returnObj <- list()
+  returnObj$dataFrame <- dataFrame
+  returnObj$vals <- vals
+
+  ## meta
+  returnObj$oldFormat <- oldFormat
+  returnObj$numberOfPrecursors <- numberOfPrecursors
+  returnObj$dataColumnStartEndIndeces <- dataColumnStartEndIndeces
+  returnObj$numberOfDataColumns <- numberOfDataColumns
+
+  ## group anno
+  returnObj$sampleClass          <- sampleClass
+  returnObj$sampleType           <- sampleType
+  returnObj$sampleInjectionOrder <- sampleInjectionOrder
+  returnObj$batchID              <- batchID
+
+  ## misc
+  returnObj$numberOfPrecursorsPrior <- numberOfPrecursorsPrior
+  returnObj$numberOfRemovedIsotopePeaks <- numberOfRemovedIsotopePeaks
+
+  return (returnObj)
+}