diff --git a/DESCRIPTION b/DESCRIPTION old mode 100755 new mode 100644 index 6a5edfa..26976cc --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: sigurd Type: Package Title: Single cell Genotyping Using RNA Data -Version: 0.3.6 +Version: 0.3.8 Authors@R: c( person(given = "Martin", family = "Grasshoff", diff --git a/NAMESPACE b/NAMESPACE old mode 100755 new mode 100644 diff --git a/R/AllelFrequencyFoldChange.R b/R/AllelFrequencyFoldChange.R old mode 100755 new mode 100644 diff --git a/R/AmpliconSupplementing.R b/R/AmpliconSupplementing.R old mode 100755 new mode 100644 diff --git a/R/CalculateAlleleFrequency.R b/R/CalculateAlleleFrequency.R old mode 100755 new mode 100644 diff --git a/R/CalculateAltReads.R b/R/CalculateAltReads.R old mode 100755 new mode 100644 diff --git a/R/CalculateConsensus.R b/R/CalculateConsensus.R old mode 100755 new mode 100644 diff --git a/R/CalculateCorrelationPValue.R b/R/CalculateCorrelationPValue.R old mode 100755 new mode 100644 index 0a0e947..6b0b79c --- a/R/CalculateCorrelationPValue.R +++ b/R/CalculateCorrelationPValue.R @@ -3,12 +3,13 @@ #'@description #'We perform the correlation of SNVs and calculate the P values. #'@importFrom stats cor.test -#'@param variant_values The fraction values you are analysing. A vector. +#'@param variant_values The fraction values you are analyzing. A vector. #'@param other_mutation All other variants you have. A vector of variant names. #'@param all_variants_list List of fraction values for all the variants you want to compare your variant with. #'@param min_intersecting_cells Minimum number of intersecting cells. Correlations with less than this will not be performed. +#'@param value_type Are we using consensus or other information? #'@export -CalculateCorrelationPValue <- function(variant_values, other_mutation, all_variants_list, min_intersecting_cells = 5){ +CalculateCorrelationPValue <- function(variant_values, other_mutation, all_variants_list, value_type = "consensus", min_intersecting_cells = 5){ other_variant_values <- all_variants_list[[other_mutation]] if(sum(names(variant_values) %in% names(other_variant_values)) == 0){ #print("No intersection") @@ -28,10 +29,17 @@ CalculateCorrelationPValue <- function(variant_values, other_mutation, all_varia return(result) } else if(length(variant_values) > 2){ result <- stats::cor.test(variant_values, other_variant_values) - cells_som_alt <- sum(variant_values == 1) - cells_som_ref <- sum(variant_values == 0) - cells_MT_alt <- sum(other_variant_values == 1) - cells_MT_ref <- sum(other_variant_values == 0) + if(value_type == "consensus"){ + cells_som_alt <- sum(variant_values == 1) + cells_som_ref <- sum(variant_values == 0) + cells_MT_alt <- sum(other_variant_values == 1) + cells_MT_ref <- sum(other_variant_values == 0) + } else{ + cells_som_alt <- sum(variant_values > 0) + cells_som_ref <- sum(variant_values == 0) + cells_MT_alt <- sum(other_variant_values > 0) + cells_MT_ref <- sum(other_variant_values == 0) + } result <- c(result$p.value, result$estimate, cells_som_alt, cells_som_ref, cells_MT_alt, cells_MT_ref) } else{ #print("We do not have more than 2 cells for the somatic variant.") diff --git a/R/CalculateCoverage.R b/R/CalculateCoverage.R old mode 100755 new mode 100644 diff --git a/R/CalculateFisherTestPValue.R b/R/CalculateFisherTestPValue.R old mode 100755 new mode 100644 diff --git a/R/CalculateQuality.R b/R/CalculateQuality.R old mode 100755 new mode 100644 diff --git a/R/CalculateRefReads.R b/R/CalculateRefReads.R old mode 100755 new mode 100644 diff --git a/R/CalculateStrandCorrelation.R b/R/CalculateStrandCorrelation.R old mode 100755 new mode 100644 diff --git a/R/CallSupport.R b/R/CallSupport.R old mode 100755 new mode 100644 diff --git a/R/ClonalDefinition.R b/R/ClonalDefinition.R old mode 100755 new mode 100644 diff --git a/R/ClonalDiversity.R b/R/ClonalDiversity.R old mode 100755 new mode 100644 diff --git a/R/CombineSEobjects.R b/R/CombineSEobjects.R old mode 100755 new mode 100644 diff --git a/R/Enrichment_Fisher.R b/R/Enrichment_Fisher.R old mode 100755 new mode 100644 diff --git a/R/Filtering.R b/R/Filtering.R old mode 100755 new mode 100644 diff --git a/R/GetCellInfoPerVariant.R b/R/GetCellInfoPerVariant.R old mode 100755 new mode 100644 diff --git a/R/GetVariantInfo.R b/R/GetVariantInfo.R old mode 100755 new mode 100644 diff --git a/R/HeatmapVOI.R b/R/HeatmapVOI.R old mode 100755 new mode 100644 diff --git a/R/LoadingMAEGATK_typewise.R b/R/LoadingMAEGATK_typewise.R old mode 100755 new mode 100644 diff --git a/R/LoadingRawMatrix_typewise.R b/R/LoadingRawMatrix_typewise.R old mode 100755 new mode 100644 diff --git a/R/LoadingVCF_typewise.R b/R/LoadingVCF_typewise.R old mode 100755 new mode 100644 diff --git a/R/LoadingVarTrix_typewise.R b/R/LoadingVarTrix_typewise.R old mode 100755 new mode 100644 diff --git a/R/Merging_SE_list.R b/R/Merging_SE_list.R old mode 100755 new mode 100644 diff --git a/R/RowWiseSplit.R b/R/RowWiseSplit.R old mode 100755 new mode 100644 diff --git a/R/SeparatingMatrixToList.R b/R/SeparatingMatrixToList.R old mode 100755 new mode 100644 diff --git a/R/SetVariantInfo.R b/R/SetVariantInfo.R old mode 100755 new mode 100644 diff --git a/R/UMIQC.R b/R/UMIQC.R old mode 100755 new mode 100644 diff --git a/R/UMI_seq.R b/R/UMI_seq.R old mode 100755 new mode 100644 diff --git a/R/VariantBurden.R b/R/VariantBurden.R old mode 100755 new mode 100644 diff --git a/R/VariantCloneSizeThresholding.R b/R/VariantCloneSizeThresholding.R old mode 100755 new mode 100644 diff --git a/R/VariantCorrelationHeatmap.R b/R/VariantCorrelationHeatmap.R old mode 100755 new mode 100644 diff --git a/R/VariantFisherTestHeatmap.R b/R/VariantFisherTestHeatmap.R old mode 100755 new mode 100644 diff --git a/R/VariantQuantileThresholding_Combined.R b/R/VariantQuantileThresholding_Combined.R old mode 100755 new mode 100644 diff --git a/R/VariantSelection_Group.R b/R/VariantSelection_Group.R old mode 100755 new mode 100644 diff --git a/R/VariantSelection_Quantile.R b/R/VariantSelection_Quantile.R old mode 100755 new mode 100644 index 680bc30..442c638 --- a/R/VariantSelection_Quantile.R +++ b/R/VariantSelection_Quantile.R @@ -22,14 +22,14 @@ VariantSelection_Quantile <- function(SE, min_coverage = 2, quantiles = c(0.1, 0 SummarizedExperiment::assays(SE)[["fraction"]][nocall_check] <- NA SummarizedExperiment::assays(SE)[["coverage"]][nocall_check] <- NA } - + if(verbose) print("Get the mean allele frequency and coverage.") mean_af <- Matrix::rowMeans(SummarizedExperiment::assays(SE)[["fraction"]], na.rm = TRUE) mean_cov <- Matrix::rowMeans(SummarizedExperiment::assays(SE)[["coverage"]], na.rm = TRUE) - + if(verbose) print("Get the quantiles of the VAFs of each variant.") quantiles <- lapply(quantiles, function(x) apply(SummarizedExperiment::assays(SE)[["fraction"]], 1, quantile, x, na.rm = TRUE)) - + # Apply min_quality filtering if(!is.null(min_quality)){ vars <- data.frame(Mean_AF = mean_af, Mean_Cov = mean_cov, VariantQuality = SummarizedExperiment::rowData(SE)$VariantQuality, Quantile1 = quantiles[[1]], Quantile2 = quantiles[[2]]) @@ -37,9 +37,9 @@ VariantSelection_Quantile <- function(SE, min_coverage = 2, quantiles = c(0.1, 0 } else{ vars <- data.frame(Mean_AF = mean_af, Mean_Cov = mean_cov, Quantile1 = quantiles[[1]], Quantile2 = quantiles[[2]]) } - + if(verbose) print("Thresholding using the quantile approach.") - vois <- subset(vars, vars$Mean_AF > mean_allele_frequency & vars$Mean_Cov > min_coverage & vars$Quantile1 < thresholds[1] & vars$Quantile2 > thresholds[2]) - + vois <- subset(vars, vars$Mean_AF > mean_allele_frequency & vars$Mean_Cov > min_coverage & vars$Quantile1 <= thresholds[1] & vars$Quantile2 >= thresholds[2]) + return(rownames(vois)) } diff --git a/R/VariantSelection_TopCells.R b/R/VariantSelection_TopCells.R old mode 100755 new mode 100644 diff --git a/R/VariantSelection_VMR.R b/R/VariantSelection_VMR.R index bc397d8..f97334c 100644 --- a/R/VariantSelection_VMR.R +++ b/R/VariantSelection_VMR.R @@ -18,7 +18,20 @@ VariantSelection_VMR <- function(SE, stabilize_variance = TRUE, low_coverage_thr fraction[is.na(fraction)] <- 0 mean_af <- Matrix::rowSums(SummarizedExperiment::assays(SE)[["alts"]]) / Matrix::rowSums(SummarizedExperiment::assays(SE)[["coverage"]]) mean_af[is.na(mean_af)] <- 0 - + + if(verbose) print("We calculate the concordance.") + reads_forward <- SummarizedExperiment::assays(SE)[["forward"]] + reads_reverse <- SummarizedExperiment::assays(SE)[["reverse"]] + dt <- merge(data.table::data.table(summary(reads_forward)), + data.table::data.table(summary(reads_reverse)), + by.x = c("i", "j"), by.y = c("i", "j"), + all = TRUE) + dt <- dt[dt[,x.x] > 0 | dt[,x.y] > 0,] + dt <- data.table::data.table(variant = rownames(SE)[dt[[1]]], + cell_id = dt[[2]], + fw = dt[[3]], rev = dt[[4]]) + concordance <- dt[, .(cor = suppressWarnings(stats::cor(c(fw), c(rev), method = "pearson", use = "pairwise.complete"))), by = list(variant)] + if(stabilize_variance){ if(verbose) print("We stabilize the variance.") coverage <- SummarizedExperiment::assays(SE)[["coverage"]] @@ -42,7 +55,7 @@ VariantSelection_VMR <- function(SE, stabilize_variance = TRUE, low_coverage_thr if(verbose) print(paste0("We check if a cells has more than ", minimum_fw_rev_reads, " reads.")) detected <- (SummarizedExperiment::assays(SE)[["forward"]] >= minimum_fw_rev_reads) + (assays(SE)[["reverse"]] >= minimum_fw_rev_reads) - + voi <- data.frame( variant = rownames(detected), vmr = variants_variance / (mean_af + 0.00000000001), @@ -50,11 +63,9 @@ VariantSelection_VMR <- function(SE, stabilize_variance = TRUE, low_coverage_thr mean = round(mean_af, 7), variance = round(variants_variance, 7), cells_detected = Matrix::rowSums(detected == 2), # We only select cells that have enough reads for both directions. - strand_correlation = SummarizedExperiment::rowData(SE)$Concordance, + strand_correlation = concordance$cor, #SummarizedExperiment::rowData(SE)$Concordance, mean_coverage = Matrix::rowMeans(SummarizedExperiment::assays(SE)[["coverage"]]), stringsAsFactors = FALSE, row.names = rownames(detected) ) return(voi) } - - diff --git a/R/VariantWiseCorrelation.R b/R/VariantWiseCorrelation.R old mode 100755 new mode 100644 index a9f6742..6c2ef40 --- a/R/VariantWiseCorrelation.R +++ b/R/VariantWiseCorrelation.R @@ -8,8 +8,9 @@ #'@param n_cores Number of cores you want to use. Numeric. #'@param p_value_adjustment Method for P value adjustment. See p.adjust for details. #'@param verbose Should the function be verbose? Default = TRUE +#'@param value_type Are we using consensus or other information? #'@export -VariantWiseCorrelation <- function(variants_list, n_cores = 1, p_value_adjustment = "fdr", verbose = TRUE){ +VariantWiseCorrelation <- function(variants_list, n_cores = 1, p_value_adjustment = "fdr", value_type = "consensus", verbose = TRUE){ # We correlate the somatic variants with each other and the MT variants. # Since we have tens of thousands of MT variants, we do not correlate them with each other. variants <- names(variants_list) @@ -22,7 +23,7 @@ VariantWiseCorrelation <- function(variants_list, n_cores = 1, p_value_adjustmen variants_values_use <- variants_list[[variant_use]] variants_list_use <- variants_list[names(variants_list) != variant_use] all_variants <- names(variants_list_use) - results <- parallel::mclapply(X = all_variants, CalculateCorrelationPValue, variant_values = variants_values_use, all_variants_list = variants_list_use, mc.cores = n_cores) + results <- parallel::mclapply(X = all_variants, CalculateCorrelationPValue, variant_values = variants_values_use, all_variants_list = variants_list_use, mc.cores = n_cores, value_type = value_type) results <- do.call("rbind", results) results <- data.frame(Variant1 = variant_use, Variant2 = all_variants, P = results[,1], Corr = results[,2], Cells_1_Alt = results[,3], Cells_1_Ref = results[,4], Cells_2_Alt = results[,5], Cells_2_Ref = results[,6]) @@ -35,6 +36,10 @@ VariantWiseCorrelation <- function(variants_list, n_cores = 1, p_value_adjustmen if(verbose) print("We remove the negative corrlated SNPs.") results_total <- subset(results_total, Corr > 0) + if(verbose) print("Remove duplicative results.") + results_check <- apply(results_total[, c("Variant1", "Variant2")], 1, function(x) paste(sort(x), collapse = "_")) + results_total <- results_total[!duplicated(results_check), , drop = FALSE] + if(verbose) print(paste0("Adjusting P values using ", p_value_adjustment, ".")) results_total$P_adj <- stats::p.adjust(results_total$P, method = p_value_adjustment) rownames(results_total) <- NULL diff --git a/R/VariantWiseFisherTest.R b/R/VariantWiseFisherTest.R old mode 100755 new mode 100644 diff --git a/R/char_to_numeric.R b/R/char_to_numeric.R old mode 100755 new mode 100644 diff --git a/R/combine_NAMES.R b/R/combine_NAMES.R old mode 100755 new mode 100644 diff --git a/R/combine_SparseMatrix.R b/R/combine_SparseMatrix.R old mode 100755 new mode 100644 diff --git a/R/computeAFMutMatrix.R b/R/computeAFMutMatrix.R old mode 100755 new mode 100644 diff --git a/R/getAltMatrix.R b/R/getAltMatrix.R old mode 100755 new mode 100644 diff --git a/R/getMutMatrix.R b/R/getMutMatrix.R old mode 100755 new mode 100644 diff --git a/R/getReadMatrix.R b/R/getReadMatrix.R old mode 100755 new mode 100644 diff --git a/R/getRefMatrix.R b/R/getRefMatrix.R old mode 100755 new mode 100644 diff --git a/R/get_consensus.R b/R/get_consensus.R old mode 100755 new mode 100644 diff --git a/R/load_object.R b/R/load_object.R old mode 100755 new mode 100644 diff --git a/R/save_object.R b/R/save_object.R old mode 100755 new mode 100644 diff --git a/R/sdiv.R b/R/sdiv.R old mode 100755 new mode 100644 diff --git a/README.md b/README.md old mode 100755 new mode 100644 index d0d36ae..5f647af --- a/README.md +++ b/README.md @@ -33,7 +33,7 @@ The mutation data was obtained from the Sanger Institute Catalogue Of Somatic Mu ``` -# Current Features v0.3.6 +# Current Features v0.3.8 - Loading data from VarTrix and MAEGATK. - Transforming the data to be compatible for joint analysis. diff --git a/docs/404.html b/docs/404.html old mode 100755 new mode 100644 index cdf7f65..793d306 --- a/docs/404.html +++ b/docs/404.html @@ -24,7 +24,7 @@ sigurd - 0.3.2 + 0.3.8 + + + + + +
+
+
+ +
+

We calculate the number of forward reads covering a variant.

+
+ +
+

Usage

+
CalculateForwardReads(SE, chromosome_prefix = "chrM")
+
+ +
+

Arguments

+
SE
+

SummarizedExperiment object.

+ + +
chromosome_prefix
+

List of matrices for the alternative reads.

+ +
+ +
+ + +
+ + + + + + + diff --git a/docs/reference/CalculateQuality.html b/docs/reference/CalculateQuality.html old mode 100755 new mode 100644 index 05b67bb..d7b7069 --- a/docs/reference/CalculateQuality.html +++ b/docs/reference/CalculateQuality.html @@ -10,7 +10,7 @@ sigurd - 0.3.2 + 0.3.8 + + + + + +
+
+
+ +
+

We calculate the number of reverse reads covering a variant.

+
+ +
+

Usage

+
CalculateReverseReads(SE, chromosome_prefix = "chrM")
+
+ +
+

Arguments

+
SE
+

SummarizedExperiment object.

+ + +
chromosome_prefix
+

List of matrices for the alternative reads.

+ +
+ +
+ + +
+ + + + + + + diff --git a/docs/reference/CalculateStrandCorrelation.html b/docs/reference/CalculateStrandCorrelation.html old mode 100755 new mode 100644 index 1d68ffe..b01262a --- a/docs/reference/CalculateStrandCorrelation.html +++ b/docs/reference/CalculateStrandCorrelation.html @@ -10,7 +10,7 @@ sigurd - 0.3.2 + 0.3.8 + + + + + +
+
+
+ +
+

We load the MGATK output and transform it to be compatible with the VarTrix output. +The input file is a specifically formatted csv file with all the necessary information to run the analysis. +Note that the source column in the input file needs to be mgaetk or mgatk for this function. This is case insensitive. +If you want to only load a single sample without the use of an input file, you have to set the following variables.

  1. samples_path

  2. +
  3. barcodes_path

  4. +
  5. patient

  6. +
  7. samples_file = NULL

  8. +
+ +
+

Usage

+
LoadingMGATK_typewise(
+  samples_file,
+  patient,
+  samples_path = NULL,
+  patient_column = "patient",
+  type_use = "scRNAseq_MT",
+  chromosome_prefix = "chrM",
+  min_cells = 2,
+  cells_include = NULL,
+  cells_exclude = NULL,
+  barcodes_path = NULL,
+  cellbarcode_length = 18,
+  ignore_quality = TRUE,
+  verbose = TRUE
+)
+
+ +
+

Arguments

+
samples_file
+

Path to the csv file with the samples to be loaded.

+ + +
patient
+

The patient you want to load.

+ + +
samples_path
+

Path to the input folder.

+ + +
patient_column
+

The column that contains the patient information. Use merge, if all samples should be merged.

+ + +
type_use
+

The type of input. Only rows that have the specified type will be loaded.

+ + +
chromosome_prefix
+

The prefix you want use. Default: "chrM"

+ + +
min_cells
+

The minimum number of cells with coverage for a variant. Variants with coverage in less than this amount of cells are removed. Default = 2

+ + +
cells_include
+

A vector of cell barcodes. Only these cells will be retained.

+ + +
cells_exclude
+

A vector of cell barcodes. These cells will be removed from the output.

+ + +
barcodes_path
+

Path to the barcodes file tsv. Default = NULL

+ + +
cellbarcode_length
+

The length of the cell barcode. This should be the length of the actual barcode plus two for the suffix (-1). Default = 18

+ + +
ignore_quality
+

Should the quality assay be ignore? This can be useful if the quality was not reported in mgatk.

+ + +
verbose
+

Should the function be verbose? Default = TRUE

+ +
+ +
+ + +
+ + + + + + + diff --git a/docs/reference/LoadingRawMatrix_typewise.html b/docs/reference/LoadingRawMatrix_typewise.html old mode 100755 new mode 100644 index 98a1472..c02c5ac --- a/docs/reference/LoadingRawMatrix_typewise.html +++ b/docs/reference/LoadingRawMatrix_typewise.html @@ -12,7 +12,7 @@ sigurd - 0.3.2 + 0.3.8 + + + + + +
+
+
+ +
+

This is an adaption of the VMR based selection of variants from Miller et al. +This selection process is designed for ATAC data and not scRNAseq data.

+
+ +
+

Usage

+
VariantSelection_VMR(
+  SE,
+  stabilize_variance = TRUE,
+  low_coverage_threshold = 10,
+  minimum_fw_rev_reads = 2,
+  verbose = TRUE
+)
+
+ +
+

Arguments

+
SE
+

SummarizedExperiment object.

+ + +
stabilize_variance
+

Should the variance be stabilized by using the mean allele frequency for cells with a low coverage? Coverage threshold is set by low_coverage_threshold.

+ + +
low_coverage_threshold
+

Cells below this threshold are set to the mean.

+ + +
minimum_fw_rev_reads
+

How many forward and reverse reads should a cell have? Default = 2

+ + +
verbose
+

Should the function be verbose?

+ +
+ +
+ + +
+ + + + + + + diff --git a/docs/reference/VariantWiseCorrelation.html b/docs/reference/VariantWiseCorrelation.html old mode 100755 new mode 100644 index 5bec1e5..da74c33 --- a/docs/reference/VariantWiseCorrelation.html +++ b/docs/reference/VariantWiseCorrelation.html @@ -12,7 +12,7 @@ sigurd - 0.3.2 + 0.3.8 - - - - - -
- -
-
- - - -
-

Analysis of Blastic Plasmacytoid Dendritic Cell Neoplasm (BPDCN) -

-

Here, we compare the analysis of cells from a patient with BPDCN.

-

The original script is available here: https://github.com/petervangalen/MAESTER-2021/blob/main/4_CH_sample/4.2_Variant_Selection.R

-

In this setup, the cells are all from a single donor and do not have to be separated into categories.

-

Then small clones that might characterize the clonal lineages in the sample are selected.

-
-
-

Loading necessary packages. -

-

We load the data from MAESTER.

-
-print("Libraries for SIGURD.")
-
## [1] "Libraries for SIGURD."
- -
-
-

Loading the data using SIGURD. -

-
-# The design matrix contains information for the genotyping data.
-genotyping <- LoadingMAEGATK_typewise(patient = "BPDCN712", samples_file = "/data/MPN/exp/scRNA/MPN_mutations/SIGURD_paper/sigurd/data/MAESTER_Reproduction.csv", type_use = "Amplicon_MT", verbose = FALSE)
-
-# Loading the scRNA-seq data.
-scrna <- readRDS("/data/MPN/exp/scRNA/MPN_mutations/SIGURD_paper/sigurd/data/BPDCN712_Seurat_with_TCR_Renamed.rds")
-
-
-

Generating a block list -

-

Variants that are also detected in the cell mixture data is treated as possible false positives. They are used as a blacklist and are removed from the results.

-
-# Loading the TenX Cell Mixture Genotyping.
-
-genotyping_tenx <- load_object("/data/MPN/exp/scRNA/MPN_mutations/SIGURD_paper/sigurd/data/TenX_CellMixture_Genotyping.rds.lz4")
-blocklist <- AllelFrequencyFoldChange(genotyping_tenx, group_of_interest = "CellType", group1 = "K562", group2 = "BT142", maximum_foldchange = 5, minimum_coverage = 5, minimum_allele_freq = 0.001, maximum_allele_freq = 0.999)$Variant
-
-# We add the variant chrM_1583_A_G by hand. It is identified as misleading based on downstream analysis.
-blocklist <- c(blocklist, "chrM_1583_A_G")
-
-
-

Selecting the Variants of Interest -

-

Now, we select the variants of interest from the loaded data.

-
-voi.ch.sigurd <- VariantSelection_TopCells(genotyping, min_coverage = 5, quantiles = 0.9, thresholds = 0, top_cells = 10, top_VAF = 0.5, min_quality = 30, remove_nocall = FALSE, verbose = FALSE)
-voi.ch.sigurd <- voi.ch.sigurd[!voi.ch.sigurd %in% blocklist]
-print(voi.ch.sigurd)
-
##  [1] "chrM_683_G_A"   "chrM_1323_G_A"  "chrM_1415_G_A"  "chrM_2593_G_A" 
-##  [5] "chrM_6205_G_A"  "chrM_6243_G_A"  "chrM_8697_G_A"  "chrM_9753_G_A" 
-##  [9] "chrM_10158_T_A" "chrM_15140_G_A" "chrM_15812_G_A" "chrM_779_T_C"  
-## [13] "chrM_3988_T_C"  "chrM_6185_T_C"  "chrM_6293_T_C"  "chrM_9164_T_C" 
-## [17] "chrM_9321_T_C"  "chrM_11864_T_C" "chrM_15299_T_C" "chrM_1171_A_G" 
-## [21] "chrM_1222_A_G"  "chrM_2241_A_G"  "chrM_2623_A_G"  "chrM_3628_A_G" 
-## [25] "chrM_4104_A_G"  "chrM_6218_A_G"
-
-
-

Visualisation using SIGURD -

-
-genotyping <- Filtering(SE = genotyping, cells_include = colnames(scrna))
-
## [1] "We remove all cells not in the allow list."
-## [1] "We remove all the variants that are always NoCall."
-
-colData(genotyping)$CellType <- scrna$CellType
-HeatmapVoi(SE = genotyping, voi = voi.ch.sigurd, annotation_trait = "CellType", sort_cells = TRUE, remove_empty_cells = TRUE, minimum_allele_freq = 0.01)
-

-
-
-
- - - - -
- - - - - - - diff --git a/inst/doc/BoneMarrow_SIGURD_files/figure-html/Vis_SIGURD-1.png b/inst/doc/BoneMarrow_SIGURD_files/figure-html/Vis_SIGURD-1.png deleted file mode 100755 index 381218c..0000000 Binary files a/inst/doc/BoneMarrow_SIGURD_files/figure-html/Vis_SIGURD-1.png and /dev/null differ diff --git a/inst/doc/MPN_SIGURD.Rmd b/inst/doc/MPN_SIGURD.Rmd deleted file mode 100755 index 511e723..0000000 --- a/inst/doc/MPN_SIGURD.Rmd +++ /dev/null @@ -1,206 +0,0 @@ ---- -title: "MPN SIGURD" -author: "Martin Grasshoff1, Ivan G. Costa1" -affiliations: "1. Institute for Computational Genomics, Faculty of Medicine, RWTH Aachen University, Aachen, 52074 Germany" -output: rmarkdown::html_vignette -vignette: > - %\VignetteIndexEntry{MPN SIGURD} - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} -header-includes: - - '\usepackage[utf8]{inputenc}' ---- - -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) -``` - -## Loading necessary packages. - -```{r Packages, warning = FALSE} -suppressPackageStartupMessages(library(sigurd)) -suppressPackageStartupMessages(library(SummarizedExperiment)) -suppressPackageStartupMessages(library(ggplot2)) -suppressPackageStartupMessages(library(Seurat)) -suppressPackageStartupMessages(library(ComplexHeatmap)) -suppressPackageStartupMessages(library(circlize)) -suppressPackageStartupMessages(library(grid)) -suppressPackageStartupMessages(library(EnhancedVolcano)) -``` -## Analysis of MPN data - -Here, we illustrate how to analysis genotyping data from MPN samples. The JAK2V617F mutation was genotyped for 6 MPN samples and 3 healthy controls. - -This setup makes this analysis different from the previous analyses. Previously, only one sample was analysed at a time. Now, we load and analyse the samples simultaneously. - -## Loading MPN data -Now, we load the JAK2V617F data and plot the mutated cells on the UMAP. - -Since we are loading multiple samples, this code is more complicated than the code for the MAESTER SW sample. - -```{r MPN, warning = FALSE} -# Loading the scRNA-seq data. -scrna <- load_object("/data/MPN/exp/scRNA/MPN_mutations/SIGURD_paper/sigurd/data/scrna_MPN.rds.lz4") - -# Loading the genotyping data. -genotyping <- LoadingVarTrix_typewise(patient = "MPN", samples_file = "/data/MPN/exp/scRNA/MPN_mutations/SIGURD_paper/sigurd/data/DesignMatrix_VarTrix.csv", min_reads = 0, vcf_path = "/data/MPN/exp/scRNA/MPN_mutations/SIGURD_paper/sigurd/data/CosmicSubset_JAK2V617F.vcf", type_use = "scRNAseq_Somatic", min_cells = 0, verbose = FALSE) -amplicon <- LoadingVarTrix_typewise(patient = "MPN", samples_file = "/data/MPN/exp/scRNA/MPN_mutations/SIGURD_paper/sigurd/data/DesignMatrix_Amplicon.csv", min_reads = 0, vcf_path = "/data/MPN/exp/scRNA/MPN_mutations/SIGURD_paper/sigurd/data/CosmicSubset_UCSC_JAK2V617F.vcf", type_use = "scRNAseq_Amplicon", min_cells = 0, verbose = FALSE) -genotyping <- AmpliconSupplementing(scRNAseq = genotyping, amplicon = amplicon, verbose = FALSE) - -# Filtering the genotyping object to remove false positives. -genotyping <- Filtering(genotyping, min_cells_per_variant = 0, fraction_threshold = 0.21, cells_include = colnames(scrna), min_variants_per_cell = 1, reject_value = "NoCall", verbose = FALSE) -``` - -## Combining Seurat and SIGURD - -Now, we add the genotyping information to the Seurat object. - -```{r warning = FALSE} -scrna <- SetVariantInfo(SE = genotyping, seurat_object = scrna) -``` - -## Visualisation - -We visualise the genotyping information on the UMAP. - -```{r warning = FALSE, fig.width = 4, fig.height = 4} -# Plotting the mutated cells on the UMAP. -scrna$JAK2_p.V617F_c.1849G.T_consensus <- factor(scrna$JAK2_p.V617F_c.1849G.T_consensus, levels = c("Ref", "Alt")) -jak2_cells <- sum(scrna$JAK2_p.V617F_c.1849G.T_consensus == "Alt", na.rm = TRUE) -ref_cells <- sum(scrna$JAK2_p.V617F_c.1849G.T_consensus == "Ref", na.rm = TRUE) -all_cells <- ncol(scrna) -p <- DimPlot(scrna, group.by = "JAK2_p.V617F_c.1849G.T_consensus", reduction = "INTE_UMAP", order = TRUE, pt.size = 1, raster = FALSE, na.value = "grey95", cols = c(Alt = "#aa0051", Ref = "#79b938")) + - ggtitle(paste0("Alt: ", jak2_cells, ", Ref: ", ref_cells, ", All: ", all_cells)) + - xlab("UMAP 1") + ylab("UMAP 2") + scale_color_manual(breaks = c("Alt", "Ref"), values = c("#aa0051", "#79b938"), na.value = "grey95") + - theme(legend.position = "top") -print(p) -``` - -## We determine DEGs between the JAK2V617F and WT cells. - -Now, we determine the DEGs between the JAK2V617F cells and the wild type cells for the Orthochromatic Erythroblasts. - -```{r DEGs_OrthoE_JAK2V617F, fig.width = 6, fig.height = 6, warning = FALSE} -scrna_orthoe <- subset(scrna, CellType == "Orthochromatic Erythroblasts" & Condition != "HC") -degs <- FindMarkers(scrna_orthoe, group.by = "JAK2_p.V617F_c.1849G.T_consensus", ident.1 = "Alt", ident.2 = "Ref", logfc.threshold = 0) -degs <- data.frame(degs, gene = rownames(degs)) -degs_up <- nrow(subset(degs, avg_log2FC > 0.25 & p_val_adj < 0.05)) -degs_down <- nrow(subset(degs, avg_log2FC < -0.25 & p_val_adj < 0.05)) -EnhancedVolcano(degs, x = "avg_log2FC", y = "p_val_adj", lab = degs$gene, FCcutoff = 0.25, pCutoff = 0.05, title = "OrthoE JAK2V617F Pos. vs. Neg.",subtitle = NULL) -``` - -## Combining the JAK2V617F data with the MT data. - -We combine the JAK2V617F SIGURD object with the MT object. Cells and variants in both objects are combined. - -To save time, we are only processing a single sample. All other samples are processed equally. Since the genotyping object is a list, it is easily processed using parallel computing. - -First, we load and filter the mitochondrial variant information. - -```{r Combining, warning = FALSE} -# Loading the data. -genotyping_mt <- LoadingMAEGATK_typewise(patient = "MPN1", samples_file = "/data/MPN/exp/scRNA/MPN_mutations/SIGURD_paper/sigurd/data/DesignMatrix_MT.csv", min_cells = 0, cells_include = colnames(scrna), type_use = "scRNAseq_MT", verbose = FALSE) -genotyping_mt <- Filtering(genotyping_mt, min_variants_per_cell = 1, min_cells_per_variant = 2, fraction_threshold = 0.05, cells_include = colnames(scrna), verbose = FALSE) -``` - -## MPN VOIs and Diversity - -Now, we select the mtVOIs. We then determine the clonal lineages and plot the results on a heatmap. - -We subset the cells to only include the erythroid cells. - -```{r MPN_VOI, fig.width = 6, warning = FALSE, dev = "CairoPNG"} -# Subset to erythroid cells. -erythroid_cells <- subset(scrna, annotation == "ErythroidCells") -genotyping_mt <- Filtering(genotyping_mt, cells_include = colnames(erythroid_cells)) - -# Selecting the mtVOIS. -mt_vois <- VariantSelection_Quantile(genotyping_mt, min_coverage = 1, verbose = FALSE) - -# Determining the clonal lineages. -genotyping_mt <- ClonalDefinition(se = genotyping_mt, variants_ls = list(mt_vois), verbose = FALSE) -``` - -## Visualisation of mtVOIs and Clonal Lineages - -We now plot the variants of interest and the clonal lineages. - -```{r warning = FALSE, fig.width = 6, fig.height = 4, dev = "CairoPNG"} -# Generating a heatmap. -colors_lineage <- c("C1" = "#563e00", "C2" = "#dec234", "C3" = "#e23856", "C4" = "#7f0073", "C5" = "#006639", "C6" = "#b1db8e", "C7" = "#00348f", "Negative" = "black") -ha <- columnAnnotation(Clones = colData(genotyping_mt)[,"Clones"], - col = list(Clones = colors_lineage), - annotation_name_gp = gpar(fontsize = 10, fontface = "plain"), - annotation_legend_param = list(Clones = list(nrow = 1, title = ""))) -fraction <- as.matrix(assays(genotyping_mt)[["fraction"]])[mt_vois, ] -hm <- Heatmap(fraction, - name = "VAF", - row_title_gp = gpar(fontsize = 12), - row_names_gp = gpar(fontsize = 10), - row_names_max_width = unit(16, "cm"), - column_title = "MPN1", - column_title_gp = gpar(fontsize = 12), - column_names_gp = gpar(fontsize = 10), - col = colorRamp2(seq(0, round(max(fraction, na.rm = TRUE)), length.out = 9), c("#FCFCFC", "#FFEDB0", "#FFDF5F", "#FEC510", "#FA8E24", "#F14C2B", "#DA2828", "#BE2222", "#A31D1D")), - show_column_dend = FALSE, - show_row_names = TRUE, - show_column_names = FALSE, - cluster_columns = TRUE, - cluster_rows = FALSE, - heatmap_legend_param = list(border = "#000000", grid_height = unit(10, "mm"), - direction = "horizontal"), - bottom_annotation = ha, - border = TRUE, - use_raster = TRUE, - show_heatmap_legend = TRUE) -draw(hm, annotation_legend_side = "bottom") -``` - -## Calculating clonal diversity. - -We calculate the clonal diversity for the sample MPN1 as an example. - -```{r Diversity, warning = FALSE, dev = "CairoPNG"} -clonal_diversity <- ClonalDiversity(genotyping_mt, grouping = "Clones", diversity_measure = "EffectiveSpecies", verbose = FALSE) -print(paste0("Clonal Diversity in Effective Number of Species for MPN1: ", clonal_diversity)) -``` - -## Repeating the MT analysis for all samples. - -We now repeat the MT analysis for all samples. This will take considereable time. - -We speed the analsyis up by using parallel computing. - -```{r warning = FALSE, dev = "CairoPNG"} -# We get all patients present in the design matrix. -design_matrix <- read.table("/data/MPN/exp/scRNA/MPN_mutations/SIGURD_paper/sigurd/data/DesignMatrix_MT.csv", header = TRUE, sep = ",") -patients <- design_matrix$patient -genotyping_mt_all <- parallel::mclapply(patients, LoadingMAEGATK_typewise, samples_file = "/data/MPN/exp/scRNA/MPN_mutations/SIGURD_paper/sigurd/data/DesignMatrix_MT.csv", min_cells = 0, cells_include = colnames(scrna), type_use = "scRNAseq_MT", verbose = TRUE, mc.cores = length(patients)) -names(genotyping_mt_all) <- patients -genotyping_mt_all <- parallel::mclapply(genotyping_mt_all, Filtering, min_variants_per_cell = 1, min_cells_per_variant = 2, fraction_threshold = 0.05, cells_include = colnames(scrna), verbose = FALSE, mc.cores = length(patients)) -``` - -## Selecting variants of interest for all samples - -We select variants of interest for all samples. - -```{r MPN_VOI, warning = FALSE} -# Selecting the mtVOIS. -mt_vois_all <- parallel::mclapply(genotyping_mt_all, VariantSelection_Quantile, min_coverage = 1, verbose = FALSE, mc.cores = length(patients)) - -# Determining the clonal lineages. This takes very long. We use parallel computing to process all samples simultaneously. -genotyping_mt_all <- parallel::mclapply(1:length(mt_vois_all), function(x){ - sample_use <- names(mt_vois_all)[x] - clones <- sigurd::ClonalDefinition(se = genotyping_mt_all[[sample_use]], mt_vois_all[sample_use], verbose = FALSE) - return(clones) -}, mc.cores = length(mt_vois_all)) -names(genotyping_mt_all) <- patients -``` - -## Calculating the diverstiy for all samples - -```{r warning = FALSE} -clonal_diversity_all <- unlist(lapply(genotyping_mt_all, ClonalDiversity, grouping = "Clones", diversity_measure = "EffectiveSpecies", verbose = FALSE)) -clonal_diversity_all <- data.frame(Sample = names(genotyping_mt_all), EffectiveSpecies = clonal_diversity_all) -knitr::kable(clonal_diversity_all, format="html") -``` diff --git a/inst/doc/MPN_SIGURD.html b/inst/doc/MPN_SIGURD.html deleted file mode 100755 index ff889b4..0000000 --- a/inst/doc/MPN_SIGURD.html +++ /dev/null @@ -1,249 +0,0 @@ - - - - - - - - -MPN SIGURD • sigurd - - - - - - - - - - Skip to contents - - -
- -
-
- - - - -
-

Analysis of MPN data -

-

Here, we illustrate how to analysis genotyping data from MPN samples. The JAK2V617F mutation was genotyped for 6 MPN samples and 3 healthy controls.

-

This setup makes this analysis different from the previous analyses. Previously, only one sample was analysed at a time. Now, we load and analyse the samples simultaneously.

-
-
-

Loading MPN data -

-

Now, we load the JAK2V617F data and plot the mutated cells on the UMAP.

-

Since we are loading multiple samples, this code is more complicated than the code for the MAESTER SW sample.

-
-# Loading the scRNA-seq data.
-scrna <- load_object("/data/MPN/exp/scRNA/MPN_mutations/SIGURD_paper/sigurd/data/scrna_MPN.rds.lz4")
-
-# Loading the genotyping data.
-genotyping  <- LoadingVarTrix_typewise(patient = "MPN", samples_file = "/data/MPN/exp/scRNA/MPN_mutations/SIGURD_paper/sigurd/data/DesignMatrix_VarTrix.csv", min_reads = 0, vcf_path = "/data/MPN/exp/scRNA/MPN_mutations/SIGURD_paper/sigurd/data/CosmicSubset_JAK2V617F.vcf", type_use = "scRNAseq_Somatic", min_cells = 0, verbose = FALSE)
-amplicon <- LoadingVarTrix_typewise(patient = "MPN", samples_file = "/data/MPN/exp/scRNA/MPN_mutations/SIGURD_paper/sigurd/data/DesignMatrix_Amplicon.csv", min_reads = 0, vcf_path = "/data/MPN/exp/scRNA/MPN_mutations/SIGURD_paper/sigurd/data/CosmicSubset_UCSC_JAK2V617F.vcf", type_use = "scRNAseq_Amplicon", min_cells = 0, verbose = FALSE)
-genotyping  <- AmpliconSupplementing(scRNAseq = genotyping, amplicon = amplicon, verbose = FALSE)
-
-# Filtering the genotyping object to remove false positives.
-genotyping <- Filtering(genotyping, min_cells_per_variant = 0, fraction_threshold = 0.21, cells_include = colnames(scrna), min_variants_per_cell = 1, reject_value = "NoCall", verbose = FALSE)
-
-
-

Combining Seurat and SIGURD -

-

Now, we add the genotyping information to the Seurat object.

-
-scrna <- SetVariantInfo(SE = genotyping, seurat_object = scrna)
-
## [1] "You did not supply a vector of variants. All variants will be used."
-
-
-

Visualisation -

-

We visualise the genotyping information on the UMAP.

-
-# Plotting the mutated cells on the UMAP.
-scrna$JAK2_p.V617F_c.1849G.T_consensus <- factor(scrna$JAK2_p.V617F_c.1849G.T_consensus, levels = c("Ref", "Alt"))
-jak2_cells <- sum(scrna$JAK2_p.V617F_c.1849G.T_consensus == "Alt", na.rm = TRUE)
-ref_cells <- sum(scrna$JAK2_p.V617F_c.1849G.T_consensus == "Ref", na.rm = TRUE)
-all_cells <- ncol(scrna)
-p <- DimPlot(scrna, group.by = "JAK2_p.V617F_c.1849G.T_consensus", reduction = "INTE_UMAP", order = TRUE, pt.size = 1, raster = FALSE, na.value = "grey95", cols = c(Alt = "#aa0051", Ref = "#79b938")) +
-  ggtitle(paste0("Alt: ", jak2_cells, ", Ref: ", ref_cells, ", All: ", all_cells)) +
-  xlab("UMAP 1") + ylab("UMAP 2") + scale_color_manual(breaks = c("Alt", "Ref"), values = c("#aa0051", "#79b938"), na.value = "grey95") +
-  theme(legend.position = "top")
-
## Scale for colour is already present.
-## Adding another scale for colour, which will replace the existing scale.
-
-print(p)
-

-
-
-

We determine DEGs between the JAK2V617F and WT cells. -

-

Now, we determine the DEGs between the JAK2V617F cells and the wild type cells for the Orthochromatic Erythroblasts.

-
-scrna_orthoe <- subset(scrna, CellType == "Orthochromatic Erythroblasts" & Condition != "HC")
-degs <- FindMarkers(scrna_orthoe, group.by = "JAK2_p.V617F_c.1849G.T_consensus", ident.1 = "Alt", ident.2 = "Ref", logfc.threshold = 0)
-degs <- data.frame(degs, gene = rownames(degs))
-degs_up   <- nrow(subset(degs, avg_log2FC >   0.25 & p_val_adj < 0.05))
-degs_down <- nrow(subset(degs, avg_log2FC <  -0.25 & p_val_adj < 0.05))
-EnhancedVolcano(degs, x = "avg_log2FC", y = "p_val_adj", lab = degs$gene, FCcutoff = 0.25, pCutoff = 0.05, title = "OrthoE JAK2V617F Pos. vs. Neg.",subtitle = NULL)
-

-
-
-

Combining the JAK2V617F data with the MT data. -

-

We combine the JAK2V617F SIGURD object with the MT object. Cells and variants in both objects are combined.

-

To save time, we are only processing a single sample. All other samples are processed equally. Since the genotyping object is a list, it is easily processed using parallel computing.

-

First, we load and filter the mitochondrial variant information.

-
-# Loading the data. 
-genotyping_mt <- LoadingMAEGATK_typewise(patient = "MPN1", samples_file = "/data/MPN/exp/scRNA/MPN_mutations/SIGURD_paper/sigurd/data/DesignMatrix_MT.csv", min_cells = 0, cells_include = colnames(scrna), type_use = "scRNAseq_MT", verbose = FALSE)
-genotyping_mt <- Filtering(genotyping_mt, min_variants_per_cell = 1, min_cells_per_variant = 2, fraction_threshold = 0.05, cells_include = colnames(scrna), verbose = FALSE)
-
-
-

MPN VOIs and Diversity -

-

Now, we select the mtVOIs. We then determine the clonal lineages and plot the results on a heatmap.

-

We subset the cells to only include the erythroid cells.

-
-# Subset to erythroid cells.
-erythroid_cells <- subset(scrna, annotation == "ErythroidCells")
-genotyping_mt <- Filtering(genotyping_mt, cells_include = colnames(erythroid_cells))
-
## [1] "We remove all cells not in the allow list."
-## [1] "We remove all the variants that are always NoCall."
-
-# Selecting the mtVOIS.
-mt_vois <- VariantSelection_Quantile(genotyping_mt, min_coverage = 1, verbose = FALSE)
-
-# Determining the clonal lineages.
-genotyping_mt <- ClonalDefinition(se = genotyping_mt, variants_ls = list(mt_vois), verbose = FALSE)
-
-
-

Visualisation of mtVOIs and Clonal Lineages -

-

We now plot the variants of interest and the clonal lineages.

-
-# Generating a heatmap.
-colors_lineage <- c("C1" = "#563e00", "C2" = "#dec234", "C3" = "#e23856", "C4" = "#7f0073", "C5" = "#006639", "C6" = "#b1db8e", "C7" = "#00348f", "Negative" = "black")
-ha <- columnAnnotation(Clones = colData(genotyping_mt)[,"Clones"],
-                       col = list(Clones = colors_lineage),
-                       annotation_name_gp = gpar(fontsize = 10, fontface = "plain"),
-                       annotation_legend_param = list(Clones = list(nrow = 1, title = "")))
-fraction <- as.matrix(assays(genotyping_mt)[["fraction"]])[mt_vois, ]
-hm <- Heatmap(fraction,
-  name = "VAF",
-  row_title_gp = gpar(fontsize = 12),
-  row_names_gp = gpar(fontsize = 10),
-  row_names_max_width = unit(16, "cm"),
-  column_title = "MPN1",
-  column_title_gp = gpar(fontsize = 12),
-  column_names_gp = gpar(fontsize = 10),
-  col = colorRamp2(seq(0, round(max(fraction, na.rm = TRUE)), length.out = 9), c("#FCFCFC", "#FFEDB0", "#FFDF5F", "#FEC510", "#FA8E24", "#F14C2B", "#DA2828", "#BE2222", "#A31D1D")),
-  show_column_dend = FALSE,
-  show_row_names = TRUE,
-  show_column_names = FALSE,
-  cluster_columns = TRUE,
-  cluster_rows = FALSE,
-  heatmap_legend_param = list(border = "#000000", grid_height = unit(10, "mm"),
-  direction = "horizontal"),
-  bottom_annotation = ha,
-  border = TRUE,
-  use_raster = TRUE,
-  show_heatmap_legend = TRUE)
-draw(hm, annotation_legend_side = "bottom")
-

-
-
-

Calculating clonal diversity. -

-

We calculate the clonal diversity for the sample MPN1 as an example.

-
-clonal_diversity <- ClonalDiversity(genotyping_mt, grouping = "Clones", diversity_measure = "EffectiveSpecies", verbose = FALSE)
-print(paste0("Clonal Diversity in Effective Number of Species for MPN1: ", clonal_diversity))
-
## [1] "Clonal Diversity in Effective Number of Species for MPN1: 6.72545022391521"
-
-
-
- - - - -
- - - - - - - diff --git a/inst/doc/MPN_SIGURD_files/figure-html/DEGs_OrthoE_JAK2V617F-1.png b/inst/doc/MPN_SIGURD_files/figure-html/DEGs_OrthoE_JAK2V617F-1.png deleted file mode 100755 index 476c735..0000000 Binary files a/inst/doc/MPN_SIGURD_files/figure-html/DEGs_OrthoE_JAK2V617F-1.png and /dev/null differ diff --git a/inst/doc/MPN_SIGURD_files/figure-html/unnamed-chunk-3-1.png b/inst/doc/MPN_SIGURD_files/figure-html/unnamed-chunk-3-1.png deleted file mode 100755 index b650d77..0000000 Binary files a/inst/doc/MPN_SIGURD_files/figure-html/unnamed-chunk-3-1.png and /dev/null differ diff --git a/inst/doc/SW_CellMixture_SIGURD.Rmd b/inst/doc/SW_CellMixture_SIGURD.Rmd deleted file mode 100755 index 02e275c..0000000 --- a/inst/doc/SW_CellMixture_SIGURD.Rmd +++ /dev/null @@ -1,101 +0,0 @@ ---- -title: "Cell Mixture Analysis using Seq-Well Sequencing" -author: "Martin Grasshoff1, Ivan G. Costa1" -affiliations: "1. Institute for Computational Genomics, Faculty of Medicine, RWTH Aachen University, Aachen, 52074 Germany" -output: rmarkdown::html_vignette -vignette: > - %\VignetteIndexEntry{Cell Mixture Analysis using Seq-Well Sequencing} - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} -header-includes: - - '\usepackage[utf8]{inputenc}' ---- - -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) -``` -## Cell Mixture Analysis using SIGURD - -In this data set, chronic myelogenous leukemia cells (K562) and brain tumor cells (BT142) were mixed and then sequenced using Seq-Well S3. The cells can be separated using the expression of marker genes. - -The original script then identifies 6 variants that can distinguish between the two cell types. - -In this reproduction, we show that the analysis can be faithfully reproduced using a very streamlined set of functions. This greatly simplifies the analysis. Using a set of dedicated functions, the analysis can now also be easily adopted to new data sets. - -## Loading necessary packages. - -We load the data from MAESTER. - -```{r Packages} -print("Libraries for SIGURD.") -suppressPackageStartupMessages(library(sigurd)) -suppressPackageStartupMessages(library(SummarizedExperiment)) -suppressPackageStartupMessages(library(ggplot2)) -``` - -## We load the Seurat object for the scRNA-seq data. - -```{r warning = FALSE} -seu <- readRDS("/data/MPN/exp/scRNA/MPN_mutations/SIGURD_paper/sigurd/data/SW_CellLineMix_Seurat_Keep_Renamed.rds") -``` - -## Loading the data using SIGURD. - -SIGURD loads the data and automatically reformats the data. This code uses dedicated functions to load the and process the data, which streamlines the analysis quite substantially. - -```{r warning = FALSE} -# The design matrix contains information for the genotyping data. -genotyping <- LoadingMAEGATK_typewise(patient = "SW", samples_file = "/data/MPN/exp/scRNA/MPN_mutations/SIGURD_paper/sigurd/data/MAESTER_Reproduction.csv", min_cells = 0, type_use = "Amplicon_MT", verbose = FALSE) - -# Removing cells that are not in the Seurat object. -genotyping <- Filtering(genotyping, cells_include = colnames(seu)) - -# Adding meta data do the SIGURD object. -colData(genotyping)$CellType <- seu$CellType - -``` - -## Selecting the Variants of Interest - -Now, we select the variants of interest. - -```{r warning = FALSE} -# Selecting informative variants. -voi.ch.sigurd <- VariantSelection_Quantile(genotyping, min_coverage = 20, min_quality = 30, quantiles = c(0.1, 0.9), thresholds = c(0.1, 0.9), verbose = FALSE) -print(voi.ch.sigurd) -``` - -## Visualisation using SIGURD - -```{r warning = FALSE, fig.width = 6, fig.height = 4, dev = "CairoPNG"} -# Determining if a cell is supporting one of the two cell types. -cell_support <- CallSupport(SE = genotyping, VOI_group1 = voi.ch.sigurd[voi.ch.sigurd != "chrM_7990_C_T"], VOI_group2 = "chrM_7990_C_T", group1_name = "K562", group2_name = "BT142", min_mutated_reads = 3, min_reads = 30, group_factor = 10, verbose = FALSE, return_nonsupport = TRUE) -colData(genotyping)$CellType_MT <- cell_support$Support -cell_support <- subset(cell_support, Support %in% c("K562", "BT142")) # Selecting only cells with sufficient support for a set of variants. - -# Plotting the results on a heatmap. -HeatmapVoi(SE = genotyping[,cell_support$Cell], voi = voi.ch.sigurd, annotation_trait = "CellType", minimum_coverage = 3, remove_empty_cells = FALSE) -``` - -## Selecting variants with comparatively high VAF between groups - -We now select variants with a higher VAF in the K562 cells than in the BT142 cells. - -```{r warning = FALSE, dev = "CairoPNG"} - -voi.ch.lineages <- sigurd::VariantSelection_Group(SE = genotyping, min_coverage = 20, quantiles = c(0.01, 0.99), thresholds = c(0.01, 0.02), min_quality = 30, - group_of_interest = "CellType_MT", group1 = "K562", group2 = "BT142", group_factor = 5, remove_nocall = FALSE, verbose = FALSE) -voi.ch.lineages <- gsub("chrM_9117_T_C", "chrM_2141_T_C", voi.ch.lineages) # Replace chrM_9117_T_C, which comes up because it has no coverage in some cells, with another homoplasmic variant (chrM_2141_T_C), that has better coverage) -voi.ch.lineages <- voi.ch.lineages[!voi.ch.lineages %in% c("chrM_5378_A_G", "chrM_6384_G_A")] # These variants are not detected in the bulk data and were removed by hand. -print(voi.ch.lineages) -``` - -## Visualisation using SIGURD - -```{r warning = FALSE, dev = "CairoPNG"} -# Plotting the results on a heatmap. -cell_support <- subset(cell_support, Support == "K562") -genotyping <- Filtering(genotyping, cells_include = cell_support$Cell, verbose = FALSE) -HeatmapVoi(SE = genotyping, voi = voi.ch.lineages, minimum_coverage = 3, sort_cells = TRUE, cluster_variants = TRUE) -``` - diff --git a/inst/doc/SW_CellMixture_SIGURD.html b/inst/doc/SW_CellMixture_SIGURD.html deleted file mode 100755 index a8e9fda..0000000 --- a/inst/doc/SW_CellMixture_SIGURD.html +++ /dev/null @@ -1,191 +0,0 @@ - - - - - - - - -Cell Mixture Analysis using Seq-Well Sequencing • sigurd - - - - - - - - - - Skip to contents - - -
- -
-
- - - -
-

Cell Mixture Analysis using SIGURD -

-

In this data set, chronic myelogenous leukemia cells (K562) and brain tumor cells (BT142) were mixed and then sequenced using Seq-Well S3. The cells can be separated using the expression of marker genes.

-

The original script then identifies 6 variants that can distinguish between the two cell types.

-

In this reproduction, we show that the analysis can be faithfully reproduced using a very streamlined set of functions. This greatly simplifies the analysis. Using a set of dedicated functions, the analysis can now also be easily adopted to new data sets.

-
-
-

Loading necessary packages. -

-

We load the data from MAESTER.

-
-print("Libraries for SIGURD.")
-
## [1] "Libraries for SIGURD."
- -
-
-

We load the Seurat object for the scRNA-seq data. -

-
-seu <- readRDS("/data/MPN/exp/scRNA/MPN_mutations/SIGURD_paper/sigurd/data/SW_CellLineMix_Seurat_Keep_Renamed.rds")
-
-
-

Loading the data using SIGURD. -

-

SIGURD loads the data and automatically reformats the data. This code uses dedicated functions to load the and process the data, which streamlines the analysis quite substantially.

-
-# The design matrix contains information for the genotyping data.
-genotyping <- LoadingMAEGATK_typewise(patient = "SW", samples_file = "/data/MPN/exp/scRNA/MPN_mutations/SIGURD_paper/sigurd/data/MAESTER_Reproduction.csv", min_cells = 0, type_use = "Amplicon_MT", verbose = FALSE)
-
-# Removing cells that are not in the Seurat object.
-genotyping <- Filtering(genotyping, cells_include = colnames(seu))
-
## [1] "We remove all cells not in the allow list."
-## [1] "We remove all the variants that are always NoCall."
-
-# Adding meta data do the SIGURD object.
-colData(genotyping)$CellType <- seu$CellType
-
-
-

Selecting the Variants of Interest -

-

Now, we select the variants of interest.

-
-# Selecting informative variants. 
-voi.ch.sigurd <- VariantSelection_Quantile(genotyping, min_coverage = 20, min_quality = 30, quantiles = c(0.1, 0.9), thresholds = c(0.1, 0.9), verbose = FALSE)
-print(voi.ch.sigurd)
-
## [1] "chrM_709_G_A"  "chrM_1888_G_A" "chrM_1420_T_C" "chrM_2141_T_C"
-## [5] "chrM_9117_T_C" "chrM_7990_C_T"
-
-
-

Visualisation using SIGURD -

-
-# Determining if a cell is supporting one of the two cell types.
-cell_support <- CallSupport(SE = genotyping, VOI_group1 = voi.ch.sigurd[voi.ch.sigurd != "chrM_7990_C_T"], VOI_group2 = "chrM_7990_C_T", group1_name = "K562", group2_name = "BT142", min_mutated_reads = 3, min_reads = 30, group_factor = 10, verbose = FALSE, return_nonsupport = TRUE)
-colData(genotyping)$CellType_MT <- cell_support$Support
-cell_support <- subset(cell_support, Support %in% c("K562", "BT142")) # Selecting only cells with sufficient support for a set of variants.
-
-# Plotting the results on a heatmap.
-HeatmapVoi(SE = genotyping[,cell_support$Cell], voi = voi.ch.sigurd, annotation_trait = "CellType", minimum_coverage = 3, remove_empty_cells = FALSE)
-

-
-
-

Selecting variants with comparatively high VAF between groups -

-

We now select variants with a higher VAF in the K562 cells than in the BT142 cells.

-
-voi.ch.lineages <- sigurd::VariantSelection_Group(SE = genotyping, min_coverage = 20, quantiles = c(0.01, 0.99), thresholds = c(0.01, 0.02), min_quality = 30,
-                                                  group_of_interest = "CellType_MT", group1 = "K562", group2 = "BT142", group_factor = 5, remove_nocall = FALSE, verbose = FALSE)
-voi.ch.lineages <- gsub("chrM_9117_T_C", "chrM_2141_T_C", voi.ch.lineages) # Replace chrM_9117_T_C, which comes up because it has no coverage in some cells, with another homoplasmic variant (chrM_2141_T_C), that has better coverage)
-voi.ch.lineages <- voi.ch.lineages[!voi.ch.lineages %in% c("chrM_5378_A_G", "chrM_6384_G_A")] # These variants are not detected in the bulk data and were removed by hand.
-print(voi.ch.lineages)
-
##  [1] "chrM_1197_G_A" "chrM_1227_G_A" "chrM_1327_G_A" "chrM_1982_G_A"
-##  [5] "chrM_2008_G_A" "chrM_2147_G_A" "chrM_2170_G_A" "chrM_2478_G_A"
-##  [9] "chrM_2571_G_A" "chrM_2573_G_A" "chrM_2810_G_A" "chrM_2943_G_A"
-## [13] "chrM_5274_G_A" "chrM_8155_G_A" "chrM_8213_G_A" "chrM_8278_C_A"
-## [17] "chrM_9778_G_A" "chrM_2141_T_C" "chrM_1310_C_T" "chrM_1352_C_T"
-## [21] "chrM_1533_C_T" "chrM_9757_C_T"
-
-
-

Visualisation using SIGURD -

-
-# Plotting the results on a heatmap.
-cell_support <- subset(cell_support, Support == "K562")
-genotyping <- Filtering(genotyping, cells_include = cell_support$Cell, verbose = FALSE)
-HeatmapVoi(SE = genotyping, voi = voi.ch.lineages, minimum_coverage = 3, sort_cells = TRUE, cluster_variants = TRUE)
-

-
-
-
- - - - -
- - - - - - - diff --git a/inst/doc/SW_CellMixture_SIGURD_files/figure-html/unnamed-chunk-4-1.png b/inst/doc/SW_CellMixture_SIGURD_files/figure-html/unnamed-chunk-4-1.png deleted file mode 100755 index 3230d69..0000000 Binary files a/inst/doc/SW_CellMixture_SIGURD_files/figure-html/unnamed-chunk-4-1.png and /dev/null differ diff --git a/inst/doc/SW_CellMixture_SIGURD_files/figure-html/unnamed-chunk-6-1.png b/inst/doc/SW_CellMixture_SIGURD_files/figure-html/unnamed-chunk-6-1.png deleted file mode 100755 index 496d7a0..0000000 Binary files a/inst/doc/SW_CellMixture_SIGURD_files/figure-html/unnamed-chunk-6-1.png and /dev/null differ diff --git a/inst/doc/TenX_CellMixture_SIGURD.Rmd b/inst/doc/TenX_CellMixture_SIGURD.Rmd deleted file mode 100755 index 43e1498..0000000 --- a/inst/doc/TenX_CellMixture_SIGURD.Rmd +++ /dev/null @@ -1,87 +0,0 @@ ---- -title: "Cell Mixture Experiment using 10X Genomics Sequencing" -author: "Martin Grasshoff1, Ivan G. Costa1" -affiliations: "1. Institute for Computational Genomics, Faculty of Medicine, RWTH Aachen University, Aachen, 52074 Germany" -output: rmarkdown::html_vignette -vignette: > - %\VignetteIndexEntry{Cell Mixture Experiment using 10X Genomics Sequencing} - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} -header-includes: - - '\usepackage[utf8]{inputenc}' ---- - -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) -``` -## Cell Mixture Experiment using 10X Genomics Sequencing - -Here, we reproduce the analysis of a cell mixture experiment. - -## Loading necessary packages. - -We load the data from MAESTER. - -```{r Packages} -print("Libraries for SIGURD.") -suppressPackageStartupMessages(library(sigurd)) -suppressPackageStartupMessages(library(SummarizedExperiment)) -suppressPackageStartupMessages(library(ggplot2)) -``` - -## We load the Seurat object for the scRNA-seq data. - -```{r warning = FALSE} -seu <- readRDS("/data/MPN/exp/scRNA/MPN_mutations/SIGURD_paper/sigurd/data/TenX_CellLineMix_Seurat_Keep_Renamed.rds") -``` - -## Loading the data using SIGURD. - -```{r warning = FALSE} -# The design matrix contains information for the genotyping data. -genotyping <- LoadingMAEGATK_typewise(patient = "TenX", samples_file = "/data/MPN/exp/scRNA/MPN_mutations/SIGURD_paper/sigurd/data/MAESTER_Reproduction.csv", min_cells = 0, type_use = "Amplicon_MT", verbose = FALSE) - -# Removing cells that are not in the Seurat object. -genotyping <- Filtering(genotyping, cells_include = colnames(seu)) - -# Adding meta data do the SIGURD object. -colData(genotyping)$CellType <- seu$CellType -``` - -## Selecting the Variants of Interest - -```{r warning = FALSE} -# Selecting informative variants. -voi.ch.sigurd <- VariantSelection_Quantile(genotyping, min_coverage = 20, min_quality = 30, quantiles = c(0.1, 0.9), thresholds = c(0.1, 0.9), verbose = FALSE) -print(voi.ch.sigurd) -``` - -## Visualisation using SIGURD - -```{r warning = FALSE, fig.width = 6, fig.height = 4, dev = "CairoPNG"} -# Determining if a cell is supporting one of the two cell types. -cell_support <- CallSupport(SE = genotyping, VOI_group1 = voi.ch.sigurd[voi.ch.sigurd != "chrM_7990_C_T"], VOI_group2 = "chrM_7990_C_T", group1_name = "K562", group2_name = "BT142", min_mutated_reads = 3, min_reads = 30, group_factor = c(10,2), verbose = FALSE) -genotyping <- Filtering(genotyping, cells_include = cell_support$Cell) -colData(genotyping)$CellType_MT <- cell_support$Support -cell_support <- subset(cell_support, Support %in% c("K562", "BT142")) # Selecting only cells with sufficient support for a set of variants. - -# Plotting the results on a heatmap. -HeatmapVoi(SE = genotyping, voi = voi.ch.sigurd, annotation_trait = "CellType", minimum_coverage = 3) -``` - -## Selecting variants with comparatively high VAF between groups - -We now select variants with a higher VAF in the K562 cells than in the BT142 cells. - -```{r warning = FALSE, dev = "CairoPNG"} -voi.ch.lineages <- sigurd::VariantSelection_Group(SE = genotyping, min_coverage = 100, quantiles = c(0.01, 0.99), thresholds = c(0.01, 0.02), min_quality = 30, - group_of_interest = "CellType_MT", group1 = "K562", group2 = "BT142", group_factor = 5, remove_nocall = FALSE, verbose = FALSE) -voi.ch.lineages <- voi.ch.lineages[!voi.ch.lineages %in% c("chrM_8251_G_A", "chrM_7693_C_T")] # These variants are not detected in the bulk data and were removed by hand. -voi.ch.lineages <- c(voi.ch.lineages, "chrM_2141_T_C") # Adding a homoplasmic variant as positive control. -print(voi.ch.lineages) - -# Plotting the results on a heatmap. -cell_support <- subset(cell_support, Support == "K562") # Only plotting K562 cells. -genotyping <- Filtering(genotyping, cells_include = cell_support$Cell, verbose = FALSE) -HeatmapVoi(SE = genotyping, voi = voi.ch.lineages, minimum_coverage = 3, sort_cells = TRUE, cluster_variants = TRUE) -``` diff --git a/inst/doc/TenX_CellMixture_SIGURD.html b/inst/doc/TenX_CellMixture_SIGURD.html deleted file mode 100755 index 1d0082c..0000000 --- a/inst/doc/TenX_CellMixture_SIGURD.html +++ /dev/null @@ -1,191 +0,0 @@ - - - - - - - - -Cell Mixture Experiment using 10X Genomics Sequencing • sigurd - - - - - - - - - - Skip to contents - - -
- -
-
- - - -
-

Cell Mixture Experiment using 10X Genomics Sequencing -

-

Here, we reproduce the analysis of a cell mixture experiment.

-
-
-

Loading necessary packages. -

-

We load the data from MAESTER.

-
-print("Libraries for SIGURD.")
-
## [1] "Libraries for SIGURD."
- -
-
-

We load the Seurat object for the scRNA-seq data. -

-
-seu <- readRDS("/data/MPN/exp/scRNA/MPN_mutations/SIGURD_paper/sigurd/data/TenX_CellLineMix_Seurat_Keep_Renamed.rds")
-
-
-

Loading the data using SIGURD. -

-
-# The design matrix contains information for the genotyping data.
-genotyping <- LoadingMAEGATK_typewise(patient = "TenX", samples_file = "/data/MPN/exp/scRNA/MPN_mutations/SIGURD_paper/sigurd/data/MAESTER_Reproduction.csv", min_cells = 0, type_use = "Amplicon_MT", verbose = FALSE)
-
-# Removing cells that are not in the Seurat object.
-genotyping <- Filtering(genotyping, cells_include = colnames(seu))
-
## [1] "We remove all cells not in the allow list."
-## [1] "We remove all the variants that are always NoCall."
-
-# Adding meta data do the SIGURD object.
-colData(genotyping)$CellType <- seu$CellType
-
-
-

Selecting the Variants of Interest -

-
-# Selecting informative variants. 
-voi.ch.sigurd <- VariantSelection_Quantile(genotyping, min_coverage = 20, min_quality = 30, quantiles = c(0.1, 0.9), thresholds = c(0.1, 0.9), verbose = FALSE)
-print(voi.ch.sigurd)
-
##  [1] "chrM_709_G_A"   "chrM_1888_G_A"  "chrM_6249_G_A"  "chrM_8697_G_A" 
-##  [5] "chrM_11719_G_A" "chrM_15452_C_A" "chrM_1420_T_C"  "chrM_2141_T_C" 
-##  [9] "chrM_4216_T_C"  "chrM_6524_T_C"  "chrM_9117_T_C"  "chrM_11251_A_G"
-## [13] "chrM_11764_A_G" "chrM_11812_A_G" "chrM_15607_A_G" "chrM_7990_C_T" 
-## [17] "chrM_12741_C_T"
-
-
-

Visualisation using SIGURD -

-
-# Determining if a cell is supporting one of the two cell types.
-cell_support <- CallSupport(SE = genotyping, VOI_group1 = voi.ch.sigurd[voi.ch.sigurd != "chrM_7990_C_T"], VOI_group2 = "chrM_7990_C_T", group1_name = "K562", group2_name = "BT142", min_mutated_reads = 3, min_reads = 30, group_factor = c(10,2), verbose = FALSE)
-genotyping <- Filtering(genotyping, cells_include = cell_support$Cell)
-
## [1] "We remove all cells not in the allow list."
-## [1] "We remove all the variants that are always NoCall."
-
-colData(genotyping)$CellType_MT <- cell_support$Support
-cell_support <- subset(cell_support, Support %in% c("K562", "BT142")) # Selecting only cells with sufficient support for a set of variants.
-
-# Plotting the results on a heatmap.
-HeatmapVoi(SE = genotyping, voi = voi.ch.sigurd, annotation_trait = "CellType", minimum_coverage = 3)
-

-
-
-

Selecting variants with comparatively high VAF between groups -

-

We now select variants with a higher VAF in the K562 cells than in the BT142 cells.

-
-voi.ch.lineages <- sigurd::VariantSelection_Group(SE = genotyping, min_coverage = 100, quantiles = c(0.01, 0.99), thresholds = c(0.01, 0.02), min_quality = 30,
-                                                  group_of_interest = "CellType_MT", group1 = "K562", group2 = "BT142", group_factor = 5, remove_nocall = FALSE, verbose = FALSE)
-voi.ch.lineages <- voi.ch.lineages[!voi.ch.lineages %in% c("chrM_8251_G_A", "chrM_7693_C_T")] # These variants are not detected in the bulk data and were removed by hand.
-voi.ch.lineages <- c(voi.ch.lineages, "chrM_2141_T_C") # Adding a homoplasmic variant as positive control.
-print(voi.ch.lineages)
-
##  [1] "chrM_1916_G_A"  "chrM_2008_G_A"  "chrM_2147_G_A"  "chrM_2551_G_A" 
-##  [5] "chrM_2571_G_A"  "chrM_2573_G_A"  "chrM_7269_G_A"  "chrM_7337_G_A" 
-##  [9] "chrM_7362_G_A"  "chrM_7706_G_A"  "chrM_8155_G_A"  "chrM_9247_G_A" 
-## [13] "chrM_9778_G_A"  "chrM_10223_C_A" "chrM_11531_G_A" "chrM_11914_G_A"
-## [17] "chrM_15374_G_A" "chrM_8211_T_C"  "chrM_9613_T_C"  "chrM_1733_C_T" 
-## [21] "chrM_2357_C_T"  "chrM_2574_G_T"  "chrM_7825_C_T"  "chrM_9757_C_T" 
-## [25] "chrM_9822_C_T"  "chrM_11965_C_T" "chrM_12102_C_T" "chrM_2141_T_C"
-
-# Plotting the results on a heatmap.
-cell_support <- subset(cell_support, Support == "K562") # Only plotting K562 cells.
-genotyping <- Filtering(genotyping, cells_include = cell_support$Cell, verbose = FALSE)
-HeatmapVoi(SE = genotyping, voi = voi.ch.lineages, minimum_coverage = 3, sort_cells = TRUE, cluster_variants = TRUE)
-

-
-
-
- - - - -
- - - - - - - diff --git a/inst/doc/TenX_CellMixture_SIGURD_files/figure-html/unnamed-chunk-4-1.png b/inst/doc/TenX_CellMixture_SIGURD_files/figure-html/unnamed-chunk-4-1.png deleted file mode 100755 index da0dffc..0000000 Binary files a/inst/doc/TenX_CellMixture_SIGURD_files/figure-html/unnamed-chunk-4-1.png and /dev/null differ diff --git a/inst/doc/TenX_CellMixture_SIGURD_files/figure-html/unnamed-chunk-5-1.png b/inst/doc/TenX_CellMixture_SIGURD_files/figure-html/unnamed-chunk-5-1.png deleted file mode 100755 index fa86bf5..0000000 Binary files a/inst/doc/TenX_CellMixture_SIGURD_files/figure-html/unnamed-chunk-5-1.png and /dev/null differ diff --git a/inst/extdata/CosmicSubset_filtered.vcf b/inst/extdata/CosmicSubset_filtered.vcf old mode 100755 new mode 100644 diff --git a/inst/extdata/Input_Example_local.csv b/inst/extdata/Input_Example_local.csv old mode 100755 new mode 100644 diff --git a/inst/extdata/JAK2V617F.vcf b/inst/extdata/JAK2V617F.vcf old mode 100755 new mode 100644 diff --git a/inst/extdata/MT_Input_VCF_NoMAF_Filtering.vcf b/inst/extdata/MT_Input_VCF_NoMAF_Filtering.vcf old mode 100755 new mode 100644 diff --git a/inst/extdata/chrM_Input_VCF_NoMAF_Filtering.vcf b/inst/extdata/chrM_Input_VCF_NoMAF_Filtering.vcf old mode 100755 new mode 100644 diff --git a/man/AllelFrequencyFoldChange.Rd b/man/AllelFrequencyFoldChange.Rd old mode 100755 new mode 100644 diff --git a/man/AmpliconSupplementing.Rd b/man/AmpliconSupplementing.Rd old mode 100755 new mode 100644 diff --git a/man/CalculateAlleleFrequency.Rd b/man/CalculateAlleleFrequency.Rd old mode 100755 new mode 100644 diff --git a/man/CalculateAltReads.Rd b/man/CalculateAltReads.Rd old mode 100755 new mode 100644 diff --git a/man/CalculateConsensus.Rd b/man/CalculateConsensus.Rd old mode 100755 new mode 100644 diff --git a/man/CalculateCorrelationPValue.Rd b/man/CalculateCorrelationPValue.Rd old mode 100755 new mode 100644 index 700f107..b32223c --- a/man/CalculateCorrelationPValue.Rd +++ b/man/CalculateCorrelationPValue.Rd @@ -8,16 +8,19 @@ CalculateCorrelationPValue( variant_values, other_mutation, all_variants_list, + value_type = "consensus", min_intersecting_cells = 5 ) } \arguments{ -\item{variant_values}{The fraction values you are analysing. A vector.} +\item{variant_values}{The fraction values you are analyzing. A vector.} \item{other_mutation}{All other variants you have. A vector of variant names.} \item{all_variants_list}{List of fraction values for all the variants you want to compare your variant with.} +\item{value_type}{Are we using consensus or other information?} + \item{min_intersecting_cells}{Minimum number of intersecting cells. Correlations with less than this will not be performed.} } \description{ diff --git a/man/CalculateCoverage.Rd b/man/CalculateCoverage.Rd old mode 100755 new mode 100644 diff --git a/man/CalculateFisherTestPValue.Rd b/man/CalculateFisherTestPValue.Rd old mode 100755 new mode 100644 diff --git a/man/CalculateQuality.Rd b/man/CalculateQuality.Rd old mode 100755 new mode 100644 diff --git a/man/CalculateRefReads.Rd b/man/CalculateRefReads.Rd old mode 100755 new mode 100644 diff --git a/man/CalculateStrandCorrelation.Rd b/man/CalculateStrandCorrelation.Rd old mode 100755 new mode 100644 diff --git a/man/CallSupport.Rd b/man/CallSupport.Rd old mode 100755 new mode 100644 diff --git a/man/ClonalDefinition.Rd b/man/ClonalDefinition.Rd old mode 100755 new mode 100644 diff --git a/man/ClonalDiversity.Rd b/man/ClonalDiversity.Rd old mode 100755 new mode 100644 diff --git a/man/CombineSEobjects.Rd b/man/CombineSEobjects.Rd old mode 100755 new mode 100644 diff --git a/man/Enrichment_FisherTest.Rd b/man/Enrichment_FisherTest.Rd old mode 100755 new mode 100644 diff --git a/man/Filtering.Rd b/man/Filtering.Rd old mode 100755 new mode 100644 diff --git a/man/GetCellInfoPerVariant.Rd b/man/GetCellInfoPerVariant.Rd old mode 100755 new mode 100644 diff --git a/man/GetVariantInfo.Rd b/man/GetVariantInfo.Rd old mode 100755 new mode 100644 diff --git a/man/HeatmapVoi.Rd b/man/HeatmapVoi.Rd old mode 100755 new mode 100644 diff --git a/man/LoadingMAEGATK_typewise.Rd b/man/LoadingMAEGATK_typewise.Rd old mode 100755 new mode 100644 diff --git a/man/LoadingRawMatrix_typewise.Rd b/man/LoadingRawMatrix_typewise.Rd old mode 100755 new mode 100644 diff --git a/man/LoadingVCF_typewise.Rd b/man/LoadingVCF_typewise.Rd old mode 100755 new mode 100644 diff --git a/man/LoadingVarTrix_typewise.Rd b/man/LoadingVarTrix_typewise.Rd old mode 100755 new mode 100644 diff --git a/man/Merging_SE_list.Rd b/man/Merging_SE_list.Rd old mode 100755 new mode 100644 diff --git a/man/RowWiseSplit.Rd b/man/RowWiseSplit.Rd old mode 100755 new mode 100644 diff --git a/man/SeparatingMatrixToList.Rd b/man/SeparatingMatrixToList.Rd old mode 100755 new mode 100644 diff --git a/man/SetVariantInfo.Rd b/man/SetVariantInfo.Rd old mode 100755 new mode 100644 diff --git a/man/UMIQC.Rd b/man/UMIQC.Rd old mode 100755 new mode 100644 diff --git a/man/UMI_seq.Rd b/man/UMI_seq.Rd old mode 100755 new mode 100644 diff --git a/man/VariantBurden.Rd b/man/VariantBurden.Rd old mode 100755 new mode 100644 diff --git a/man/VariantCloneSizeThresholding.Rd b/man/VariantCloneSizeThresholding.Rd old mode 100755 new mode 100644 diff --git a/man/VariantCorrelationHeatmap.Rd b/man/VariantCorrelationHeatmap.Rd old mode 100755 new mode 100644 diff --git a/man/VariantFisherTestHeatmap.Rd b/man/VariantFisherTestHeatmap.Rd old mode 100755 new mode 100644 diff --git a/man/VariantQuantileThresholding_Combined.Rd b/man/VariantQuantileThresholding_Combined.Rd old mode 100755 new mode 100644 diff --git a/man/VariantSelection_Group.Rd b/man/VariantSelection_Group.Rd old mode 100755 new mode 100644 diff --git a/man/VariantSelection_Quantile.Rd b/man/VariantSelection_Quantile.Rd old mode 100755 new mode 100644 diff --git a/man/VariantSelection_TopCells.Rd b/man/VariantSelection_TopCells.Rd old mode 100755 new mode 100644 diff --git a/man/VariantWiseCorrelation.Rd b/man/VariantWiseCorrelation.Rd old mode 100755 new mode 100644 index 410335b..1aa9e18 --- a/man/VariantWiseCorrelation.Rd +++ b/man/VariantWiseCorrelation.Rd @@ -8,6 +8,7 @@ VariantWiseCorrelation( variants_list, n_cores = 1, p_value_adjustment = "fdr", + value_type = "consensus", verbose = TRUE ) } @@ -18,6 +19,8 @@ VariantWiseCorrelation( \item{p_value_adjustment}{Method for P value adjustment. See p.adjust for details.} +\item{value_type}{Are we using consensus or other information?} + \item{verbose}{Should the function be verbose? Default = TRUE} } \description{ diff --git a/man/VariantWiseFisherTest.Rd b/man/VariantWiseFisherTest.Rd old mode 100755 new mode 100644 diff --git a/man/char_to_numeric.Rd b/man/char_to_numeric.Rd old mode 100755 new mode 100644 diff --git a/man/combine_NAMES.Rd b/man/combine_NAMES.Rd old mode 100755 new mode 100644 diff --git a/man/combine_SparseMatrix.Rd b/man/combine_SparseMatrix.Rd old mode 100755 new mode 100644 diff --git a/man/computeAFMutMatrix.Rd b/man/computeAFMutMatrix.Rd old mode 100755 new mode 100644 diff --git a/man/getAltMatrix.Rd b/man/getAltMatrix.Rd old mode 100755 new mode 100644 diff --git a/man/getMutMatrix.Rd b/man/getMutMatrix.Rd old mode 100755 new mode 100644 diff --git a/man/getReadMatrix.Rd b/man/getReadMatrix.Rd old mode 100755 new mode 100644 diff --git a/man/getRefMatrix.Rd b/man/getRefMatrix.Rd old mode 100755 new mode 100644 diff --git a/man/get_consensus.Rd b/man/get_consensus.Rd old mode 100755 new mode 100644 diff --git a/man/load_object.Rd b/man/load_object.Rd old mode 100755 new mode 100644 diff --git a/man/save_object.Rd b/man/save_object.Rd old mode 100755 new mode 100644 diff --git a/man/sdiv.Rd b/man/sdiv.Rd old mode 100755 new mode 100644