Skip to content

Commit

Permalink
Update demultiplex to take one samplesheet and a csv of multiple fastqs
Browse files Browse the repository at this point in the history
  • Loading branch information
chris-cheshire committed Oct 20, 2023
1 parent 78bd0b8 commit 6715a22
Show file tree
Hide file tree
Showing 5 changed files with 103 additions and 98 deletions.
87 changes: 57 additions & 30 deletions subworkflows/goodwright/demultiplex/main.nf
Original file line number Diff line number Diff line change
@@ -1,22 +1,24 @@
//
// Convert a goodwright samplesheet into a suitable input for ultraplex and then run the demultiplex operation
// Support for users submitting an xlsx file instead of csv
// Concatenates fastqs if multiple files are passed
//

include { XLSX_TO_CSV } from '../../../modules/goodwright/xlsx_to_csv/main'
include { SAMPLESHEET_TO_BARCODE } from '../../../modules/goodwright/ultraplex/samplesheet_to_barcode/main'
include { CAT_FASTQ } from '../../../modules/nf-core/cat/fastq/main'
include { ULTRAPLEX } from '../../../modules/goodwright/ultraplex/ultraplex/main'

workflow DEMULTIPLEX {
take:
samplesheet // channel/file: [ [csv/xlsx] ]
fastq // channel/file: [ fastq ]
samplesheet // channel/files: [ csv/xlsx ]
fastqs // channel/files: [ fastq/fastq.gz ]

main:
// Init
ch_versions = Channel.empty()
ch_samplesheet = Channel.empty()
ch_fastq = Channel.empty()
ch_fastqs = Channel.empty()

// Resolve inputs
if(samplesheet instanceof groovyx.gpars.dataflow.DataflowVariable ||
Expand All @@ -25,16 +27,16 @@ workflow DEMULTIPLEX {
} else {
ch_samplesheet = Channel.from(samplesheet)
}
if(fastq instanceof groovyx.gpars.dataflow.DataflowVariable ||
fastq instanceof groovyx.gpars.dataflow.DataflowBroadcast) {
ch_fastq = fastq
if(fastqs instanceof groovyx.gpars.dataflow.DataflowVariable ||
fastqs instanceof groovyx.gpars.dataflow.DataflowBroadcast) {
ch_fastqs = fastqs
} else {
ch_fastq = Channel.from(fastq)
ch_fastqs = Channel.from([fastqs])
}

/*
* CHANNEL: Split out samplesheets that need to be converted from excel
*/
//
// CHANNEL: Split out samplesheets that need to be converted from excel
//
ch_samplesheet_branch = ch_samplesheet
.branch {
row ->
Expand All @@ -44,52 +46,77 @@ workflow DEMULTIPLEX {
//ch_samplesheet_branch.csv | view
//ch_samplesheet_branch.xlsx | view

/*
* MODULE: Convert xlsx to csv
*/
//
// MODULE: Convert xlsx to csv
//
XLSX_TO_CSV (
ch_samplesheet_branch.xlsx
)
ch_versions = ch_versions.mix(XLSX_TO_CSV.out.versions)

/*
* CHANNEL: Combine converted / non-converted channels
*/
//
// CHANNEL: Combine converted / non-converted channels
//
ch_csv_samplesheet = XLSX_TO_CSV.out.csv.mix( ch_samplesheet_branch.csv )

/*
* MODULE: Convert the samplesheet(s) into ultraplex input
*/
//
// MODULE: Convert the samplesheet(s) into ultraplex input
//
SAMPLESHEET_TO_BARCODE (
ch_csv_samplesheet.collect()
)
ch_versions = ch_versions.mix(SAMPLESHEET_TO_BARCODE.out.versions)
//SAMPLESHEET_TO_BARCODE.out.csv | view

/*
* CHANNEL: Pull out params for ultraplex
*/
//
// CHANNEL: Pull out params for ultraplex
//
ch_adapter = SAMPLESHEET_TO_BARCODE.out.samplesheet
.splitCsv(header: ['sample_name', 'barcode_seq_5', 'barcode_seq_3', 'adapter_seq_3'], skip:1, sep:"," )
.splitCsv(header: ['sample_name', 'barcode_seq_5', 'barcode_seq_3', 'adapter_seq_3'], skip:1, sep:",")
.map { row -> [row.adapter_seq_3] }
.collect()
.map { it[0] }
//ch_adapter | view

/*
* MODULE: Demultiplex the fastq file
*/
//
// CHANNEL: Split out files which need merging
//
ch_reads = ch_fastqs
.collect{ [it] }
.branch {
fastqs ->
single : fastqs.size() == 1
return [ [id:"single", single_end:true], fastqs.flatten() ]
multiple: fastqs.size() > 1
return [ [id:"merged", single_end:false], fastqs.flatten() ]
}
//ch_reads.single | view
//ch_reads.multiple | view

//
// MODULE: Concat multiple fastqs
//
CAT_FASTQ (
ch_reads.multiple
)
ch_versions = ch_versions.mix(CAT_FASTQ.out.versions)
ch_reads = CAT_FASTQ.out.reads.mix(ch_reads.single)
//ch_reads | view

//
// MODULE: Demultiplex the fastq file
//
ULTRAPLEX (
[ [ id:"fastq" ], fastq ],
ch_reads,
SAMPLESHEET_TO_BARCODE.out.barcodes,
ch_adapter
)
ch_versions = ch_versions.mix(ULTRAPLEX.out.versions)
//ULTRAPLEX.out.fastq | view

/*
* CHANNEL: Create meta data using the samplesheet and the outputs from ultraplex
*/
//
// CHANNEL: Create meta data using the samplesheet and the outputs from ultraplex
//
ch_meta_fastq = SAMPLESHEET_TO_BARCODE.out.samplesheet
.splitCsv (header:true, sep:",")
.combine (ULTRAPLEX.out.fastq)
Expand Down
46 changes: 13 additions & 33 deletions tests/subworkflows/demultiplex/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ workflow test_single_sample {
fastq = file(params.goodwright_test_data['ultraplex']['multiplexed_fastq'], checkIfExists: true)

DEMULTIPLEX ( samplesheet, fastq )

DEMULTIPLEX.out.fastq | view
}

Expand All @@ -18,7 +17,6 @@ workflow test_multi_sample {
fastq = file(params.goodwright_test_data['ultraplex']['multiplexed_fastq'], checkIfExists: true)

DEMULTIPLEX ( samplesheet, fastq )

DEMULTIPLEX.out.fastq | view
}

Expand All @@ -28,7 +26,6 @@ workflow test_with_excel {
fastq = file(params.goodwright_test_data['ultraplex']['multiplexed_fastq'], checkIfExists: true)

DEMULTIPLEX ( samplesheet, fastq )

DEMULTIPLEX.out.fastq | view
}

Expand All @@ -39,7 +36,20 @@ workflow test_multi_sample_paired_end {
fastq2 = file(params.goodwright_test_data['ultraplex']['multiplexed_fastq2'], checkIfExists: true)

DEMULTIPLEX ( samplesheet, [fastq1, fastq2] )
DEMULTIPLEX.out.fastq | view
}

workflow test_multi_sample_multi_paired_end {

    // Samplesheet plus four multiplexed fastq files; every path is resolved
    // from params.goodwright_test_data and must exist on disk.
    clip_sheet = file(params.goodwright_test_data['samplesheets']['clip_samplesheet'], checkIfExists: true)
    reads_1    = file(params.goodwright_test_data['ultraplex']['multiplexed_fastq'], checkIfExists: true)
    reads_2    = file(params.goodwright_test_data['ultraplex']['multiplexed_fastq2'], checkIfExists: true)
    reads_3    = file(params.goodwright_test_data['ultraplex']['multiplexed_fastq3'], checkIfExists: true)
    reads_4    = file(params.goodwright_test_data['ultraplex']['multiplexed_fastq4'], checkIfExists: true)

    // Group the files two by two; Channel.from emits each inner list as its own item.
    first_group     = [reads_1, reads_2]
    second_group    = [reads_3, reads_4]
    ch_fastq_groups = Channel.from([first_group, second_group])

    // Run the subworkflow and print the fastq output channel.
    DEMULTIPLEX ( clip_sheet, ch_fastq_groups )
    DEMULTIPLEX.out.fastq.view()
}

Expand All @@ -49,7 +59,6 @@ workflow test_adapter_mismatch {
fastq = file(params.goodwright_test_data['ultraplex']['multiplexed_fastq'], checkIfExists: true)

DEMULTIPLEX ( samplesheet, fastq )

DEMULTIPLEX.out.fastq | view
}

Expand All @@ -59,34 +68,5 @@ workflow test_channel_samplesheet {
fastq = file(params.goodwright_test_data['ultraplex']['multiplexed_fastq'], checkIfExists: true)

DEMULTIPLEX ( samplesheet, fastq )

DEMULTIPLEX.out.fastq | view
}

workflow test_channel_samplesheet_multi {

    // One multiplexed fastq and three samplesheets, all resolved from
    // params.goodwright_test_data and required to exist.
    multiplexed_reads = file(params.goodwright_test_data['ultraplex']['multiplexed_fastq'], checkIfExists: true)
    sheet_clip        = file(params.goodwright_test_data['samplesheets']['clip_samplesheet'], checkIfExists: true)
    sheet_clip_small  = file(params.goodwright_test_data['samplesheets']['clip_samplesheet_small'], checkIfExists: true)
    sheet_rna         = file(params.goodwright_test_data['samplesheets']['rna_samplesheet'], checkIfExists: true)

    // Each samplesheet becomes a separate item on the channel.
    all_sheets    = [sheet_clip, sheet_clip_small, sheet_rna]
    ch_samplesheets = Channel.from(all_sheets)

    // Run the subworkflow with a samplesheet channel and print the fastq output.
    DEMULTIPLEX ( ch_samplesheets, multiplexed_reads )
    DEMULTIPLEX.out.fastq.view()
}

workflow test_channel_samplesheet_multi_mixed {

    // One multiplexed fastq and three samplesheets; the first samplesheet is the
    // 'clip_samplesheet_xlsx' entry, the other two are the small CLIP and RNA entries.
    multiplexed_reads = file(params.goodwright_test_data['ultraplex']['multiplexed_fastq'], checkIfExists: true)
    sheet_clip_xlsx   = file(params.goodwright_test_data['samplesheets']['clip_samplesheet_xlsx'], checkIfExists: true)
    sheet_clip_small  = file(params.goodwright_test_data['samplesheets']['clip_samplesheet_small'], checkIfExists: true)
    sheet_rna         = file(params.goodwright_test_data['samplesheets']['rna_samplesheet'], checkIfExists: true)

    // Each samplesheet becomes a separate item on the channel.
    mixed_sheets    = [sheet_clip_xlsx, sheet_clip_small, sheet_rna]
    ch_samplesheets = Channel.from(mixed_sheets)

    // Run the subworkflow with the mixed samplesheet channel and print the fastq output.
    DEMULTIPLEX ( ch_samplesheets, multiplexed_reads )
    DEMULTIPLEX.out.fastq.view()
}
20 changes: 9 additions & 11 deletions tests/subworkflows/demultiplex/test_workflow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,15 @@
- path: "output/ultraplex/ultraplex_demux_sample_clip_2_Fwd.fastq.gz"
- path: "output/ultraplex/ultraplex_demux_sample_clip_2_Rev.fastq.gz"

- name: "test_sw_demultiplex_multisample_multi_pairedend"
command: nextflow run ./tests/subworkflows/demultiplex -c ./tests/config/nextflow.config -entry test_multi_sample_multi_paired_end
tags:
- "subworkflows"
- "demultiplex"
files:
- path: "output/ultraplex/ultraplex_demux_sample_clip_2_Fwd.fastq.gz"
- path: "output/ultraplex/ultraplex_demux_sample_clip_2_Rev.fastq.gz"

- name: "test_sw_demultiplex_adapter_mismatch"
command: nextflow run ./tests/subworkflows/demultiplex -c ./tests/config/nextflow.config -entry test_adapter_mismatch
tags:
Expand All @@ -48,14 +57,3 @@
- "subworkflows"
- "demultiplex"

- name: "test_sw_demultiplex_channel_samplesheet_multi"
command: nextflow run ./tests/subworkflows/demultiplex -c ./tests/config/nextflow.config -entry test_channel_samplesheet_multi
tags:
- "subworkflows"
- "demultiplex"

- name: "test_sw_demultiplex_channel_samplesheet_multi_mixed"
command: nextflow run ./tests/subworkflows/demultiplex -c ./tests/config/nextflow.config -entry test_channel_samplesheet_multi_mixed
tags:
- "subworkflows"
- "demultiplex"
18 changes: 12 additions & 6 deletions tests/wrappers/subworkflows/demultiplex/test.yml
Original file line number Diff line number Diff line change
@@ -1,19 +1,25 @@
- name: "test_wrappers_demultiplex_single"
command: nextflow run ./wrappers/subworkflows/demultiplex/main.nf -c ./tests/config/nextflow.config --fastq ./tests/data/ultraplex/multiplexed.fastq.gz --samplesheet ./tests/data/samplesheets/clip-samplesheet.csv
- name: "test_wrappers_demultiplex_single_se"
command: nextflow run ./wrappers/subworkflows/demultiplex/main.nf -c ./tests/config/nextflow.config --samplesheet ./tests/data/samplesheets/clip-samplesheet.csv --fastqs ./tests/data/fastq_list/single_se_fastq.csv
tags:
- "wrappers"
- "wrappers/subworkflows"
- "wrappers/subworkflows/demultiplex"

- name: "test_wrappers_demultiplex_multi"
command: nextflow run ./wrappers/subworkflows/demultiplex/main.nf -c ./tests/config/nextflow.config --fastq ./tests/data/ultraplex/multiplexed.fastq.gz --samplesheet ./tests/data/samplesheets/clip-samplesheet-small.csv --samplesheet2 ./tests/data/samplesheets/rna-samplesheet.csv --samplesheet3 ./tests/data/samplesheets/clip-samplesheet.xlsx
- name: "test_wrappers_demultiplex_single_pe"
command: nextflow run ./wrappers/subworkflows/demultiplex/main.nf -c ./tests/config/nextflow.config --samplesheet ./tests/data/samplesheets/clip-samplesheet.csv --fastqs ./tests/data/fastq_list/single_pe_fastq.csv
tags:
- "wrappers"
- "wrappers/subworkflows"
- "wrappers/subworkflows/demultiplex"

- name: "test_wrappers_demultiplex_paired"
command: nextflow run ./wrappers/subworkflows/demultiplex/main.nf -c ./tests/config/nextflow.config --fastq ./tests/data/ultraplex/multiplexed.fastq.gz --fastq2 ./tests/data/ultraplex/multiplexed2.fastq.gz --samplesheet ./tests/data/samplesheets/clip-samplesheet.csv
# Wrapper test: one samplesheet plus the multi_se_fastq.csv fastq list.
# The `tags:` key is required so the entries below are parsed as the test's
# tags rather than as a continuation of the command.
- name: "test_wrappers_demultiplex_multi_se"
  command: nextflow run ./wrappers/subworkflows/demultiplex/main.nf -c ./tests/config/nextflow.config --samplesheet ./tests/data/samplesheets/clip-samplesheet.csv --fastqs ./tests/data/fastq_list/multi_se_fastq.csv
  tags:
    - "wrappers"
    - "wrappers/subworkflows"
    - "wrappers/subworkflows/demultiplex"

- name: "test_wrappers_demultiplex_multi_pe"
command: nextflow run ./wrappers/subworkflows/demultiplex/main.nf -c ./tests/config/nextflow.config --samplesheet ./tests/data/samplesheets/rna-samplesheet.csv --fastqs ./tests/data/fastq_list/multi_pe_fastq.csv
tags:
- "wrappers"
- "wrappers/subworkflows"
Expand Down
30 changes: 12 additions & 18 deletions wrappers/subworkflows/demultiplex/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -4,25 +4,19 @@ include { DEMULTIPLEX } from '../../../subworkflows/goodwright/demultiplex/main.

workflow {

// Find all samplesheet parameters and build them into a list
def List samplesheet_files = []
Set params_key_set = params.keySet()
params_key_set.each {
if(it.contains("samplesheet")) {
samplesheet_files.add(file(params[it], checkIfExists: true))
}
}
// Create channels from input files
ch_samplesheet = file(params.samplesheet, checkIfExists: true)
fastqs = Channel.of(file(params.fastqs, checkIfExists: true))

// Create channel from list
ch_samplesheet = Channel.from(samplesheet_files)

// Build fastq file list
fastqs = file(params.fastq, checkIfExists: true)
if(params.fastq2) {
fastq2 = file(params.fastq2, checkIfExists: true)
fastqs = [fastqs, fastq2]
}
// Get list of files from fastqs
ch_fastqs = fastqs
.splitCsv(sep:",")
.map { list ->
list.collect { file(it, checkIfExists: true) }
}
//ch_fastqs | view

// Execute
DEMULTIPLEX ( ch_samplesheet, fastqs )
DEMULTIPLEX ( ch_samplesheet, ch_fastqs )
DEMULTIPLEX.out.fastq | view
}

0 comments on commit 6715a22

Please sign in to comment.