Skip to content

Commit

Permalink
migrate wdl files
Browse files Browse the repository at this point in the history
  • Loading branch information
j23414 committed Feb 15, 2022
1 parent e1a36a2 commit 5c7899b
Show file tree
Hide file tree
Showing 3 changed files with 152 additions and 0 deletions.
8 changes: 8 additions & 0 deletions .dockstore.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
version: 1.2
workflows:
- subclass: WDL
primaryDescriptorPath: /workflow/wdl/workflow.wdl
testParameterFiles:
- /workflow/wdl/input_ncov.json
authors:
- name: Nextstrain
5 changes: 5 additions & 0 deletions workflow/wdl/input_ncov.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"Nextstrain_WRKFLW.build_yaml" : "data/build.yaml",
"Nextstrain_WRKFLW.giturl" : "https://github.com/nextstrain/ncov/archive/refs/heads/master.zip",
"Nextstrain_WRKFLW.docker_path": "nextstrain/base:latest"
}
139 changes: 139 additions & 0 deletions workflow/wdl/workflow.wdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
version 1.0

# import "tasks/nextstrain.wdl" as nextstrain # <= modular method
workflow Nextstrain_WRKFLW {
input {
# Option 1: run the ncov example workflow
File? build_yaml

# Option 2: create a build_yaml from sequence and metadata
File? sequence_fasta
File? metadata_tsv
String? build_name
# It's possible all of the above files are provided
# Option 3? GISAID augur zip?
# File? gisaid_zip # tarball
String? active_builds # "Wisconsin,Minnesota,Iowa"
# By default, run the ncov workflow (can swap it for zika or something else)
String giturl = "https://github.com/nextstrain/ncov/archive/refs/heads/master.zip"
String docker_path = "nextstrain/base:latest"
Int? cpu
Int? memory # in GiB
Int? disk_size
}
if (defined(sequence_fasta)) {
call mk_buildconfig {
input:
sequence_fasta = select_first([sequence_fasta]),
metadata_tsv = select_first([metadata_tsv]),
build = build_name,
dockerImage = docker_path
}
}
# call nextstrain.nextstrain build as build { # <= modular method
call nextstrain_build as build {
input:
build_yaml = select_first([build_yaml, mk_buildconfig.buildconfig]), # Accepts Option 1 or Option 2
cpu = cpu,
memory = memory,
disk_size = disk_size,
dockerImage = docker_path,
giturl = giturl,
active_builds = active_builds
}

output {
Array[File] json_files = build.json_files
File auspice_zip = build.auspice_zip
}
}

# === Define Tasks
task mk_buildconfig {
input {
File sequence_fasta # Could change this to Array[File] and loop the HEREDOC
File metadata_tsv
String build = "example"
String dockerImage
Int cpu = 1
Int disk_size = 5
Float memory = 3.5
}
command {
cat << EOF > build.yaml
inputs:
- name: ~{build}
metadata: ~{metadata_tsv}
sequences: ~{sequence_fasta}
- name: references
metadata: data/references_metadata.tsv
sequences: data/references_sequences.fasta
EOF
}
output {
File buildconfig = "build.yaml"
}
runtime {
docker: dockerImage
cpu : cpu
memory: memory + " GiB"
disks: "local-disk " + disk_size + " HDD"
}
}

# Public Reference Datasets in case we want to add default "context" strains for ncov
# nextstrain remote ls s3://nextstrain-data/files/ncov/open &> list.txt
# files/ncov/open/LOCATION/metadata.tsv.xz where LOCATION = one of ['africa', 'asia', 'europe', 'global', 'north-america', 'oceania', 'south-america']
# files/ncov/open/LOCATION/sequences.fasta.xz
# TODO: since these are all s3, just generate string (avoid download and localazation/delocalization) for the mk_buildconfig
# Clarification: Do these need to be sampled down?
task nextstrain_build {
input {
File? build_yaml
String? active_builds # Wisconsin,Minnesota,Washington
String dockerImage = "nextstrain/base:latest"
String nextstrain_app = "nextstrain"
String giturl = "https://github.com/nextstrain/ncov/archive/refs/heads/master.zip"
Int cpu = 8 # Honestly, I'd max this out unless budget is a consideration.
Int disk_size = 30 # In GiB. Could also check size of sequence or metadata files
Float memory = 3.5
}
command {
# Pull ncov, zika or similar repository
wget -O master.zip ~{giturl}
INDIR=`unzip -Z1 master.zip | head -n1 | sed 's:/::g'`
unzip master.zip

# Max out the number of threads
PROC=`nproc`

# Run nextstrain
"~{nextstrain_app}" build \
--cpus $PROC \
--memory ~{memory}Gib \
--native $INDIR ~{"--configfile " + build_yaml} \
~{"--config active_builds=" + active_builds}

# Prepare output
mv $INDIR/auspice .
zip -r auspice.zip auspice
}
output {
File auspice_zip = "auspice.zip"
Array[File] json_files = glob("auspice/*.json")
# Target the s3
}
runtime {
docker: dockerImage
cpu : cpu
memory: memory + " GiB"
disks: "local-disk " + disk_size + " HDD"
}
}

0 comments on commit 5c7899b

Please sign in to comment.