-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathrun_slurm_jobs.sh
executable file
·134 lines (107 loc) · 4.02 KB
/
run_slurm_jobs.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
#!/bin/bash
# Submit a snakemake workflow to a SLURM cluster.
# All the slurm arguments can be found here https://slurm.schedmd.com/sbatch.html
# https://snakemake.readthedocs.io/en/stable/executing/cli.html
# Maintainers:
# Vangelis Theodorakis [email protected]
# Florian R. Hoelzlwimmer [email protected]
#
# Fail the script if one command fails
set -e
# # enable debug mode
# set -x
# export SNAKEMAKE_SLURM_DEBUG=1
# limit core dumps to 50MB (ulimit -c counts 512-byte blocks: 102400 * 512B = 50MB)
ulimit -c 102400
# Absolute directory containing this script, used to locate the
# slurm-status / slurm-sidecar helper scripts shipped next to it.
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
# ============================================================================
#
# 1) Make sure that you have snakemake installed in your $PATH
#
# 2) Specify the following arguments according to your own taste
#
# 3) Run ./run_slurm_jobs.sh
#
# Snakemake binary: resolved via $PATH by default; replace with an
# absolute path to pin a specific installation.
snakemake="snakemake"
# Use the system kerberos kinit instead of any conda-shadowed version.
kinit="/usr/bin/kinit"
# Workflow definition; overridable through the SNAKEFILE env var.
snakefile="${SNAKEFILE:-Snakefile}"
# Resource ceilings for the whole snakemake run (all env-overridable).
number_of_snakemake_cores="${N_CORES:-256}"
number_of_snakemake_jobs="${N_JOBS:-128}"
amount_of_snakemake_memory="${MEM_MB:-1000000}"
number_of_snakemake_gpus="${N_GPUS:-8}"
#### IMPORTANT!!!
# The project folder is derived from the snakefile's location,
# e.g. project_folder="/path/to/project/folder/"
project_folder="$(realpath "$(dirname "$snakefile")")"
project_name="$(basename "$project_folder")"
# Per-project log folder.
logs="$project_folder/logs"
# Unique, timestamped prefix shared by every job spawned in this run;
# the cleanup trap later uses it to find and cancel leftover jobs.
job_names="${project_name}-$(date +"%Y-%m-%d_%T")"
printf '%s\n' "Starting $job_names with up to:"
printf '%s\n' "- $number_of_snakemake_cores cores"
printf '%s\n' "- $number_of_snakemake_jobs jobs"
printf '%s\n' "- ${amount_of_snakemake_memory}MB of RAM"
printf '%s\n' "- $number_of_snakemake_gpus gpus"
# Extract the major version (e.g. "7" from "7.32.4") to decide which
# cluster-integration flavour to use. Parameter expansion avoids a sed
# fork and the GNU-only "\+" regex syntax, which breaks on BSD sed.
snakemake_version="$($snakemake --version)"
snakemake_version="${snakemake_version%%.*}"
if [ "$snakemake_version" -gt "6" ]; then
  # snakemake >= 7 supports the cluster-sidecar script
  cluster_status_script="--cluster-status=${SCRIPT_DIR}/slurm-status.sh"
  cluster_sidecar_script="--cluster-sidecar=${SCRIPT_DIR}/slurm-sidecar.py"
else
  # snakemake does not support cluster-sidecar script; use legacy polling instead
  cluster_status_script="--cluster-status=${SCRIPT_DIR}/slurm-status.py"
  cluster_sidecar_script=""
fi
# ============================================================================
# By default errors and outputs are printed in the same file,
# so here we store the outputs as .out files under the log folder.
# %A is expanded by SLURM to the job's allocation id.
output_files="$logs/%A-$job_names.out"
# Create the log folder if it does not exist.
# -p is idempotent and closes the check-then-create race with concurrent
# launches; -- guards against a path starting with a dash.
if [[ ! -e "$logs" ]]; then
  mkdir -p -- "$logs"
  echo "New logs folder created under $logs"
fi
# register cleanup function to stop still running snakemake jobs;
# runs on every script exit path (trap EXIT below) and cancels any
# SLURM jobs spawned by this run, identified by the unique $job_names prefix.
function cleanup {
  echo "cancel still running jobs..."
  # %j = job name, %i = job id; select this run's jobs by name prefix
  # and cancel them ($USER quoted to survive unusual usernames; xargs -r
  # skips scancel entirely when nothing matches).
  squeue -u "$USER" -o "%j,%i,%T,%B,%A,%N" | grep "^$job_names" | cut -f2 -d',' | xargs -r scancel
}
trap cleanup EXIT
## Run the snakemake file on the cluster
# requeue jobs in case of preemption
SBATCH_ARGS="${SBATCH_ARGS:-} --requeue"
if [ "${AUKS_ENABLED:-false}" = true ]; then
  # Fetch kerberos ticket that lasts for 7 days
  $kinit -r 7d
  # Auks argument caches the kerberos ticket for runs that last more than
  # one day (otherwise the jobs lose access to the filesystem)
  auks -a
  SBATCH_ARGS="${SBATCH_ARGS} --auks=done"
fi
# Launch snakemake. The {resources.*}/{threads}/{rule} placeholders inside
# the --cluster string are filled in by snakemake per job before the sbatch
# command line is executed.
$snakemake --keep-going \
    --default-resources ntasks=1 mem_mb=1000 gpu=0 \
    --cluster "sbatch $SBATCH_ARGS \
        --ntasks {resources.ntasks} \
        --cpus-per-task {threads} \
        --parsable \
        --mem {resources.mem_mb}M \
        --output $output_files \
        --job-name=$job_names-{rule} \
        --gres=gpu:{resources.gpu} \
    " \
    ${cluster_status_script} \
    ${cluster_sidecar_script} \
    --cores "$number_of_snakemake_cores" \
    -j "$number_of_snakemake_jobs" \
    --resources "mem_mb=$amount_of_snakemake_memory" "gpu=$number_of_snakemake_gpus" \
    --snakefile "$snakefile" "$@"
# NOTE: ${cluster_status_script} and ${cluster_sidecar_script} are
# deliberately left unquoted: when empty (snakemake <= 6) they must expand
# to nothing instead of an empty positional argument.
# --verbose
# --rerun-incomplete