-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSnakefile_random
95 lines (77 loc) · 3.07 KB
/
Snakefile_random
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# Snakefile_random
# Perform a randomized phyloHiC analysis
# Author: Sylvain PULICANI <[email protected]>
# Created on: September 2, 2019
# Last modified on: September 2, 2019
from itertools import combinations
from os.path import dirname
from string import Template
shell.prefix('{};'.format(config['condacmd']))
THRESHOLDS = ['00', '10', '25', '50', '75', '90']
ADJACENCIES = ['all', 'none', 'and', 'xor']
bindir = config['bin']
orthologs = config['orthologs']
name = config['experiment']
method = config['method']
resolutions = config['resolutions']
n = config['nb_replicates']
rule all:
input:
expand('{rootdir}/{res}/{name}/replicate_{rep}/threshold_{th}_adj_{adj}/trees_{method}/distances.phylip',
rootdir=config['rootdir'],
res=resolutions,
name=name,
rep=n,
th=THRESHOLDS,
adj=ADJACENCIES,
method=method)
rule dist_all_pairs:
input:
[Template('{rootdir}/{res}/{name}/replicate_{rep}/threshold_{th}_adj_{adj}/${d1}_${d2}_values.tsv.gz').substitute(d1=d1, d2=d2)
for d1, d2 in combinations(config['datasets'], 2)]
output:
'{rootdir}/{res}/{name}/replicate_{rep}/threshold_{th}_adj_{adj}/trees_{method}_{rep}/distances.phylip'
shell:
'{bindir}/dist_all_pairs.py -m {method} {orthologs} {output} {input}'
rule get_stats:
input:
expand('{{rootdir}}/{{res}}/{{name}}/replicate_{{rep}}/pairs/{dataset}.tsv.gz',
dataset=config['datasets'])
output:
'{rootdir}/{res}/{name}/replicate_{rep}/pairs/stats.tsv'
shell:
"{bindir}/pairsStats {input} > {output}"
def get_threshold(threshold, stat):
with open(stat, 'r') as f:
lines = [l for l in f.read().split('\n') if l]
d = dict(zip(lines[0].replace('%', '').split('\t'),
lines[-1].split('\t')))
if threshold == '50':
return '-t ' + d['Median']
elif threshold == '00':
return '-t ' + '0.0'
else:
return '-t ' + d.get(threshold, '0.0')
rule join_pairs:
input:
s = rules.get_stats.output,
d1 = '{rootdir}/{res}/{name}/replicate_{rep}/pairs/{dataset1}.tsv.gz',
d2 = '{rootdir}/{res}/{name}/replicate_{rep}/pairs/{dataset2}.tsv.gz'
output:
'{rootdir}/{res}/{name}/replicate_{rep}/threshold_{th}_adj_{adj}/{dataset1}_{dataset2}_values.tsv.gz'
run:
s = '{}/{}/{}/replicate_{}/pairs/stats.tsv'.format(
wildcards.rootdir, wildcards.res, wildcards.name, wildcards.n)
t = get_threshold(wildcards.th, s)
x = '-x' if config.get('exclude', False) else ''
shell("{bindir}/join_pairs.py {t} {x} -a {wildcards.adj} {orthologs} {input.d1} {input.d2} {output}")
def make_pairs_input(wildcards):
return (config['datasets'][wildcards.dataset]['genes'],
config['datasets'][wildcards.dataset]['hic'][wildcards.res])
rule make_pairs:
input:
make_pairs_input
output:
'{rootdir}/{res}/{name}/replicate_{rep}/pairs/{dataset}.tsv.gz'
shell:
"{bindir}/make_pairs.py -sN {input} {output}"