diff --git a/apps/aligner/aligner.cpp b/apps/aligner/aligner.cpp index 0bf8e00..2da421e 100644 --- a/apps/aligner/aligner.cpp +++ b/apps/aligner/aligner.cpp @@ -68,7 +68,7 @@ int main(int argc, char * const argv[]) { " If N = 0 then all sequences will be calculated." " Specifying small N can make total calculation much faster. [default: 0]\n"); fprintf(stderr, "\t-k K Sequences with score > K will be discarded." - " Smaller k, faster calculation.\n"); + " Smaller k, faster calculation. If -1, no sequences will be discarded. [default: -1]\n"); fprintf(stderr, "\t-p If specified, alignment path will be found and printed. " "This may significantly slow down the calculation.\n"); fprintf(stderr, "\t-l If specified, start locations will be found and printed. " diff --git a/test_data/Chromosome_2890043_3890042_0/Chromosome_2890043_3890042_0.fasta b/test_data/Chromosome_2890043_3890042_0/Chromosome_2890043_3890042_0.fasta index 04ccda3..bc133ca 100644 --- a/test_data/Chromosome_2890043_3890042_0/Chromosome_2890043_3890042_0.fasta +++ b/test_data/Chromosome_2890043_3890042_0/Chromosome_2890043_3890042_0.fasta @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4a2942f217f05214cb2f8d4c474c83ccecabebe5656529751824b346ff1a789c -size 1000051 +oid sha256:1c75ea6ff04064c522cc9ecb66b89093667316e9242457352b912f59dc03c48a +size 1000031 diff --git a/test_data/Chromosome_2890043_3890042_0/Chromosome_2890043_3890042_0_mutated_10per.fa b/test_data/Chromosome_2890043_3890042_0/Chromosome_2890043_3890042_0_mutated_10per.fa deleted file mode 100644 index 58905a1..0000000 --- a/test_data/Chromosome_2890043_3890042_0/Chromosome_2890043_3890042_0_mutated_10per.fa +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:89395566677642afa25a9383480054407ecab0213b425331ee62a98221daa183 -size 1000024 diff --git a/test_data/Chromosome_2890043_3890042_0/Chromosome_2890043_3890042_0_mutated_1per.fasta 
b/test_data/Chromosome_2890043_3890042_0/Chromosome_2890043_3890042_0_mutated_1per.fasta deleted file mode 100644 index bb2f109..0000000 --- a/test_data/Chromosome_2890043_3890042_0/Chromosome_2890043_3890042_0_mutated_1per.fasta +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d345398475baa77b49ca057539f165d3dc0efa5205eba6834067607659c8ee73 -size 1000024 diff --git a/test_data/Chromosome_2890043_3890042_0/Chromosome_2890043_3890042_0_mutated_25per.fa b/test_data/Chromosome_2890043_3890042_0/Chromosome_2890043_3890042_0_mutated_25per.fa deleted file mode 100644 index 34e5d57..0000000 --- a/test_data/Chromosome_2890043_3890042_0/Chromosome_2890043_3890042_0_mutated_25per.fa +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:de6dd12d09ade318b1804736a8684a8730a146a55ccee31447086b78f3d0de4a -size 1000024 diff --git a/test_data/Chromosome_2890043_3890042_0/Chromosome_2890043_3890042_0_mutated_3per.fa b/test_data/Chromosome_2890043_3890042_0/Chromosome_2890043_3890042_0_mutated_3per.fa deleted file mode 100644 index feb1ab1..0000000 --- a/test_data/Chromosome_2890043_3890042_0/Chromosome_2890043_3890042_0_mutated_3per.fa +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:abb3116ab104b9b9daf1b6a80c9f2d6445496376fd2dc3d8da263a8679983bab -size 1000024 diff --git a/test_data/Chromosome_2890043_3890042_0/Chromosome_2890043_3890042_0_mutated_50per.fa b/test_data/Chromosome_2890043_3890042_0/Chromosome_2890043_3890042_0_mutated_50per.fa deleted file mode 100644 index 3687c26..0000000 --- a/test_data/Chromosome_2890043_3890042_0/Chromosome_2890043_3890042_0_mutated_50per.fa +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a73a0edb231e3e987031820565ccdf51450b39821ce860bf43ddafb7b582c8e3 -size 1000024 diff --git a/test_data/Chromosome_2890043_3890042_0/Chromosome_2890043_3890042_0_mutated_5per.fa 
b/test_data/Chromosome_2890043_3890042_0/Chromosome_2890043_3890042_0_mutated_5per.fa deleted file mode 100644 index f5f1f5e..0000000 --- a/test_data/Chromosome_2890043_3890042_0/Chromosome_2890043_3890042_0_mutated_5per.fa +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6f11c777725dc4a5aed35eb084fdc27cf5d1b5c95c1444b7341c87a029546f7d -size 1000024 diff --git a/test_data/Chromosome_2890043_3890042_0/Chromosome_2890043_3890042_0_mutated_65per.fa b/test_data/Chromosome_2890043_3890042_0/Chromosome_2890043_3890042_0_mutated_65per.fa deleted file mode 100644 index ea1afc0..0000000 --- a/test_data/Chromosome_2890043_3890042_0/Chromosome_2890043_3890042_0_mutated_65per.fa +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b7d8803b30fe6652fd846ec87d2c42619b8f973b5cb58b24c174460b5dd21fcf -size 1000024 diff --git a/test_data/Chromosome_2890043_3890042_0/Chromosome_2890043_3890042_0_mutated_75per.fa b/test_data/Chromosome_2890043_3890042_0/Chromosome_2890043_3890042_0_mutated_75per.fa deleted file mode 100644 index 532c145..0000000 --- a/test_data/Chromosome_2890043_3890042_0/Chromosome_2890043_3890042_0_mutated_75per.fa +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bff5a14137c9aef52ac952498a3864a61402a5749862b8f7897d981976410188 -size 1000024 diff --git a/test_data/Chromosome_2890043_3890042_0/mutated_60_perc.fasta b/test_data/Chromosome_2890043_3890042_0/mutated_60_perc.fasta new file mode 100644 index 0000000..1dfc894 --- /dev/null +++ b/test_data/Chromosome_2890043_3890042_0/mutated_60_perc.fasta @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05dde1845ff2e44b6c9ceaf56941d66708084e33978ee971aa55214d8e42d88d +size 1009884 diff --git a/test_data/Chromosome_2890043_3890042_0/mutated_70_perc.fasta b/test_data/Chromosome_2890043_3890042_0/mutated_70_perc.fasta new file mode 100644 index 0000000..e8478e8 --- /dev/null +++ 
b/test_data/Chromosome_2890043_3890042_0/mutated_70_perc.fasta @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73a3db149e3744ad10453ea4eec9474ac9bae470a03cd322a41d3a97d02b5734 +size 1012408 diff --git a/test_data/Chromosome_2890043_3890042_0/mutated_80_perc.fasta b/test_data/Chromosome_2890043_3890042_0/mutated_80_perc.fasta new file mode 100644 index 0000000..af95aa7 --- /dev/null +++ b/test_data/Chromosome_2890043_3890042_0/mutated_80_perc.fasta @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f27da7d45abb858518a885a7a988bf56298e104f86da37035c2fb06560aa67c7 +size 1012287 diff --git a/test_data/Chromosome_2890043_3890042_0/mutated_90_perc.fasta b/test_data/Chromosome_2890043_3890042_0/mutated_90_perc.fasta new file mode 100644 index 0000000..5aed63b --- /dev/null +++ b/test_data/Chromosome_2890043_3890042_0/mutated_90_perc.fasta @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9bfc4f33a1753d9d1006a94bcc90a2525e2df5eb73058a13b28309bd4ee22e07 +size 1012737 diff --git a/test_data/Chromosome_2890043_3890042_0/mutated_94_perc.fasta b/test_data/Chromosome_2890043_3890042_0/mutated_94_perc.fasta new file mode 100644 index 0000000..a21a6f5 --- /dev/null +++ b/test_data/Chromosome_2890043_3890042_0/mutated_94_perc.fasta @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11a7532e70fc4d8f1c085b02d332145cfb30ebfa1b4aa05871b9b5bd4c0d671f +size 1012374 diff --git a/test_data/Chromosome_2890043_3890042_0/mutated_97_perc.fasta b/test_data/Chromosome_2890043_3890042_0/mutated_97_perc.fasta new file mode 100644 index 0000000..69f55ab --- /dev/null +++ b/test_data/Chromosome_2890043_3890042_0/mutated_97_perc.fasta @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3bc40c9dd3dac7cc99cbaee4828eb768ab6f18921c67709acfb4848bbd4a20a2 +size 1012346 diff --git a/test_data/Chromosome_2890043_3890042_0/mutated_99_perc.fasta 
b/test_data/Chromosome_2890043_3890042_0/mutated_99_perc.fasta new file mode 100644 index 0000000..2d802c2 --- /dev/null +++ b/test_data/Chromosome_2890043_3890042_0/mutated_99_perc.fasta @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5723e96ed53e3361affbb1a93d135455e61af54b51afd07ed0fb9124235d8b32 +size 1012739 diff --git a/test_data/E_coli_DH1/mason_illumina_read_10kbp/e_coli_DH1_illumina_1x10000_mutated_10%.fasta b/test_data/E_coli_DH1/mason_illumina_read_10kbp/e_coli_DH1_illumina_1x10000_mutated_10%.fasta deleted file mode 100644 index 767e712..0000000 --- a/test_data/E_coli_DH1/mason_illumina_read_10kbp/e_coli_DH1_illumina_1x10000_mutated_10%.fasta +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9cd1409da4e686ce568a5807ba528e54e3967349b53a41a5314f1f30a898b085 -size 10002 diff --git a/test_data/E_coli_DH1/mason_illumina_read_10kbp/e_coli_DH1_illumina_1x10000_mutated_25%.fasta b/test_data/E_coli_DH1/mason_illumina_read_10kbp/e_coli_DH1_illumina_1x10000_mutated_25%.fasta deleted file mode 100644 index 5f50eff..0000000 --- a/test_data/E_coli_DH1/mason_illumina_read_10kbp/e_coli_DH1_illumina_1x10000_mutated_25%.fasta +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5683267db7cbcd49a4f17a097a2e0a36fc3292f2aa8493e9849582b2e0b0911c -size 10002 diff --git a/test_data/E_coli_DH1/mason_illumina_read_10kbp/e_coli_DH1_illumina_1x10000_mutated_3%.fasta b/test_data/E_coli_DH1/mason_illumina_read_10kbp/e_coli_DH1_illumina_1x10000_mutated_3%.fasta deleted file mode 100644 index 1bc2d01..0000000 --- a/test_data/E_coli_DH1/mason_illumina_read_10kbp/e_coli_DH1_illumina_1x10000_mutated_3%.fasta +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:84e65d1d6c4973d31ba818e74b2729040411d704fd4f48880eaf8091bebafeb3 -size 10002 diff --git a/test_data/E_coli_DH1/mason_illumina_read_10kbp/e_coli_DH1_illumina_1x10000_mutated_5%.fasta 
b/test_data/E_coli_DH1/mason_illumina_read_10kbp/e_coli_DH1_illumina_1x10000_mutated_5%.fasta deleted file mode 100644 index c2aa2d1..0000000 --- a/test_data/E_coli_DH1/mason_illumina_read_10kbp/e_coli_DH1_illumina_1x10000_mutated_5%.fasta +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9575e9652869099f720e3ac2c9a4143384b9bb73fcba10eba4da169a1c56dc47 -size 10002 diff --git a/test_data/E_coli_DH1/mason_illumina_read_10kbp/e_coli_DH1_illumina_1x10000_mutated_50%.fasta b/test_data/E_coli_DH1/mason_illumina_read_10kbp/e_coli_DH1_illumina_1x10000_mutated_50%.fasta deleted file mode 100644 index 719c658..0000000 --- a/test_data/E_coli_DH1/mason_illumina_read_10kbp/e_coli_DH1_illumina_1x10000_mutated_50%.fasta +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ee769a6c167b9f277d75502a4d04318a54b00ffc45d40d6a74f1baf92ea67584 -size 10002 diff --git a/test_data/E_coli_DH1/mason_illumina_read_10kbp/e_coli_DH1_illumina_1x10000_mutated_65%.fasta b/test_data/E_coli_DH1/mason_illumina_read_10kbp/e_coli_DH1_illumina_1x10000_mutated_65%.fasta deleted file mode 100644 index 74ccfab..0000000 --- a/test_data/E_coli_DH1/mason_illumina_read_10kbp/e_coli_DH1_illumina_1x10000_mutated_65%.fasta +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ad979f65864c7ed1c5aa89155387f92ca57eba1231b4a63cbb4d402f4574085e -size 10002 diff --git a/test_data/E_coli_DH1/mason_illumina_read_10kbp/e_coli_DH1_illumina_1x10000_mutated_75%.fasta b/test_data/E_coli_DH1/mason_illumina_read_10kbp/e_coli_DH1_illumina_1x10000_mutated_75%.fasta deleted file mode 100644 index 616f06c..0000000 --- a/test_data/E_coli_DH1/mason_illumina_read_10kbp/e_coli_DH1_illumina_1x10000_mutated_75%.fasta +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:02ba0dd9a3030a5164e372631075022b7681c710b3d30e69d1072b5141a4cd53 -size 10002 diff --git 
a/test_data/E_coli_DH1/mason_illumina_read_10kbp/mutated_60_perc.fasta b/test_data/E_coli_DH1/mason_illumina_read_10kbp/mutated_60_perc.fasta new file mode 100644 index 0000000..243ab68 --- /dev/null +++ b/test_data/E_coli_DH1/mason_illumina_read_10kbp/mutated_60_perc.fasta @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7aa9e92a5bde2863e9d3c8250c33e2e8b7a976d73e2c6bad1a8c5709942beae4 +size 10131 diff --git a/test_data/E_coli_DH1/mason_illumina_read_10kbp/mutated_70_perc.fasta b/test_data/E_coli_DH1/mason_illumina_read_10kbp/mutated_70_perc.fasta new file mode 100644 index 0000000..d82124c --- /dev/null +++ b/test_data/E_coli_DH1/mason_illumina_read_10kbp/mutated_70_perc.fasta @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0afe45520fceedcd9869314208e054c3ce8cff743bb7bb14bf63d4534b45d64 +size 10156 diff --git a/test_data/E_coli_DH1/mason_illumina_read_10kbp/mutated_80_perc.fasta b/test_data/E_coli_DH1/mason_illumina_read_10kbp/mutated_80_perc.fasta new file mode 100644 index 0000000..a4666d9 --- /dev/null +++ b/test_data/E_coli_DH1/mason_illumina_read_10kbp/mutated_80_perc.fasta @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6307ab2ffeeb57f08badc1bb98ba71f2b10dafed36d3a554243cf5b84dd0e9c5 +size 10116 diff --git a/test_data/E_coli_DH1/mason_illumina_read_10kbp/mutated_90_perc.fasta b/test_data/E_coli_DH1/mason_illumina_read_10kbp/mutated_90_perc.fasta new file mode 100644 index 0000000..a8d610a --- /dev/null +++ b/test_data/E_coli_DH1/mason_illumina_read_10kbp/mutated_90_perc.fasta @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:814564da5ddb2dad2c5d0bde71ac3b18510982ca88e87c4e801da57b6926b633 +size 10180 diff --git a/test_data/E_coli_DH1/mason_illumina_read_10kbp/mutated_94_perc.fasta b/test_data/E_coli_DH1/mason_illumina_read_10kbp/mutated_94_perc.fasta new file mode 100644 index 0000000..332dab2 --- /dev/null +++ 
b/test_data/E_coli_DH1/mason_illumina_read_10kbp/mutated_94_perc.fasta @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:274c12e202cb6e0e686a4ce49a38a15f1f72a878e30e48df56f77bc18371e15f +size 10203 diff --git a/test_data/E_coli_DH1/mason_illumina_read_10kbp/mutated_97_perc.fasta b/test_data/E_coli_DH1/mason_illumina_read_10kbp/mutated_97_perc.fasta new file mode 100644 index 0000000..82c6ce4 --- /dev/null +++ b/test_data/E_coli_DH1/mason_illumina_read_10kbp/mutated_97_perc.fasta @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:acebbff80c1e2b5ba805ce15a40c3b6f7c75825bee4f5b3927e80a53271a497a +size 10188 diff --git a/test_data/E_coli_DH1/prefix_10kbp/e_coli_DH1_prefix10000_oneline.fasta b/test_data/E_coli_DH1/prefix_10kbp/e_coli_DH1_prefix10000_oneline.fasta deleted file mode 100644 index 5eadabe..0000000 --- a/test_data/E_coli_DH1/prefix_10kbp/e_coli_DH1_prefix10000_oneline.fasta +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:fc7caad9766e9fd63d4d826ecb0be14f85a08c8940ac086add63e65dec791d3b -size 9861 diff --git a/test_data/E_coli_DH1/prefix_10kbp/e_coli_DH1_prefix10000_oneline_mutated_1%.fasta b/test_data/E_coli_DH1/prefix_10kbp/e_coli_DH1_prefix10000_oneline_mutated_1%.fasta deleted file mode 100644 index 9e308c8..0000000 --- a/test_data/E_coli_DH1/prefix_10kbp/e_coli_DH1_prefix10000_oneline_mutated_1%.fasta +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:249aa40a3a015c9a9cb6e8425d39923b80ca1e15f9a65b04343457ec2c07b239 -size 9862 diff --git a/test_data/E_coli_DH1/prefix_10kbp/e_coli_DH1_prefix10000_oneline_mutated_10%.fasta b/test_data/E_coli_DH1/prefix_10kbp/e_coli_DH1_prefix10000_oneline_mutated_10%.fasta deleted file mode 100644 index de24f1f..0000000 --- a/test_data/E_coli_DH1/prefix_10kbp/e_coli_DH1_prefix10000_oneline_mutated_10%.fasta +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid 
sha256:9395acae20035a7ccdecbbda09ae007cf908cf74f18596de1b7f033751bad89e -size 9862 diff --git a/test_data/E_coli_DH1/prefix_10kbp/e_coli_DH1_prefix10000_oneline_mutated_25%.fasta b/test_data/E_coli_DH1/prefix_10kbp/e_coli_DH1_prefix10000_oneline_mutated_25%.fasta deleted file mode 100644 index d1ee3de..0000000 --- a/test_data/E_coli_DH1/prefix_10kbp/e_coli_DH1_prefix10000_oneline_mutated_25%.fasta +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:72437374f0ea91232dad58e8a64efada51f2b8f9d470ea2075d8c8aa9b0a378d -size 9862 diff --git a/test_data/E_coli_DH1/prefix_10kbp/e_coli_DH1_prefix10000_oneline_mutated_3%.fasta b/test_data/E_coli_DH1/prefix_10kbp/e_coli_DH1_prefix10000_oneline_mutated_3%.fasta deleted file mode 100644 index 4a71f38..0000000 --- a/test_data/E_coli_DH1/prefix_10kbp/e_coli_DH1_prefix10000_oneline_mutated_3%.fasta +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1beb9faaaeab0298c6e1db1e5c32f18c515ea55da9c10a5b9cf37c08f50d34d0 -size 9862 diff --git a/test_data/E_coli_DH1/prefix_10kbp/e_coli_DH1_prefix10000_oneline_mutated_5%.fasta b/test_data/E_coli_DH1/prefix_10kbp/e_coli_DH1_prefix10000_oneline_mutated_5%.fasta deleted file mode 100644 index 3ef83ee..0000000 --- a/test_data/E_coli_DH1/prefix_10kbp/e_coli_DH1_prefix10000_oneline_mutated_5%.fasta +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3e097a699c1380d377b0222fcd7f071c0622d8fcbc008c7785f82783570db488 -size 9862 diff --git a/test_data/E_coli_DH1/prefix_10kbp/e_coli_DH1_prefix10000_oneline_mutated_50%.fasta b/test_data/E_coli_DH1/prefix_10kbp/e_coli_DH1_prefix10000_oneline_mutated_50%.fasta deleted file mode 100644 index 8b59d4b..0000000 --- a/test_data/E_coli_DH1/prefix_10kbp/e_coli_DH1_prefix10000_oneline_mutated_50%.fasta +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:48aaec5b3d376b99352fa1a77f5707d5862c31d4c6bb235d4c4a8c81181f7e3e -size 9862 
diff --git a/test_data/E_coli_DH1/prefix_10kbp/e_coli_DH1_prefix10000_oneline_mutated_65%.fasta b/test_data/E_coli_DH1/prefix_10kbp/e_coli_DH1_prefix10000_oneline_mutated_65%.fasta deleted file mode 100644 index b8c358f..0000000 --- a/test_data/E_coli_DH1/prefix_10kbp/e_coli_DH1_prefix10000_oneline_mutated_65%.fasta +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:788ac8eda65c65201f566c14091cd03b0fa191ffa0e121af722bd5e36e01c4d7 -size 9862 diff --git a/test_data/E_coli_DH1/prefix_10kbp/e_coli_DH1_prefix10000_oneline_mutated_75%.fasta b/test_data/E_coli_DH1/prefix_10kbp/e_coli_DH1_prefix10000_oneline_mutated_75%.fasta deleted file mode 100644 index 19533ef..0000000 --- a/test_data/E_coli_DH1/prefix_10kbp/e_coli_DH1_prefix10000_oneline_mutated_75%.fasta +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:fdfd7a5958047db43c06020308b6e25d3d27f51b6ce32762f09434dda9a86332 -size 9862 diff --git a/test_data/E_coli_DH1/prefix_10kbp/mutated_60_perc.fasta b/test_data/E_coli_DH1/prefix_10kbp/mutated_60_perc.fasta new file mode 100644 index 0000000..550f6af --- /dev/null +++ b/test_data/E_coli_DH1/prefix_10kbp/mutated_60_perc.fasta @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0b92e5e9b2734974d33ee03624ec50269bb35944b67fd9e13e0c2314d924416 +size 10076 diff --git a/test_data/E_coli_DH1/prefix_10kbp/mutated_70_perc.fasta b/test_data/E_coli_DH1/prefix_10kbp/mutated_70_perc.fasta new file mode 100644 index 0000000..86a5dd1 --- /dev/null +++ b/test_data/E_coli_DH1/prefix_10kbp/mutated_70_perc.fasta @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ede27d8013c126515b0d95f549e85be6ae97e8012178022cf85149770c815f01 +size 10144 diff --git a/test_data/E_coli_DH1/prefix_10kbp/mutated_80_perc.fasta b/test_data/E_coli_DH1/prefix_10kbp/mutated_80_perc.fasta new file mode 100644 index 0000000..4cc8bd2 --- /dev/null +++ b/test_data/E_coli_DH1/prefix_10kbp/mutated_80_perc.fasta 
@@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:075af290e3d678bf28eaebba333fa097f60800dd9e17e32bf943c463c6facabc +size 9890 diff --git a/test_data/E_coli_DH1/prefix_10kbp/mutated_90_perc.fasta b/test_data/E_coli_DH1/prefix_10kbp/mutated_90_perc.fasta new file mode 100644 index 0000000..1912d92 --- /dev/null +++ b/test_data/E_coli_DH1/prefix_10kbp/mutated_90_perc.fasta @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be8d576b6359ce9984811adcbb4ec81e2b4170681234a9ba8bad9cfe3af68aec +size 9994 diff --git a/test_data/E_coli_DH1/prefix_10kbp/mutated_94_perc.fasta b/test_data/E_coli_DH1/prefix_10kbp/mutated_94_perc.fasta new file mode 100644 index 0000000..556d8c4 --- /dev/null +++ b/test_data/E_coli_DH1/prefix_10kbp/mutated_94_perc.fasta @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcfae21ae6e04d8530a1f02cf7ff13b91926e3f4535b61d80d2f7d69772116bb +size 9981 diff --git a/test_data/E_coli_DH1/prefix_10kbp/mutated_97_perc.fasta b/test_data/E_coli_DH1/prefix_10kbp/mutated_97_perc.fasta new file mode 100644 index 0000000..30f3a69 --- /dev/null +++ b/test_data/E_coli_DH1/prefix_10kbp/mutated_97_perc.fasta @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3592dd8104a20c4216bcb587932efc15c84f72f4c0c3f3eb4c9592f7e35072c +size 10004 diff --git a/test_data/E_coli_DH1/prefix_10kbp/mutated_99_perc.fasta b/test_data/E_coli_DH1/prefix_10kbp/mutated_99_perc.fasta new file mode 100644 index 0000000..39e36eb --- /dev/null +++ b/test_data/E_coli_DH1/prefix_10kbp/mutated_99_perc.fasta @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b27bb6b75a662dcd0e9942f3f72c3636d9c2cb2d57aa34900e5a0ea832cf2ad +size 9974 diff --git a/test_data/Enterobacteria_Phage_1/Enterobacteria_phage_1.fasta b/test_data/Enterobacteria_Phage_1/Enterobacteria_phage_1.fasta index 7fba565..ea549a0 100644 --- a/test_data/Enterobacteria_Phage_1/Enterobacteria_phage_1.fasta +++ 
b/test_data/Enterobacteria_Phage_1/Enterobacteria_phage_1.fasta @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cf97d5d4b30fd649bb35116dbd9c63e6b4a51a0adba485472748e2610f8e9a5a -size 95942 +oid sha256:fad2f519a6970499e77d8187741ca7eb0eb8f941255e949a37171dcb13a44c17 +size 95856 diff --git a/test_data/Enterobacteria_Phage_1/Enterobacteria_phage_1_oneline_mutated_1%.fasta b/test_data/Enterobacteria_Phage_1/Enterobacteria_phage_1_oneline_mutated_1%.fasta deleted file mode 100644 index a88ec91..0000000 --- a/test_data/Enterobacteria_Phage_1/Enterobacteria_phage_1_oneline_mutated_1%.fasta +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:12dec188f9a56d08ec3a7e885db02c3b989895f323dbb3a43dc17bbf92a50610 -size 94505 diff --git a/test_data/Enterobacteria_Phage_1/Enterobacteria_phage_1_oneline_mutated_10%.fa b/test_data/Enterobacteria_Phage_1/Enterobacteria_phage_1_oneline_mutated_10%.fa deleted file mode 100644 index 77121b6..0000000 --- a/test_data/Enterobacteria_Phage_1/Enterobacteria_phage_1_oneline_mutated_10%.fa +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f02cc7a0399af1b4affdea741e6e7192d7d5fcb43b0f34c71e98a600218984bf -size 94505 diff --git a/test_data/Enterobacteria_Phage_1/Enterobacteria_phage_1_oneline_mutated_25%.fa b/test_data/Enterobacteria_Phage_1/Enterobacteria_phage_1_oneline_mutated_25%.fa deleted file mode 100644 index 7fed2cf..0000000 --- a/test_data/Enterobacteria_Phage_1/Enterobacteria_phage_1_oneline_mutated_25%.fa +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2aad20ab0dd03d9e80d760c35ec1e255ffd714583c8e1e3b5c492ad42a6cc138 -size 94505 diff --git a/test_data/Enterobacteria_Phage_1/Enterobacteria_phage_1_oneline_mutated_3%.fa b/test_data/Enterobacteria_Phage_1/Enterobacteria_phage_1_oneline_mutated_3%.fa deleted file mode 100644 index 8edcebe..0000000 --- 
a/test_data/Enterobacteria_Phage_1/Enterobacteria_phage_1_oneline_mutated_3%.fa +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:72e77c886e863cce13a52b6e9919d74a84621484cd78b90e486c95c2b10e1a29 -size 94505 diff --git a/test_data/Enterobacteria_Phage_1/Enterobacteria_phage_1_oneline_mutated_5%.fa b/test_data/Enterobacteria_Phage_1/Enterobacteria_phage_1_oneline_mutated_5%.fa deleted file mode 100644 index bce44c7..0000000 --- a/test_data/Enterobacteria_Phage_1/Enterobacteria_phage_1_oneline_mutated_5%.fa +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e861c0da0eb22899243c9b29f87bd53cefa7717467bcad8d79b4c5fb67731265 -size 94505 diff --git a/test_data/Enterobacteria_Phage_1/Enterobacteria_phage_1_oneline_mutated_50%.fa b/test_data/Enterobacteria_Phage_1/Enterobacteria_phage_1_oneline_mutated_50%.fa deleted file mode 100644 index 827e3cd..0000000 --- a/test_data/Enterobacteria_Phage_1/Enterobacteria_phage_1_oneline_mutated_50%.fa +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d5bdb9939367ab9cc7c017fbe6ac281b47aed22540b0745eeb757cb21e7e7b79 -size 94505 diff --git a/test_data/Enterobacteria_Phage_1/Enterobacteria_phage_1_oneline_mutated_65%.fa b/test_data/Enterobacteria_Phage_1/Enterobacteria_phage_1_oneline_mutated_65%.fa deleted file mode 100644 index 3ae49d4..0000000 --- a/test_data/Enterobacteria_Phage_1/Enterobacteria_phage_1_oneline_mutated_65%.fa +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2f13ab1b705ae6d22adb42f5bb1a8a4d2cb6e39797d06e90ae8648d03ffd6964 -size 94505 diff --git a/test_data/Enterobacteria_Phage_1/Enterobacteria_phage_1_oneline_mutated_75%.fa b/test_data/Enterobacteria_Phage_1/Enterobacteria_phage_1_oneline_mutated_75%.fa deleted file mode 100644 index ba07fcd..0000000 --- a/test_data/Enterobacteria_Phage_1/Enterobacteria_phage_1_oneline_mutated_75%.fa +++ /dev/null @@ -1,3 +0,0 @@ -version 
https://git-lfs.github.com/spec/v1 -oid sha256:1437d0583587c1d8e5253386045dc75625c28b62306c45544b35cb66b107b1c3 -size 94505 diff --git a/test_data/Enterobacteria_Phage_1/mutated_60_perc.fasta b/test_data/Enterobacteria_Phage_1/mutated_60_perc.fasta new file mode 100644 index 0000000..3b84ce3 --- /dev/null +++ b/test_data/Enterobacteria_Phage_1/mutated_60_perc.fasta @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47894fe745cb1c8dbc18bf6444ca0c2f2850605614d0786c0f92f9556e2f41bc +size 95766 diff --git a/test_data/Enterobacteria_Phage_1/mutated_70_perc.fasta b/test_data/Enterobacteria_Phage_1/mutated_70_perc.fasta new file mode 100644 index 0000000..e5df75f --- /dev/null +++ b/test_data/Enterobacteria_Phage_1/mutated_70_perc.fasta @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9061bdfabbe447375b57d39ade29ec91b8abc651b361057993a5c07ec73e81ff +size 95659 diff --git a/test_data/Enterobacteria_Phage_1/mutated_80_perc.fasta b/test_data/Enterobacteria_Phage_1/mutated_80_perc.fasta new file mode 100644 index 0000000..cb1eda7 --- /dev/null +++ b/test_data/Enterobacteria_Phage_1/mutated_80_perc.fasta @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ed16319b47ac2e7c8d1716ed470cbb55110f31727cff18385badbd6b84635a5 +size 95909 diff --git a/test_data/Enterobacteria_Phage_1/mutated_90_perc.fasta b/test_data/Enterobacteria_Phage_1/mutated_90_perc.fasta new file mode 100644 index 0000000..2ac6fda --- /dev/null +++ b/test_data/Enterobacteria_Phage_1/mutated_90_perc.fasta @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a06abb798de398c8845e87e37dca2ad1847225868a57d5658162868c82a8f337 +size 95871 diff --git a/test_data/Enterobacteria_Phage_1/mutated_94_perc.fasta b/test_data/Enterobacteria_Phage_1/mutated_94_perc.fasta new file mode 100644 index 0000000..3254fdc --- /dev/null +++ b/test_data/Enterobacteria_Phage_1/mutated_94_perc.fasta @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 
+oid sha256:3986a39928532908f0e7e6d6a6300d1b3c84725fe6ba514402071888666b6573 +size 95943 diff --git a/test_data/Enterobacteria_Phage_1/mutated_97_perc.fasta b/test_data/Enterobacteria_Phage_1/mutated_97_perc.fasta new file mode 100644 index 0000000..6add60a --- /dev/null +++ b/test_data/Enterobacteria_Phage_1/mutated_97_perc.fasta @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29851b360b359fa3589128167a6f4e4d49e3729d39e2dedad41262c6af5e7c0a +size 95657 diff --git a/test_data/Enterobacteria_Phage_1/mutated_99_perc.fasta b/test_data/Enterobacteria_Phage_1/mutated_99_perc.fasta new file mode 100644 index 0000000..ccc97c6 --- /dev/null +++ b/test_data/Enterobacteria_Phage_1/mutated_99_perc.fasta @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83a83c1adba9ab9912fb50530a61dc94e5b7c0fc4936df1e65da1f85de69ba9b +size 95630 diff --git a/test_data/mutator.py b/test_data/mutator.py deleted file mode 100755 index 46da195..0000000 --- a/test_data/mutator.py +++ /dev/null @@ -1,26 +0,0 @@ -#!/usr/bin/env python - -import random -import sys -import fileinput - -if (len(sys.argv) < 2): - print("Missing first argument: chance of mutation.") - sys.exit() - -chance = float(sys.argv[1]) - -seq = sys.stdin.readline() - -#alphabet = ['A','R','N','D','C','Q','E','G','H','I','L','K','M','F','P','S','T','W','Y','V','B','J','Z','X'] -alphabet = ['A', 'C', 'T', 'G'] - -seq = list(seq) - -for i in range(0, len(seq)): - if random.random() < chance: - seq[i] = random.choice(alphabet) - -seq = "".join(seq) - -print(seq) diff --git a/test_data/mutatrix_mutator.sh b/test_data/mutatrix_mutator.sh new file mode 100755 index 0000000..ad800a2 --- /dev/null +++ b/test_data/mutatrix_mutator.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash + +# Mutate given sequence using Mutatrix and compare it with original sequence using Edlib, +# to see how different it is. 
+
+MUTATRIX=~/git/mutatrix/mutatrix
+EDLIB=~/git/edlib/build/bin/edlib-aligner
+
+INPUT_SEQUENCE=$1  # Sequence file to mutate; also the first file given to Edlib below.
+MUTATION_PERC=$2  # Passed to Mutatrix as both its -s and its -i value.
+
+OUTPUT_SEQUENCE="$(dirname "$INPUT_SEQUENCE")/mutated_${MUTATION_PERC}_$(basename "$INPUT_SEQUENCE")"  # Prefix only the file name, so inputs given with a directory part still yield a valid path next to the input.
+
+"$MUTATRIX" -s "$MUTATION_PERC" -i "$MUTATION_PERC" -n 1 -m 0 -M 0 -P mutatrix_output "$INPUT_SEQUENCE" > mutation.vcf
+mv mutatrix_output* "$OUTPUT_SEQUENCE"  # Glob stays unquoted on purpose: it must expand to the file Mutatrix wrote.
+
+"$EDLIB" "$INPUT_SEQUENCE" "$OUTPUT_SEQUENCE"
diff --git a/test_data/perf_tests.sh b/test_data/perf_tests.sh
new file mode 100755
index 0000000..b966ec0
--- /dev/null
+++ b/test_data/perf_tests.sh
@@ -0,0 +1,219 @@
+#!/usr/bin/env bash
+
+# Runs performance tests - compares Edlib with other aligners (Parasail, Seqan, Myers).
+
+EDLIB=~/git/edlib/build/bin/edlib-aligner
+
+PARASAIL=~/git/parasail/apps/parasail_aligner
+PARASAIL_FLAGS="-t 1 -d -e 1 -o 1 -M 0 -X 1"
+
+# Seqan is run through a modified edlib aligner, which can be found in the seqan-test branch of the edlib repo.
+SEQAN=~/git/edlib-seqan/src/aligner
+
+MYERS=~/Dropbox/Mile/SIMD_alignment/Myers/myers_98_martin/myers
+
+TEST_DATA=.
+
+########################## TEST RUNNERS ########################
+
+function edlib {  # Usage: edlib MODE QUERY TARGET NUM_TESTS K -- runs $EDLIB NUM_TESTS times, prints each run's score and time, then "Edlib: <avg time> <score>".
+    mode=$1
+    query=$2
+    target=$3
+    num_tests=$4
+    k=$5
+
+    time_sum=0
+    for i in $(seq $num_tests); do
+        sleep 1
+        output=$($EDLIB -m $mode -k $k $query $target)
+        time=$(echo "$output" | grep "Cpu time of searching" | cut -d " " -f5)  # 5th space-separated field of the timing line
+        score=$(echo "$output" | grep "#0:" | cut -d " " -f2)  # 2nd field of the "#0:" result line
+        time_sum=$(python -c "print($time_sum + $time)")  # python does the arithmetic on the parsed time values
+        echo ">" "#"$i $score $time
+    done
+    avg_time=$(python -c "print($time_sum / $num_tests)")
+    echo "Edlib:" $avg_time $score
+}
+
+function edlib_path {  # Usage: edlib_path MODE QUERY TARGET NUM_TESTS -- like edlib, but runs $EDLIB with -p -s and reports times only.
+    mode=$1
+    query=$2
+    target=$3
+    num_tests=$4
+
+    time_sum=0
+    for i in $(seq $num_tests); do
+        sleep 1
+        output=$($EDLIB -m $mode -p -s $query $target)
+        time=$(echo "$output" | grep "Cpu time of searching" | cut -d " " -f5)
+        time_sum=$(python -c "print($time_sum + $time)")
+        echo ">" "#"$i $time
+    done
+    avg_time=$(python -c "print($time_sum / $num_tests)")
+    echo "Edlib (path):" $avg_time
+}
+
+function seqan {  # Usage: seqan MODE QUERY TARGET NUM_TESTS -- runs $SEQAN with -t, prints each run's score and time, then "Seqan: <avg time> <score>".
+    mode=$1
+    query=$2
+    target=$3
+    num_tests=$4
+
+    time_sum=0
+    for i in $(seq $num_tests); do
+        sleep 1
+        output=$($SEQAN -m $mode -t $query $target)
+        time=$(echo "$output" | grep "Cpu time of searching" | cut -d " " -f5)
+        score=$(($(echo "$output" | grep "Seqan Score:" | cut -d " " -f4) * -1))  # sign is flipped; NOTE(review): presumably so it is comparable with Edlib's score - confirm
+        time_sum=$(python -c "print($time_sum + $time)")
+        echo ">" "#"$i $score $time
+    done
+    avg_time=$(python -c "print($time_sum / $num_tests)")
+    echo "Seqan:" $avg_time $score
+}
+
+function seqan_path {  # Usage: seqan_path MODE QUERY TARGET NUM_TESTS -- like seqan, but runs $SEQAN with -t -p -s.
+    mode=$1
+    query=$2
+    target=$3
+    num_tests=$4
+
+    time_sum=0
+    for i in $(seq $num_tests); do
+        sleep 1
+        output=$($SEQAN -m $mode -t -p -s $query $target)
+        time=$(echo "$output" | grep "Cpu time of searching" | cut -d " " -f5)
+        score=$(($(echo "$output" | grep "Seqan Score:" | cut -d " " -f4) * -1))
+        time_sum=$(python -c "print($time_sum + $time)")
+        echo ">" "#"$i $score $time
+    done
+    avg_time=$(python -c "print($time_sum / $num_tests)")
+    echo "Seqan (path):" $avg_time $score  # "<avg time> <score>", the same column order as the Edlib, Seqan and Parasail summaries
+}
+
+function parasail {  # Usage: parasail QUERY TARGET NUM_TESTS -- runs $PARASAIL (nw_striped_32) and prints "Parasail: <avg time> <score>".
+    query=$1
+    target=$2
+    num_tests=$3
+
+    time_sum=0
+    for i in $(seq $num_tests); do
+        sleep 1
+        output=$($PARASAIL $PARASAIL_FLAGS -a nw_striped_32 -f $target -q $query)  # PARASAIL_FLAGS is unquoted so it splits into separate options
+        time=$(echo "$output" | grep "alignment time" | cut -d ":" -f2 | cut -d " " -f2)
+        score=$(($(head -n 1 parasail.csv | cut -d "," -f5) * -1))  # score is read from the 5th column of parasail.csv and its sign flipped
+        rm parasail.csv
+        time_sum=$(python -c "print($time_sum + $time)")
+        echo ">" "#"$i $score $time
+    done
+    avg_time=$(python -c "print($time_sum / $num_tests)")
+    echo "Parasail:" $avg_time $score
+}
+
+function myers {  # Usage: myers QUERY TARGET NUM_TESTS K -- times $MYERS with "time -p" and prints "Myers: <avg time>" (no score).
+    query=$1
+    target=$2
+    num_tests=$3
+    k=$4
+
+    time_sum=0
+    for i in $(seq $num_tests); do
+        sleep 1
+        tail -n +2 $query | tr -d '\n' > queryMyers.fasta  # drop the first line and all newlines
+        tail -n +2 $target | tr -d '\n' > targetMyers.fasta
+        output=$({ time -p $MYERS $(cat queryMyers.fasta) $k targetMyers.fasta; } 2>&1)  # query is passed as an argument, target as a file; 2>&1 captures the timing report
+        rm queryMyers.fasta targetMyers.fasta
+        time=$(echo "$output" | grep "real" | cut -d " " -f2)
+        time_sum=$(python -c "print($time_sum + $time)")
+        echo ">" "#"$i $time
+    done
+    avg_time=$(python -c "print($time_sum / $num_tests)")
+    echo "Myers:" $avg_time
+
+}
+
+
+############################ TESTS #############################
+
+
+#Enterobacteria
+echo -e "\nEnterobacteria, NW"
+target=$TEST_DATA/Enterobacteria_Phage_1/Enterobacteria_phage_1.fasta
+for query in $(ls $TEST_DATA/Enterobacteria_Phage_1/mutated_*_perc.fasta); do
+    echo $query
+
+    edlib NW $query $target 3 -1  # 3 runs; k = -1
+    edlib_path NW $query $target 3
+    seqan NW $query $target 3
+    seqan_path NW $query $target 3
+    parasail $query $target 3
+done
+
+
+#E coli and its illumina read, HW
+echo -e "\nE coli and its illumina read, HW"
+target=$TEST_DATA/E_coli_DH1/e_coli_DH1.fasta
+for query in $(ls $TEST_DATA/E_coli_DH1/mason_illumina_read_10kbp/*.fasta); do
+    echo $query
+
+    edlib HW $query $target 3 -1
+    edlib_path HW $query $target 3
+    seqan HW $query $target 3
+#    seqan_path HW $query $target 3  # Fails because it allocates too much memory.
+done
+
+
+#E coli and its prefix, SHW
+echo -e "\nE coli and its prefix, SHW"
+target=$TEST_DATA/E_coli_DH1/e_coli_DH1.fasta
+for query in $(ls $TEST_DATA/E_coli_DH1/prefix_10kbp/mutated_*_perc.fasta); do
+    echo $query
+
+    edlib SHW $query $target 3 -1
+    edlib_path SHW $query $target 3
+    seqan SHW $query $target 3
+#    seqan_path SHW $query $target 3  # Fails because it allocates too much memory.
+done
+
+
+#Chromosome
+echo -e "\nChromosome, NW"
+target=$TEST_DATA/Chromosome_2890043_3890042_0/Chromosome_2890043_3890042_0.fasta
+for query in $(ls $TEST_DATA/Chromosome_2890043_3890042_0/mutated_*_perc.fasta); do
+    echo $query
+
+    edlib NW $query $target 3 -1
+    edlib_path NW $query $target 3
+    seqan NW $query $target 3
+    seqan_path NW $query $target 3
+    parasail $query $target 3
+done
+
+
+################### Myers #####################
+echo -e "\nMyers"
+target=$TEST_DATA/E_coli_DH1/e_coli_DH1.fasta
+
+k=100
+for query_file in e_coli_DH1_illumina_1x10000.fasta; do
+    query=$TEST_DATA/E_coli_DH1/mason_illumina_read_10kbp/$query_file
+    echo $query $k
+    edlib HW $query $target 3 $k
+    myers $query $target 3 $k
+done
+
+k=1000
+for query_file in e_coli_DH1_illumina_1x10000.fasta mutated_97_perc.fasta mutated_94_perc.fasta mutated_90_perc.fasta; do
+    query=$TEST_DATA/E_coli_DH1/mason_illumina_read_10kbp/$query_file
+    echo $query $k
+    edlib HW $query $target 3 $k
+    myers $query $target 3 $k
+done
+
+k=10000
+for query in $(ls $TEST_DATA/E_coli_DH1/mason_illumina_read_10kbp/*); do
+    echo $query $k
+    edlib HW $query $target 3 $k
+    myers $query $target 3 $k
+done