Skip to content

Commit

Permalink
Fixed bug with band being reduced to aggresively.
Browse files Browse the repository at this point in the history
  • Loading branch information
Martinsos committed Dec 19, 2016
1 parent 5780b72 commit 4207933
Show file tree
Hide file tree
Showing 2 changed files with 73 additions and 38 deletions.
57 changes: 34 additions & 23 deletions edlib/src/edlib.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -684,7 +684,7 @@ static int myersCalcEditDistanceNW(Word* Peq, int W, int maxNumBlocks,
k = min(k, bl->score
+ max(targetLength - c - 1, queryLength - ((1 + lastBlock) * WORD_SIZE - 1) - 1)
+ (lastBlock == maxNumBlocks - 1 ? W : 0));

//---------- Adjust number of blocks according to Ukkonen ----------//
//--- Adjust last block ---//
// If block is not beneath band, calculate next block. Only next because others are certainly beneath band.
Expand All @@ -700,12 +700,14 @@ static int myersCalcEditDistanceNW(Word* Peq, int W, int maxNumBlocks,
hout = newHout;
}

// While block is out of band, move one block up. - This is optimal now, by my formula.
// NOTICE: I added + W, and now it works! This has to be added because query is padded with W cells.
// While block is out of band, move one block up.
// NOTE: Condition used here is more loose than the one from the article, since I simplified the max() part of it.
// I could consider adding that max part, for optimal performance.
while (lastBlock >= firstBlock
&& (bl->score >= k + WORD_SIZE
|| ((lastBlock + 1) * WORD_SIZE - 1 >
k - bl->score + 2 * WORD_SIZE - 2 - targetLength + c + queryLength + W))) {
|| ((lastBlock + 1) * WORD_SIZE - 1 >
// TODO: Does not work if do not put +1! Why???
k - bl->score + 2 * WORD_SIZE - 2 - targetLength + c + queryLength + 1))) {
lastBlock--; bl--;
}
//-------------------------//
Expand All @@ -720,19 +722,20 @@ static int myersCalcEditDistanceNW(Word* Peq, int W, int maxNumBlocks,
}
//--------------------------/


// TODO: consider if this part is useful, it does not seem to help much
if (c % STRONG_REDUCE_NUM == 0) { // Every some columns do more expensive but more efficient reduction
while (lastBlock >= firstBlock) {
// If all cells outside of band, remove block
vector<int> scores = getBlockCellValues(*bl);
int r = (lastBlock + 1) * WORD_SIZE - 1;
int numCells = lastBlock == maxNumBlocks - 1 ? WORD_SIZE - W : WORD_SIZE;
int r = lastBlock * WORD_SIZE + numCells - 1;
bool reduce = true;
for (int i = 0; i < WORD_SIZE; i++) {
for (int i = WORD_SIZE - numCells; i < WORD_SIZE; i++) {
// TODO: Does not work if do not put +1! Why???
if (scores[i] <= k && r <= k - scores[i] - targetLength + c + queryLength + W + 1) {
if (scores[i] <= k && r <= k - scores[i] - targetLength + c + queryLength + 1) {
reduce = false;
break;
break;
}
r--;
}
Expand All @@ -743,9 +746,10 @@ static int myersCalcEditDistanceNW(Word* Peq, int W, int maxNumBlocks,
while (firstBlock <= lastBlock) {
// If all cells outside of band, remove block
vector<int> scores = getBlockCellValues(blocks[firstBlock]);
int r = (firstBlock + 1) * WORD_SIZE - 1;
int numCells = firstBlock == maxNumBlocks - 1 ? WORD_SIZE - W : WORD_SIZE;
int r = firstBlock * WORD_SIZE + numCells - 1;
bool reduce = true;
for (int i = 0; i < WORD_SIZE; i++) {
for (int i = WORD_SIZE - numCells; i < WORD_SIZE; i++) {
if (scores[i] <= k && r >= scores[i] - k - targetLength + c + queryLength) {
reduce = false;
break;
Expand Down Expand Up @@ -780,7 +784,6 @@ static int myersCalcEditDistanceNW(Word* Peq, int W, int maxNumBlocks,
}
}
//----------------------------------------------------------//

//---- If this is stop column, save it and finish ----//
if (c == targetStopPosition) {
for (int b = firstBlock; b <= lastBlock; b++) {
Expand Down Expand Up @@ -1125,23 +1128,31 @@ static int obtainAlignmentHirschberg(

// Calculate left half.
AlignmentData* alignDataLeftHalf = NULL;
myersCalcEditDistanceNW(Peq, W, maxNumBlocks,
query, queryLength,
target, targetLength,
alphabetLength, bestScore,
&score_, &endLocation_, false, &alignDataLeftHalf, leftHalfWidth - 1);
int leftHalfCalcStatus = myersCalcEditDistanceNW(
Peq, W, maxNumBlocks,
query, queryLength,
target, targetLength,
alphabetLength, bestScore,
&score_, &endLocation_, false, &alignDataLeftHalf, leftHalfWidth - 1);

// Calculate right half.
AlignmentData* alignDataRightHalf = NULL;
myersCalcEditDistanceNW(rPeq, W, maxNumBlocks,
rQuery, queryLength,
rTarget, targetLength,
alphabetLength, bestScore,
&score_, &endLocation_, false, &alignDataRightHalf, rightHalfWidth - 1);
int rightHalfCalcStatus = myersCalcEditDistanceNW(
rPeq, W, maxNumBlocks,
rQuery, queryLength,
rTarget, targetLength,
alphabetLength, bestScore,
&score_, &endLocation_, false, &alignDataRightHalf, rightHalfWidth - 1);

delete[] Peq;
delete[] rPeq;

if (leftHalfCalcStatus == EDLIB_STATUS_ERROR || rightHalfCalcStatus == EDLIB_STATUS_ERROR) {
if (alignDataLeftHalf) delete alignDataLeftHalf;
if (alignDataRightHalf) delete alignDataRightHalf;
return EDLIB_STATUS_ERROR;
}

// Unwrap the left half.
int firstBlockIdxLeft = alignDataLeftHalf->firstBlocks[0];
int lastBlockIdxLeft = alignDataLeftHalf->lastBlocks[0];
Expand Down
54 changes: 39 additions & 15 deletions test/runTests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -322,19 +322,19 @@ bool test6() {
int targetLength = 420;
char query[13] = {1,3,0,1,1,1,3,0,1,3,1,3,3};
char target[420] = {0,1,1,1,0,1,3,0,1,3,3,3,1,3,2,2,3,2,3,3,1,0,1,1,1,0,1,3,0,1,3,
3,3,1,3,2,2,3,2,3,3,1,0,1,1,1,0,1,3,0,1,3,3,3,1,3,2,2,3,2,3,3,
1,0,1,1,1,0,1,3,0,1,3,3,3,1,3,2,2,3,2,3,3,1,0,1,1,1,0,1,3,0,1,
3,3,3,1,3,2,2,3,2,3,3,1,0,1,1,1,0,1,3,0,1,3,3,3,1,3,2,2,3,2,3,
3,1,0,1,1,1,0,1,3,0,1,3,3,3,1,3,2,2,3,2,3,3,1,0,1,1,1,0,1,3,0,
1,3,3,3,1,3,2,2,3,2,3,3,1,0,1,1,1,0,1,3,0,1,3,3,3,1,3,2,2,3,2,
3,3,1,0,1,1,1,0,1,3,0,1,3,3,3,1,3,2,2,3,2,3,3,1,0,1,1,1,0,1,3,
0,1,3,3,3,1,3,2,2,3,2,3,3,1,0,1,1,1,0,1,3,0,1,3,3,3,1,3,2,2,3,
2,3,3,1,0,1,1,1,0,1,3,0,1,3,3,3,1,3,2,2,3,2,3,3,1,0,1,1,1,0,1,
3,0,1,3,3,3,1,3,2,2,3,2,3,3,1,0,1,1,1,0,1,3,0,1,3,3,3,1,3,2,2,
3,2,3,3,1,0,1,1,1,0,1,3,0,1,3,3,3,1,3,2,2,3,2,3,3,1,0,1,1,1,0,
1,3,0,1,3,3,3,1,3,2,2,3,2,3,3,1,0,1,1,1,0,1,3,0,1,3,3,3,1,3,2,
2,3,2,3,3,1,0,1,1,1,0,1,3,0,1,3,3,3,1,3,2,2,3,2,3,3,1,0,1,1,1,
0,1,3,0,1,3,3,3,1,3,2,2,3,2,3,3,1};
3,3,1,3,2,2,3,2,3,3,1,0,1,1,1,0,1,3,0,1,3,3,3,1,3,2,2,3,2,3,3,
1,0,1,1,1,0,1,3,0,1,3,3,3,1,3,2,2,3,2,3,3,1,0,1,1,1,0,1,3,0,1,
3,3,3,1,3,2,2,3,2,3,3,1,0,1,1,1,0,1,3,0,1,3,3,3,1,3,2,2,3,2,3,
3,1,0,1,1,1,0,1,3,0,1,3,3,3,1,3,2,2,3,2,3,3,1,0,1,1,1,0,1,3,0,
1,3,3,3,1,3,2,2,3,2,3,3,1,0,1,1,1,0,1,3,0,1,3,3,3,1,3,2,2,3,2,
3,3,1,0,1,1,1,0,1,3,0,1,3,3,3,1,3,2,2,3,2,3,3,1,0,1,1,1,0,1,3,
0,1,3,3,3,1,3,2,2,3,2,3,3,1,0,1,1,1,0,1,3,0,1,3,3,3,1,3,2,2,3,
2,3,3,1,0,1,1,1,0,1,3,0,1,3,3,3,1,3,2,2,3,2,3,3,1,0,1,1,1,0,1,
3,0,1,3,3,3,1,3,2,2,3,2,3,3,1,0,1,1,1,0,1,3,0,1,3,3,3,1,3,2,2,
3,2,3,3,1,0,1,1,1,0,1,3,0,1,3,3,3,1,3,2,2,3,2,3,3,1,0,1,1,1,0,
1,3,0,1,3,3,3,1,3,2,2,3,2,3,3,1,0,1,1,1,0,1,3,0,1,3,3,3,1,3,2,
2,3,2,3,3,1,0,1,1,1,0,1,3,0,1,3,3,3,1,3,2,2,3,2,3,3,1,0,1,1,1,
0,1,3,0,1,3,3,3,1,3,2,2,3,2,3,3,1};

bool r = executeTest(query, queryLength, target, targetLength, EDLIB_MODE_HW);
r = r && executeTest(query, queryLength, target, targetLength, EDLIB_MODE_NW);
Expand Down Expand Up @@ -367,6 +367,28 @@ bool test8() {
return r;
}

bool test9() {
int queryLength = 64;
int targetLength = 393;
char query[64] = {9,5,5,9,9,4,6,0,1,1,5,4,6,0,6,5,5,6,5,2,2,0,6,0,8,3,7,0,6,6,4,8,3,1,9,4,5,5,5,7,8,2,
3,6,4,1,1,2,7,7,6,0,9,2,0,9,6,9,9,4,6,5,2,9};
char target[393] = {7,1,6,2,9,1,1,7,5,5,4,9,6,7,3,4,6,9,4,5,2,6,6,0,7,8,4,3,3,9,5,2,0,1,7,1,4,0,9,9,7,
5,0,6,2,4,0,9,3,6,6,7,4,3,9,3,3,4,7,8,5,4,1,7,7,0,9,3,0,8,4,0,3,4,6,7,0,8,6,6,6,5,
5,2,0,5,5,3,1,4,1,6,8,4,3,7,6,2,0,9,0,4,9,5,1,5,3,1,3,1,9,9,6,5,1,8,0,6,1,1,1,5,9,
1,1,2,1,8,5,1,7,7,8,6,5,9,1,0,2,4,1,2,5,0,9,6,8,1,4,2,4,5,9,3,9,0,5,0,8,0,3,7,0,1,
3,5,0,6,5,5,2,8,9,7,0,8,5,1,9,0,3,3,7,2,6,6,4,3,8,5,6,2,2,6,5,8,3,8,4,0,3,7,8,2,6,
9,0,2,0,1,2,5,6,1,9,4,8,3,7,8,8,5,2,3,1,8,1,6,6,7,6,9,6,5,3,3,6,5,7,8,6,1,3,4,2,4,
0,0,7,7,1,8,5,3,3,6,1,4,5,7,3,1,8,0,8,1,5,6,6,2,4,4,3,9,8,7,3,8,0,3,8,1,3,3,4,6,1,
8,2,6,7,5,8,6,7,8,7,4,5,6,6,9,0,1,1,1,9,4,9,1,9,9,2,2,4,8,0,6,6,4,4,4,2,2,2,9,3,1,
6,8,7,2,9,8,6,0,1,7,7,2,8,6,2,2,1,6,0,3,4,9,8,9,3,2,3,5,3,6,6,9,6,6,2,6,6,0,8,7,9,
5,9,7,4,3,1,7,2,1,0,6,0,0,7,5,2,1,2,6,9,1,5,6,7};

bool r = executeTest(query, queryLength, target, targetLength, EDLIB_MODE_HW);
r = r && executeTest(query, queryLength, target, targetLength, EDLIB_MODE_NW);
r = r && executeTest(query, queryLength, target, targetLength, EDLIB_MODE_SHW);
return r;
}

bool testCigar() {
unsigned char alignment[] = {EDLIB_EDOP_MATCH, EDLIB_EDOP_MATCH, EDLIB_EDOP_INSERT, EDLIB_EDOP_INSERT,
EDLIB_EDOP_INSERT, EDLIB_EDOP_DELETE, EDLIB_EDOP_INSERT, EDLIB_EDOP_INSERT,
Expand All @@ -378,6 +400,7 @@ bool testCigar() {
pass = false;
printf("Expected %s, got %s\n", expected, cigar);
}
printf("Cigar extended: ");
printf(pass ? "\x1B[32m""OK""\x1B[0m\n" : "\x1B[31m""FAIL""\x1B[0m\n");
if (cigar) free(cigar);

Expand All @@ -388,6 +411,7 @@ bool testCigar() {
pass = false;
printf("Expected %s, got %s\n", expected2, cigar);
}
printf("Cigar standard: ");
printf(pass ? "\x1B[32m""OK""\x1B[0m\n" : "\x1B[31m""FAIL""\x1B[0m\n");
if (cigar) free(cigar);

Expand All @@ -396,9 +420,9 @@ bool testCigar() {

bool runTests() {
// TODO: make this global vector where tests have to add themselves.
int numTests = 9;
int numTests = 10;
bool (* tests [])() = {test1, test2, test3, test4, test5, test6,
test7, test8, testCigar};
test7, test8, test9, testCigar};

bool allTestsPassed = true;
for (int i = 0; i < numTests; i++) {
Expand Down

0 comments on commit 4207933

Please sign in to comment.