Skip to content

Commit

Permalink
Added graph generation
Browse files Browse the repository at this point in the history
  • Loading branch information
gsc74 committed Jan 19, 2023
1 parent dfe5b89 commit a4629ff
Show file tree
Hide file tree
Showing 12 changed files with 651 additions and 445 deletions.
6 changes: 3 additions & 3 deletions format.c
Original file line number Diff line number Diff line change
Expand Up @@ -193,10 +193,10 @@ void mg_write_gaf(kstring_t *s, const gfa_t *g, const mg_gchains_t *gs, int32_t
mg_sprintf_lite(s, "\tcg:Z:");
if (rev_sign)
for (j = p->p->n_cigar - 1; j >= 0; --j)
mg_sprintf_lite(s, "%d%c", p->p->cigar[j]>>4, "MIDNSHP=XB"[p->p->cigar[j]&0xf]);
mg_sprintf_lite(s, "%d%c", (int32_t)(p->p->cigar[j]>>4), "MIDNSHP=XB"[p->p->cigar[j]&0xf]);
else
for (j = 0; j < p->p->n_cigar; ++j)
mg_sprintf_lite(s, "%d%c", p->p->cigar[j]>>4, "MIDNSHP=XB"[p->p->cigar[j]&0xf]);
mg_sprintf_lite(s, "%d%c", (int32_t)(p->p->cigar[j]>>4), "MIDNSHP=XB"[p->p->cigar[j]&0xf]);
}
mg_sprintf_lite(s, "\n");
if ((mg_dbg_flag & MG_DBG_LCHAIN) || (flag & MG_M_WRITE_LCHAIN)) {
Expand Down Expand Up @@ -238,4 +238,4 @@ void mg_write_gaf(kstring_t *s, const gfa_t *g, const mg_gchains_t *gs, int32_t
}
}
}
}
}
20 changes: 11 additions & 9 deletions galign.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,29 +4,31 @@
#include "kalloc.h"
#include "miniwfa.h"

static void append_cigar1(void *km, mg32_v *c, int32_t op, int32_t len)
static void append_cigar1(void *km, mg64_v *c, int32_t op, int32_t len)
{
if (c->n > 0 && (c->a[c->n - 1]&0xf) == op) {
c->a[c->n - 1] += len<<4;
c->a[c->n - 1] += (uint64_t)len<<4;
} else {
if (c->n == c->m) {
c->m += (c->m>>1) + 16;
KREALLOC(km, c->a, c->m);
}
c->a[c->n++] = len<<4 | op;
c->a[c->n++] = (uint64_t)len<<4 | op;
}
}

static void append_cigar(void *km, mg32_v *c, int32_t n_cigar, const uint32_t *cigar)
static void append_cigar(void *km, mg64_v *c, int32_t n_cigar, const uint32_t *cigar)
{
int32_t k;
if (n_cigar == 0) return;
append_cigar1(km, c, cigar[0]&0xf, cigar[0]>>4);
if (c->n + n_cigar - 1 > c->m) {
c->m = c->n + n_cigar - 1;
kroundup32(c->m);
KREALLOC(km, c->a, c->m);
}
memcpy(&c->a[c->n], &cigar[1], sizeof(*cigar) * (n_cigar - 1));
for (k = 0; k < n_cigar - 1; ++k)
c->a[c->n + k] = cigar[1 + k];
c->n += n_cigar - 1;
}

Expand All @@ -35,7 +37,7 @@ void mg_gchain_cigar(void *km, const gfa_t *g, const gfa_edseq_t *es, const char
int32_t i, l_seq = 0, m_seq = 0;
char *seq = 0;
void *km2;
mg32_v cigar = {0,0,0};
mg64_v cigar = {0,0,0};
km2 = km_init2(km, 0);
for (i = 0; i < gt->n_gc; ++i) {
mg_gchain_t *gc = &gt->gc[i];
Expand Down Expand Up @@ -118,11 +120,11 @@ void mg_gchain_cigar(void *km, const gfa_t *g, const gfa_edseq_t *es, const char
j0 = j, l0 = l;
}
// save the CIGAR to gt->gc[i]
gc->p = (mg_cigar_t*)kcalloc(gt->km, 1, cigar.n * 4 + sizeof(mg_cigar_t));
gc->p = (mg_cigar_t*)kcalloc(gt->km, 1, cigar.n * 8 + sizeof(mg_cigar_t));
gc->p->ss = (int32_t)gt->a[off_a0].x + 1 - (int32_t)(gt->a[off_a0].y>>32&0xff);
gc->p->ee = (int32_t)gt->a[off_a0 + gc->n_anchor - 1].x + 1;
gc->p->n_cigar = cigar.n;
memcpy(gc->p->cigar, cigar.a, cigar.n * 4);
memcpy(gc->p->cigar, cigar.a, cigar.n * 8);
for (j = 0, l = 0; j < gc->p->n_cigar; ++j) {
int32_t op = gc->p->cigar[j]&0xf, len = gc->p->cigar[j]>>4;
if (op == 7) gc->p->mlen += len, gc->p->blen += len;
Expand All @@ -135,4 +137,4 @@ void mg_gchain_cigar(void *km, const gfa_t *g, const gfa_edseq_t *es, const char
km_destroy(km2);
kfree(km, seq);
kfree(km, cigar.a);
}
}
44 changes: 28 additions & 16 deletions gchain1.c
Original file line number Diff line number Diff line change
Expand Up @@ -316,7 +316,7 @@ static inline void copy_lchain(mg_llchain_t *q, const mg_lchain_t *p, int32_t *n
(*n_a) += q->cnt;
}

static void bridge_shortk(bridge_aux_t *aux, const mg_lchain_t *l0, const mg_lchain_t *l1)
static int32_t bridge_shortk(bridge_aux_t *aux, const mg_lchain_t *l0, const mg_lchain_t *l1)
{
int32_t s, n_pathv;
mg_path_dst_t dst;
Expand All @@ -328,10 +328,12 @@ static void bridge_shortk(bridge_aux_t *aux, const mg_lchain_t *l0, const mg_lch
dst.target_hash = l1->hash_pre;
dst.check_hash = 1;
p = mg_shortest_k(aux->km, aux->g, l1->v^1, 1, &dst, dst.target_dist, MG_MAX_SHORT_K, &n_pathv);
if (n_pathv == 0 || dst.target_hash != dst.hash)
fprintf(stderr, "%c%s[%d] -> %c%s[%d], dist=%d, target_dist=%d\n", "><"[(l1->v^1)&1], aux->g->seg[l1->v>>1].name, l1->v^1, "><"[(l0->v^1)&1], aux->g->seg[l0->v>>1].name, l0->v^1, dst.dist, dst.target_dist);
assert(n_pathv > 0);
assert(dst.target_hash == dst.hash);
if (n_pathv == 0 || dst.target_hash != dst.hash) {
fprintf(stderr, "[W::%s] %c%s[%d] -> %c%s[%d], dist=%d, target_dist=%d; chain skiped.\n", __func__, "><"[(l1->v^1)&1], aux->g->seg[l1->v>>1].name, l1->v^1, "><"[(l0->v^1)&1],
aux->g->seg[l0->v>>1].name, l0->v^1, dst.dist, dst.target_dist);
kfree(aux->km, p);
return -1;
}
for (s = n_pathv - 2; s >= 1; --s) { // path found in a backward way, so we need to reverse it
mg_llchain_t *q;
if (aux->n_llc == aux->m_llc) KEXPAND(aux->km, aux->llc, aux->m_llc);
Expand All @@ -341,6 +343,7 @@ static void bridge_shortk(bridge_aux_t *aux, const mg_lchain_t *l0, const mg_lch
q->ed = -1;
}
kfree(aux->km, p);
return 0;
}

static int32_t bridge_gwfa(bridge_aux_t *aux, int32_t kmer_size, int32_t gdp_max_ed, const mg_lchain_t *l0, const mg_lchain_t *l1, int32_t *ed)
Expand Down Expand Up @@ -377,28 +380,30 @@ static int32_t bridge_gwfa(bridge_aux_t *aux, int32_t kmer_size, int32_t gdp_max
return 1;
}

static void bridge_lchains(mg_gchains_t *gc, bridge_aux_t *aux, int32_t kmer_size, int32_t gdp_max_ed, const mg_lchain_t *l0, const mg_lchain_t *l1, const mg128_t *a)
static int32_t bridge_lchains(mg_gchains_t *gc, bridge_aux_t *aux, int32_t kmer_size, int32_t gdp_max_ed, const mg_lchain_t *l0, const mg_lchain_t *l1, const mg128_t *a)
{
if (!l1->inner_pre) { // bridging two segments
int32_t ed = -1;
if (l1->v != l0->v) { // bridging two segments
int32_t ed = -1, ret = 0;
if (aux->n_seg > 1 || !bridge_gwfa(aux, kmer_size, gdp_max_ed, l0, l1, &ed))
bridge_shortk(aux, l0, l1);
ret = bridge_shortk(aux, l0, l1);
if (ret < 0) return -1;
if (aux->n_llc == aux->m_llc) KEXPAND(aux->km, aux->llc, aux->m_llc);
copy_lchain(&aux->llc[aux->n_llc++], l1, &aux->n_a, gc->a, a, ed);
} else { // on one segment
int32_t k;
mg_llchain_t *t = &aux->llc[aux->n_llc - 1];
assert(l0->v == l1->v);
for (k = 0; k < l1->cnt; ++k) { // FIXME: this part is made redundant by resolve_overlap()
const mg128_t *ak = &a[l1->off + k];
if ((int32_t)ak->x > l0->re && (int32_t)ak->y > l0->qe)
break;
}
assert(k < l1->cnt);
t->cnt += l1->cnt - k, t->score += l1->score;
memcpy(&gc->a[aux->n_a], &a[l1->off + k], (l1->cnt - k) * sizeof(mg128_t));
aux->n_a += l1->cnt - k;
if (k < l1->cnt) { // l1 contained. TODO: check what is happening...
t->cnt += l1->cnt - k, t->score += l1->score;
memcpy(&gc->a[aux->n_a], &a[l1->off + k], (l1->cnt - k) * sizeof(mg128_t));
aux->n_a += l1->cnt - k;
}
}
return 0;
}

static void resolve_overlap(mg_lchain_t *l0, mg_lchain_t *l1, const mg128_t *a)
Expand Down Expand Up @@ -483,7 +488,14 @@ mg_gchains_t *mg_gchain_gen(void *km_dst, void *km, const gfa_t *g, const gfa_ed
for (j0 = 0, j = 1; j < nui; ++j) {
const mg_lchain_t *l0 = &lc[st + j0], *l1 = &lc[st + j];
if (l1->cnt > 0) {
bridge_lchains(gc, &aux, kmer_size, gdp_max_ed, l0, l1, a);
int32_t ret, t;
ret = bridge_lchains(gc, &aux, kmer_size, gdp_max_ed, l0, l1, a);
if (ret < 0) {
for (t = j0; t < j; ++t) {
ret = bridge_lchains(gc, &aux, kmer_size, gdp_max_ed, &lc[st + t], &lc[st + t + 1], a);
assert(ret >= 0);
}
}
j0 = j;
}
}
Expand Down Expand Up @@ -517,4 +529,4 @@ void mg_gchain_free(mg_gchains_t *gs)
if (gs->gc[i].p) kfree(km, gs->gc[i].p);
kfree(km, gs->gc); kfree(km, gs->a); kfree(km, gs->lc);
kfree(km, gs);
}
}
Loading

0 comments on commit a4629ff

Please sign in to comment.