From 886f091e0b8db1c578ea43248285c97257e2ba57 Mon Sep 17 00:00:00 2001 From: Ghanshyamchandra74 Date: Thu, 14 Jul 2022 11:11:52 +0530 Subject: [PATCH] minigraph code-base --- LICENSE.txt | 23 + Makefile | 66 + NEWS.md | 306 ++++ README.md | 207 +++ algo.c | 194 +++ algo.h | 33 + asm-call.c | 147 ++ bseq.c | 133 ++ bseq.h | 76 + cal_cov.c | 139 ++ doc/example1.png | Bin 0 -> 73581 bytes doc/example2.png | Bin 0 -> 108251 bytes doc/examples.graffle | Bin 0 -> 28587 bytes format.c | 241 +++ galign.c | 138 ++ gchain1.c | 520 ++++++ gcmisc.c | 223 +++ gfa-aug.c | 260 +++ gfa-base.c | 526 ++++++ gfa-bbl.c | 372 +++++ gfa-ed.c | 617 +++++++ gfa-io.c | 395 +++++ gfa-priv.h | 154 ++ gfa.h | 166 ++ ggen.c | 182 +++ ggen.h | 21 + ggsimple.c | 570 +++++++ gmap.c | 211 +++ index.c | 230 +++ kalloc.c | 224 +++ kalloc.h | 82 + kavl.h | 414 +++++ kdq.h | 134 ++ ketopt.h | 116 ++ khashl.h | 348 ++++ krmq.h | 474 ++++++ kseq.h | 256 +++ ksort.h | 164 ++ kstring.h | 165 ++ kthread.c | 159 ++ kthread.h | 15 + kvec-km.h | 105 ++ kvec.h | 110 ++ lchain.c | 441 +++++ main.c | 301 ++++ map-algo.c | 500 ++++++ mgpriv.h | 128 ++ minigraph.1 | 359 ++++ minigraph.h | 175 ++ miniwfa.c | 834 ++++++++++ miniwfa.h | 95 ++ misc.c | 12 + misc/mgutils.js | 1451 +++++++++++++++++ options.c | 134 ++ shortk.c | 251 +++ sketch.c | 109 ++ sys.c | 147 ++ sys.h | 20 + test/MT-chimp.fa | 277 ++++ test/MT-human.fa | 239 +++ test/MT-orangA.fa | 276 ++++ test/MT.gfa | 19 + tex/Makefile | 13 + tex/minigraph.bib | 676 ++++++++ tex/minigraph.tex | 986 +++++++++++ tex/plots/CHM13-f1-90.bb.anno.gp | 42 + tex/plots/CHM13-f1-90.bb.anno.tbl | 13 + .../CHM13-f1-90.bb.mini-inter-none.win.gp | 269 +++ .../CHM13-f1-90.bb.mini-inter-none.win.sh | 7 + tex/plots/CHM13v1.cen.bed | 23 + tex/plots/CHM13v1.size | 23 + tex/plots/anno2tbl.js | 40 + tex/plots/bedutils.js | 367 +++++ tex/plots/chr-plot.js | 130 ++ tex/plots/gen-anno.mak | 24 + 75 files changed, 17297 insertions(+) create mode 100644 LICENSE.txt create mode 100644 
Makefile create mode 100644 NEWS.md create mode 100644 README.md create mode 100644 algo.c create mode 100644 algo.h create mode 100644 asm-call.c create mode 100644 bseq.c create mode 100644 bseq.h create mode 100644 cal_cov.c create mode 100644 doc/example1.png create mode 100644 doc/example2.png create mode 100644 doc/examples.graffle create mode 100644 format.c create mode 100644 galign.c create mode 100644 gchain1.c create mode 100644 gcmisc.c create mode 100644 gfa-aug.c create mode 100644 gfa-base.c create mode 100644 gfa-bbl.c create mode 100644 gfa-ed.c create mode 100644 gfa-io.c create mode 100644 gfa-priv.h create mode 100644 gfa.h create mode 100644 ggen.c create mode 100644 ggen.h create mode 100644 ggsimple.c create mode 100644 gmap.c create mode 100644 index.c create mode 100644 kalloc.c create mode 100644 kalloc.h create mode 100644 kavl.h create mode 100644 kdq.h create mode 100644 ketopt.h create mode 100644 khashl.h create mode 100644 krmq.h create mode 100644 kseq.h create mode 100644 ksort.h create mode 100644 kstring.h create mode 100644 kthread.c create mode 100644 kthread.h create mode 100644 kvec-km.h create mode 100644 kvec.h create mode 100644 lchain.c create mode 100644 main.c create mode 100644 map-algo.c create mode 100644 mgpriv.h create mode 100644 minigraph.1 create mode 100644 minigraph.h create mode 100644 miniwfa.c create mode 100644 miniwfa.h create mode 100644 misc.c create mode 100755 misc/mgutils.js create mode 100644 options.c create mode 100644 shortk.c create mode 100644 sketch.c create mode 100644 sys.c create mode 100644 sys.h create mode 100644 test/MT-chimp.fa create mode 100644 test/MT-human.fa create mode 100644 test/MT-orangA.fa create mode 100644 test/MT.gfa create mode 100644 tex/Makefile create mode 100644 tex/minigraph.bib create mode 100644 tex/minigraph.tex create mode 100644 tex/plots/CHM13-f1-90.bb.anno.gp create mode 100644 tex/plots/CHM13-f1-90.bb.anno.tbl create mode 100644 
tex/plots/CHM13-f1-90.bb.mini-inter-none.win.gp create mode 100644 tex/plots/CHM13-f1-90.bb.mini-inter-none.win.sh create mode 100644 tex/plots/CHM13v1.cen.bed create mode 100644 tex/plots/CHM13v1.size create mode 100755 tex/plots/anno2tbl.js create mode 100755 tex/plots/bedutils.js create mode 100755 tex/plots/chr-plot.js create mode 100644 tex/plots/gen-anno.mak diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 0000000..d7ac24e --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,23 @@ +The MIT License + +Copyright (c) 2019- Dana-Farber Cancer Institute + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..4dd33e6 --- /dev/null +++ b/Makefile @@ -0,0 +1,66 @@ +CC= gcc +CFLAGS= -g -Wall -Wc++-compat -std=c99 -msse4 -O3 +CPPFLAGS= +INCLUDES= +OBJS= kalloc.o kthread.o algo.o sys.o gfa-base.o gfa-io.o gfa-aug.o gfa-bbl.o gfa-ed.o \ + sketch.o misc.o bseq.o options.o shortk.o miniwfa.o \ + index.o lchain.o gchain1.o galign.o gcmisc.o map-algo.o cal_cov.o \ + format.o gmap.o ggsimple.o ggen.o asm-call.o +PROG= minigraph +LIBS= -lz -lpthread -lm + +ifneq ($(asan),) + CFLAGS+=-fsanitize=address + LIBS+=-fsanitize=address -ldl +endif + +.SUFFIXES:.c .o +.PHONY:all clean depend + +.c.o: + $(CC) -c $(CFLAGS) $(CPPFLAGS) $(INCLUDES) $< -o $@ + +all:$(PROG) + +minigraph:$(OBJS) main.o + $(CC) $(CFLAGS) $^ -o $@ $(LIBS) + +clean: + rm -fr gmon.out *.o a.out $(PROG) *~ *.a *.dSYM + +depend: + (LC_ALL=C; export LC_ALL; makedepend -Y -- $(CFLAGS) $(DFLAGS) -- *.c) + +# DO NOT DELETE + +algo.o: kalloc.h algo.h miniwfa.h kvec-km.h ksort.h +asm-call.o: mgpriv.h minigraph.h gfa.h ggen.h bseq.h gfa-priv.h algo.h +bseq.o: bseq.h kvec-km.h kalloc.h kseq.h +cal_cov.o: mgpriv.h minigraph.h gfa.h gfa-priv.h algo.h kalloc.h +format.o: kalloc.h mgpriv.h minigraph.h gfa.h +galign.o: mgpriv.h minigraph.h gfa.h kalloc.h miniwfa.h +gchain1.o: mgpriv.h minigraph.h gfa.h ksort.h khashl.h kalloc.h gfa-priv.h +gcmisc.o: mgpriv.h minigraph.h gfa.h kalloc.h +gfa-aug.o: gfa-priv.h gfa.h ksort.h +gfa-base.o: gfa-priv.h gfa.h kstring.h khashl.h kalloc.h ksort.h +gfa-bbl.o: gfa-priv.h gfa.h kalloc.h ksort.h kvec.h +gfa-ed.o: gfa-priv.h gfa.h kalloc.h ksort.h khashl.h kdq.h kvec-km.h +gfa-io.o: kstring.h gfa-priv.h gfa.h kseq.h +ggen.o: kthread.h kalloc.h sys.h bseq.h ggen.h minigraph.h gfa.h mgpriv.h +ggen.o: gfa-priv.h +ggsimple.o: mgpriv.h minigraph.h gfa.h gfa-priv.h kalloc.h bseq.h algo.h +ggsimple.o: sys.h ggen.h kvec-km.h +gmap.o: kthread.h kalloc.h bseq.h sys.h mgpriv.h minigraph.h gfa.h gfa-priv.h +index.o: mgpriv.h minigraph.h gfa.h 
khashl.h kalloc.h kthread.h kvec-km.h +index.o: sys.h +kalloc.o: kalloc.h +kthread.o: kthread.h +lchain.o: mgpriv.h minigraph.h gfa.h kalloc.h krmq.h +main.o: mgpriv.h minigraph.h gfa.h gfa-priv.h sys.h ketopt.h +map-algo.o: kalloc.h mgpriv.h minigraph.h gfa.h khashl.h ksort.h +miniwfa.o: miniwfa.h kalloc.h +misc.o: mgpriv.h minigraph.h gfa.h ksort.h +options.o: mgpriv.h minigraph.h gfa.h sys.h +shortk.o: mgpriv.h minigraph.h gfa.h ksort.h kavl.h algo.h khashl.h kalloc.h +sketch.o: kvec-km.h kalloc.h mgpriv.h minigraph.h gfa.h +sys.o: sys.h diff --git a/NEWS.md b/NEWS.md new file mode 100644 index 0000000..3c59465 --- /dev/null +++ b/NEWS.md @@ -0,0 +1,306 @@ +Release 0.19-r551 (12 June 2022) +-------------------------------- + +This release fixes a segmentation fault if minigraph is compiled with certain +compiler-libc combinations. This is apparently caused by memcpy(0,0,0). +Minigraph is otherwise identical to v0.18. + +(12 June 2022, r551) + + + +Release 0.18-r538 (9 May 2022) +------------------------------ + +This release uses heuristics to speed up base alignment in long divergent +regions. The heuristics do not guarantee optimal alignment but they reliably +produce alignment close to the optimal except in centromeres where the +algorithmically optimal alignment may not represent true evolution in biology. +The new version is 10-700% faster than v0.17 depending on input data and +parameters in use. + +(9 May 2022, r538) + + + +Release 0.17-r524 (29 April 2022) +--------------------------------- + +This release adds base alignment to minigraph. It represents the first major +improvement to minigraph. Specifically, this release attempts to connect linear +chains with the graph wavefront alignment algorithm (GWFA) and produces the +final alignment with miniwfa under the 2-piece gap penalty. Graph generation +also considers base alignment. This gives more accurate graph alignment and +generally simpler graph topology.
Note that minigraph still focuses on +structural variations and does not generate base-level graphs. To endusers, +minigraph remains similar feature wise. + +Notable changes: + + * New feature: option `-c` for base alignment and graph generation. In the + alignment mode, the option adds the `cg:Z` CIGAR tag like minimap2. Graph + generation still works without `-c` but applying this option is generally + recommended now. + +It should be noted that the base alignment is currently slow for species of +high diversity. This will be addressed in the next couple of releases. + +(29 April 2022, r524) + + + +Release 0.16-r436 (21 February 2022) +------------------------------------ + +Notable changes: + + * Improvement: 2-level chaining. This is a feature backported from minimap2. + It speeds up graph generation for human graphs. + + * Improvement: break a chain at poorly aligned regions, another recent + minimap2 feature. + + * Added the script for generating figures in the minigraph paper. + +(21 February 2022, r436) + + + +Release 0.15-r426 (21 March 2021) +--------------------------------- + +Fixed a bug in bubble identification around inversions. This version should be +used together with the latest gfatools for consistency. + +(21 March 2021, r426) + + + +Release 0.14-r415 (19 December 2020) +------------------------------------ + +Notable changes: + + * Added the `--call` option to find the allele/walk in each bubble. + + * Reduced the default minimum variant length (option `-L`) from 100 to 50 for + the consistency with the SV community. + +(19 December 2020, r415) + + + +Release 0.13-r397 (3 December 2020) +----------------------------------- + +Notable change: + + * Fixed incorrect anchors in linear chains. In older versions, a linear chain + may contain two anchors with identical reference or query coordinates. 
+ +(3 December 2020, r397) + + + +Release 0.12-r389 (26 October 2020) +----------------------------------- + +Notable changes: + + * Improve alignments towards ends of graph segments. If there is an SV close to + the ends but not at the ends, older versions may produce an excessively + large bubble including high-identity matches. + + * Heuristically accelerates alignment in complex subgraphs by skipping + many unnecessary sequence-aware graph traversals. This speeds up graph + generation for CHM13 by three folds without obviously affecting accuracy. + + * Added option --inv to optionally disable inversions. Graph traversal is hard + with inversions. + + * Fixed the bug that prevents large -K. + + * Apply option -K4g to the asm preset. + + * Added option --write-mz to output the positions of minimizer anchors. + +(26 October 2020, r389) + + + +Release 0.11-r371 (13 September 2020) +------------------------------------- + +Notable changes: + + * Added option --max-rmq-size to limit the max RMQ size, which is set 100k by + default. This heuristic reduces the long running time for aligning long + centromeric sequences. The accuracy might be affected in rare cases. + + * Cap the max k-mer occurrence to 250 by default. For maize genomes, the + current heuristic may choose an occurrence cutoff larger than 1000. This + makes minigraph too slow to be practical. + + * Added option -S to output more detailed information about linear chains. + + * Added option -D to ignore diagonal minimizer anchors. This is useful to + mapping a sequence against itself. + +(13 September 2020, r371) + + + +Release 0.10-r356 (14 February 2020) +------------------------------------ + +Notable changes: + + * Older releases miss a small fraction of INDELs involving repeats. This + release fixes this issue. + + * Added the "stableGaf" command to mgutils.js to convert unstable GAF (e.g. by + GraphAligner) to stable GAF. 
+ +(14 February 2020, r356) + + + +Release 0.9-r343 (31 December 2019) +----------------------------------- + +Notable changes: + + * RMQ based linear chaining. The chaining accuracy should be higher for large + events. The speed remains similar. + + * Use ksw2 to check the sequence divergence of events to be inserted. + + * Treat inversions as special events. Don't insert them as long substitutions. + +(31 December 2019, r343) + + + +Release 0.8-r316 (11 December 2019) +----------------------------------- + +This release reduces suboptimal chains caused by the chaining heuristics. It +generates slightly simpler human graphs. + +(11 December 2019, r316) + + + +Release 0.7-r310 (21 November 2019) +----------------------------------- + +Notable changes: + + * Increased the default maximum INDEL/event length from 10kb to 100kb for + assembly mapping and graph generation. + + * Decreased the default minimum INDEL/event length from 250bp to 100bp. + + * Accelerated graph mapping by pre-filtering isolated anchors and disconnected + linear chains. This triples the performance when long gaps are desired. + +Due to the change of default parameters, this release generates graphs +different from the previous versions. + +(21 November 2019, r310) + + + +Release 0.6-r302 (17 November 2019) +----------------------------------- + +Notable changes: + + * Assign weight to seeds based on their repetitiveness. This helps chaining in + repetitive regions a little bit. + + * For short-read mapping, prefer the reference path if the alternate path is + not much better. + +Major changes may be coming in the next release. + +(17 November 2019, r302) + + + +Release 0.5-r285 (8 September 2019) +----------------------------------- + +Notable changes: + + * Fixed a bug that leads to wrong mapping positions in GAF. + + * Fixed two bugs related to graph chaining. + + * Added option `-j` to set expected sequence divergence and to adjust other + chaining parameters accordingly. 
+ + * Increased the k-mer thresholds for fast divergence estimate. This improves + the alignment around low-complexity regions. + + * Tuned the default parameters to add highly divergent events only. + + * Warn about duplicated sequence names in graph construction (#3). + +This version generates graphs different from the previous versions. The mapping +accuracy is improved due to the bug fixes and parameter tuning. + +(8 September 2019, r285) + + + +Release 0.4-r267 (22 August 2019) +--------------------------------- + +Notable changes: + + * Support paired-end mapping for short reads. + + * Remap and calculate coverage (see the new --cov option in the manpage). + + * Fixed multiple edges in the generated graphs. The v0.3 14-genome graph + contains one multiple edge. + + * Use dynamic minimizer occurrence cutoff. For human data, the dynamic cutoff + is around 137, higher than the default cutoff 100 used in earlier versions. + As a result, graph generations will become a little slower. + +Due to the last two changes, graphs generated with this version are different +from the previous versions. + +(22 August 2019, r267) + + + +Release 0.3-r243 (7 August 2019) +-------------------------------- + +This release generates graphs with SR tags on L-lines. The topology of the +graph is identical to the one generated with v0.2. + +(7 August 2019, r243) + + + +Release 0.2-r235 (19 July 2019) +------------------------------- + +This release fixes multiple minor bugs. It also considers k-mer matches and +improves the accuracy of graph chaining. Nonetheless, the old chaining +algorithm, albeit simple, works quite well. The improvement is marginal. + +(19 July 2019, r235) + + + +Release 0.1-r191 (6 July 2019) +------------------------------ + +Initial proof-of-concept release. 
+ +(6 July 2019, r191) diff --git a/README.md b/README.md new file mode 100644 index 0000000..8eab3df --- /dev/null +++ b/README.md @@ -0,0 +1,207 @@ +[![Build Status](https://travis-ci.org/lh3/minigraph.svg?branch=master)](https://travis-ci.org/lh3/minigraph) +## Getting Started + +```sh +git clone https://github.com/lh3/minigraph +cd minigraph && make +# Map sequence to sequence, similar to minimap2 without base alignment +./minigraph test/MT-human.fa test/MT-orangA.fa > out.paf +# Map sequence to graph +./minigraph test/MT.gfa test/MT-orangA.fa > out.gaf +# Incremental graph generation (-l10k necessary for this toy example) +./minigraph -cxggs -l10k test/MT.gfa test/MT-chimp.fa test/MT-orangA.fa > out.gfa +# Call per-sample path in each bubble/variation (-c not needed for this) +./minigraph -xasm -l10k --call test/MT.gfa test/MT-orangA.fa > orangA.call.bed + +# The lossy FASTA representation (requiring https://github.com/lh3/gfatools) +gfatools gfa2fa -s out.gfa > out.fa +# Extract localized structural variations +gfatools bubble out.gfa > SV.bed +``` + +## Table of Contents + + + +- [Getting Started](#started) +- [Introduction](#intro) +- [Users' Guide](#uguide) + - [Installation](#install) + - [Sequence-to-graph mapping](#map) + - [Graph generation](#ggen) + - [Calling structural variations](#callsv) + - [Prebuilt graphs](#prebuilt) + - [Algorithm overview](#algo) +- [Limitations](#limit) + +## Introduction + +Minigraph is a sequence-to-graph mapper and graph constructor. For graph +generation, it aligns a query sequence against a sequence graph and +incrementally augments an existing graph with long query subsequences diverged +from the graph. The figure on the right briefly explains the procedure. + +Minigraph borrows ideas and code from [minimap2][minimap2]. It is fairly +efficient and can construct a graph from 90 human assemblies in a couple of +days using 24 CPU cores. Older versions of minigraph were unable to produce +base alignment.
The latest version can. **Please add option `-c` for graph +generation** as it generally improves the quality of graphs. + +## Users' Guide + +### Installation + +To install minigraph, type `make` in the source code directory. The only +non-standard dependency is [zlib][zlib]. For better performance, it is +recommended to compile with recent compilers. + +### Sequence-to-graph mapping + +To map sequences against a graph, you should prepare the graph in the [GFA +format][gfa1], or preferably the [rGFA format][rgfa]. If you don't have +a graph, you can generate a graph from multiple samples (see the [Graph +generation section](#ggen) below). The typical command line for mapping is +```sh +minigraph -cx lr graph.gfa query.fa > out.gaf +``` +You may choose the right preset option `-x` according to input. Minigraph +outputs mappings in the [GAF format][gaf], which is a strict superset of the +[PAF format][paf]. The only visual difference between GAF and PAF is that the +6th column in GAF may encode a graph path like +`>MT_human:0-4001<MT_orang:3426-3927` instead of a contig name. + +### Graph generation + +The following command-line generates a graph in rGFA: +```sh +minigraph -cxggs -t16 ref.fa sample1.fa sample2.fa > out.gfa +``` +which is equivalent to +```sh +minigraph -cxggs -t16 ref.fa sample1.fa > sample1.gfa +minigraph -cxggs -t16 sample1.gfa sample2.fa > out.gfa +``` +File `ref.fa` is typically the reference genome (e.g. GRCh38 for human). +It can also be replaced by a graph in rGFA. Minigraph assumes `sample1.fa` to +be the whole-genome assembly of an individual. This is an important assumption: +minigraph only considers 1-to-1 orthologous regions between the graph and the +individual FASTA. If you use raw reads or put multiple individual genomes in +one file, minigraph will filter out most alignments as they cover the input +graph multiple times.
+ +The output rGFA can be converted to a FASTA file with [gfatools][gfatools]: +```sh +gfatools gfa2fa -s graph.gfa > out.stable.fa +``` +The output `out.stable.fa` will always include the initial reference `ref.fa` +and may additionally add new segments diverged from the initial reference. + +### Calling structural variations + +A minigraph graph is composed of chains of bubbles with the reference as the +backbone. Each *bubble* represents a structural variation. It can be +multi-allelic if there are multiple paths through the bubble. You can extract +these bubbles with +```sh +gfatools bubble graph.gfa > var.bed +``` +The output is a BED-like file. The first three columns give the position of a +bubble/variation and the rest of columns are: + +* (4) \# GFA segments in the bubble including the source and the sink of the bubble +* (5) \# all possible paths through the bubble (not all paths present in input samples) +* (6) 1 if the bubble involves an inversion; 0 otherwise +* (7) length of the shortest path (i.e. allele) through the bubble +* (8) length of the longest path/allele through the bubble +* (9-11) please ignore +* (12) list of segments in the bubble; first for the source and last for the sink +* (13) sequence of the shortest path (`*` if zero length) +* (14) sequence of the longest path (NB: it may not be present in the input samples) + +Given an assembly, you can find the path/allele of this assembly in each bubble with +```sh +minigraph -cxasm --call graph.gfa sample-asm.fa > sample.bed +``` +On each line in the BED-like output, the last colon separated field gives the +alignment path through the bubble, the path length in the graph, the mapping +strand of sample contig, the contig name, the approximate contig start and +contig end. The number of lines in the file is the same as the number of lines +in the output of `gfatools bubble`. You can use the `paste` Unix command to +piece multiple samples together. 
+ +### Prebuilt graphs + +Prebuilt human graphs in the rGFA format can be found [at Zenodo][human-zenodo]. + +### Algorithm overview + + + +In the following, minigraph command line options have a dash ahead and are +highlighted in bold. The description may help to tune minigraph parameters. + +1. Read all reference bases, extract (**-k**,**-w**)-minimizers and index them + in a hash table. + +2. Read **-K** [=*500M*] query bases in the mapping mode, or read all query + bases in the graph construction mode. For each query sequence, do step 3 + through 5: + +3. Find colinear minimizer chains using the [minimap2][minimap2] algorithm, + assuming segments in the graph are disconnected. These are called *linear + chains*. + +4. Perform another round of chaining, taking each linear chain as an anchor. + For a pair of linear chains, minigraph tries to connect them by doing graph + wavefront alignment algorithm (GWFA). If minigraph fails to find an + alignment within an edit distance threshold, it will find up to 15 shortest + paths between the two linear chains and chooses the path of length closest + to the distance on the query sequence. Chains found at this step are called + *graph chains*. + +5. Identify primary chains and estimate mapping quality with a method similar + to the one used in minimap2. Perform base alignment. + +6. In the graph construction mode, collect all mappings longer than **-d** + [=*10k*] and keep their query and graph segment intervals in two lists, + respectively. + +7. For each mapping longer than **-l** [=*100k*], finds poorly aligned regions. + A region is filtered if it overlaps two or more intervals collected at step + 6. + +8. Insert the remaining poorly aligned regions into the input graph. This + constructs a new graph. + +## Limitations + +* A complex minigraph subgraph is often suboptimal and may vary with the order + of input samples. It may not represent the evolution history + or the functional relevance at the locus. 
Please *do not overinterpret* + complex subgraphs. If you are interested in a particular subgraph, it is + recommended to extract the input contig subsequences involved in the subgraph + with the `--call` option and manually curated the results. + +* Minigraph needs to find strong colinear chains first. For a graph consisting + of many short segments (e.g. one generated from rare SNPs in large + populations), minigraph will fail to map query sequences. + +* The base alignment in the current version of minigraph is slow for species of + high diversity. + + +[zlib]: http://zlib.net/ +[minimap2]: https://github.com/lh3/minimap2 +[rgfa]: https://github.com/lh3/gfatools/blob/master/doc/rGFA.md +[gfa1]: https://github.com/GFA-spec/GFA-spec/blob/master/GFA1.md +[gaf]: https://github.com/lh3/gfatools/blob/master/doc/rGFA.md#the-graph-alignment-format-gaf +[paf]: https://github.com/lh3/miniasm/blob/master/PAF.md +[gfatools]: https://github.com/lh3/gfatools +[bandage]: https://rrwick.github.io/Bandage/ +[gfaviz]: https://github.com/ggonnella/gfaviz +[human-zenodo]: https://zenodo.org/record/6499594 diff --git a/algo.c b/algo.c new file mode 100644 index 0000000..4e00006 --- /dev/null +++ b/algo.c @@ -0,0 +1,194 @@ +#include +#include +#include "kalloc.h" +#define __STDC_LIMIT_MACROS +#include "algo.h" +#include "miniwfa.h" + +/************************ + * Max-scoring segments * + ************************/ + +#include "kvec-km.h" + +#define MSS_NEG_INF INT32_MIN + +typedef struct { + int32_t st, en; + MG_MSS_TYPE L, R; + int32_t pre; +} msseg_aux_t; + +typedef kvec_t(mg_msseg_t) msseg_v; +typedef kvec_t(msseg_aux_t) msseg_aux_v; + +static void move_segs(void *km, msseg_v *ret, msseg_aux_v *seg, MG_MSS_TYPE min_sc) +{ + int32_t i; + for (i = 0; i < seg->n; ++i) { + msseg_aux_t *p = &seg->a[i]; + if (p->R - p->L >= min_sc) { + mg_msseg_t *q; + kv_pushp(mg_msseg_t, km, *ret, &q); + q->st = p->st, q->en = p->en, q->sc = p->R - p->L; + } + } + seg->n = 0; +} + +// Reference: Ruzzo 
and Tompa (1999) A linear time algorithm for finding all maximal scoring subsequencs +mg_msseg_t *mg_mss_all(void *km, int32_t n, const MG_MSS_TYPE *S, MG_MSS_TYPE min_sc, MG_MSS_TYPE xdrop, int32_t *n_seg) +{ + int32_t i, j; + MG_MSS_TYPE L, max; + msseg_v ret = {0,0,0}; + msseg_aux_v seg = {0,0,0}; + msseg_aux_t t; + + kv_resize(mg_msseg_t, km, ret, 16); + kv_resize(msseg_aux_t, km, seg, 16); + for (i = 0, L = 0, max = MSS_NEG_INF; i < n;) { + if (S[i] > 0) { + int32_t k; + MG_MSS_TYPE R = L + S[i]; + for (k = i + 1; k < n && S[k] > 0; ++k) + R += S[k]; + if (R > max) max = R; + t.st = i, t.en = k, t.L = L, t.R = R; + while (1) { + msseg_aux_t *p; + for (j = seg.n - 1; j >= 0;) { + p = &seg.a[j]; + if (p->L < t.L) break; + j = p->pre >= 0? p->pre : j - 1; + } + if (j >= 0 && seg.a[j].R < t.R) { + p = &seg.a[j]; + t.st = p->st, t.L = p->L, t.pre = p->pre; + seg.n = j; + } else { + if (j < 0) { + move_segs(km, &ret, &seg, min_sc); + max = R; + } + t.pre = j; + kv_push(msseg_aux_t, km, seg, t); + break; + } + } + L = R, i = k; + } else { + if (xdrop > 0 && L + S[i] + xdrop < max) { // reset + move_segs(km, &ret, &seg, min_sc); + L = 0, max = MSS_NEG_INF; + } + L += S[i++]; + } + } + move_segs(km, &ret, &seg, min_sc); + kfree(km, seg.a); + KREALLOC(km, ret.a, ret.n); + *n_seg = ret.n; + return ret.a; +} + +/************************** + * Interval overlap query * + **************************/ + +#include +#include "ksort.h" + +#define sort_key_intv(a) ((a).st) +KRADIX_SORT_INIT(mg_intv, mg_intv_t, sort_key_intv, 4) + +int32_t mg_intv_index(int32_t n, mg_intv_t *a) +{ + int32_t i, last_i, last, k; + if (n <= 0) return -1; + radix_sort_mg_intv(a, a + n); + for (i = 0; i < n; i += 2) last_i = i, last = a[i].far = a[i].en; + for (k = 1; 1LL< el? e : el; + e = e > er? e : er; + a[i].far = e; + } + last_i = last_i>>k&1? 
last_i - x : last_i + x; + if (last_i < n && a[last_i].far > last) + last = a[last_i].far; + } + return k - 1; +} + +typedef struct { + int64_t x; + int32_t k, w; +} istack_t; + +int32_t mg_intv_overlap(void *km, int32_t n_a, const mg_intv_t *a, int32_t st, int32_t en, int32_t **b_, int32_t *m_b_) +{ + int32_t t = 0, h, *b = *b_, m_b = *m_b_, n = 0; + istack_t stack[64], *p; + + for (h = 0; 1<k = h, p->x = (1LL<k) - 1, p->w = 0; // push the root into the stack + while (t) { // stack is not empyt + istack_t z = stack[--t]; + if (z.k <= 3) { // the subtree is no larger than (1<<(z.k+1))-1; do a linear scan + int32_t i, i0 = z.x >> z.k << z.k, i1 = i0 + (1LL<<(z.k+1)) - 1; + if (i1 >= n_a) i1 = n_a; + for (i = i0; i < i1 && a[i].st < en; ++i) + if (st < a[i].en) { + if (n == m_b) KEXPAND(km, b, m_b); + b[n++] = i; + } + } else if (z.w == 0) { // if left child not processed + int32_t y = z.x - (1LL<<(z.k-1)); + p = &stack[t++]; + p->k = z.k, p->x = z.x, p->w = 1; + if (y >= n_a || a[y].far > st) { + p = &stack[t++]; + p->k = z.k - 1, p->x = y, p->w = 0; // push the left child to the stack + } + } else if (z.x < n_a && a[z.x].st < en) { + if (st < a[z.x].en) { // then z.x overlaps the query; write to the output array + if (n == m_b) KEXPAND(km, b, m_b); + b[n++] = z.x; + } + p = &stack[t++]; + p->k = z.k - 1, p->x = z.x + (1LL<<(z.k-1)), p->w = 0; // push the right child + } + } + *b_ = b, *m_b_ = m_b; + return n; +} + +/******************** + * Global alignment * + ********************/ + +int32_t mg_wfa_cmp(void *km, int32_t l1, const char *s1, int32_t l2, const char *s2, int32_t max_pen, int32_t *mlen, int32_t *blen) +{ + mwf_opt_t opt; + mwf_rst_t r; + int32_t i; + mwf_opt_init(&opt); + opt.max_s = max_pen; + opt.flag |= MWF_F_CIGAR; + mwf_wfa_exact(km, &opt, l1, s1, l2, s2, &r); + *mlen = *blen = 0; + for (i = 0; i < r.n_cigar; ++i) { + int32_t op = r.cigar[i]&0xf, len = r.cigar[i]>>4; + *blen += len; + if (op == 7) *mlen += len; + } + kfree(km, r.cigar); + return 
r.s < 0? -(l1 + l2) : (l1 + l2) / 2 - r.s; +} diff --git a/algo.h b/algo.h new file mode 100644 index 0000000..29c827a --- /dev/null +++ b/algo.h @@ -0,0 +1,33 @@ +#ifndef MG_ALGO_H +#define MG_ALGO_H + +#include + +#define MG_MSS_TYPE int32_t +#define MG_LIS_TYPE uint64_t + +typedef struct { + int32_t st, en; + MG_MSS_TYPE sc; +} mg_msseg_t; + +typedef struct { + uint32_t st, en:31, rev:1; + int32_t far, i; +} mg_intv_t; + +#ifdef __cplusplus +extern "C" { +#endif + +mg_msseg_t *mg_mss_all(void *km, int32_t n, const MG_MSS_TYPE *S, MG_MSS_TYPE min_sc, MG_MSS_TYPE xdrop, int32_t *n_seg); +int32_t mg_intv_index(int32_t n, mg_intv_t *a); +int32_t mg_intv_overlap(void *km, int32_t n_a, const mg_intv_t *a, int32_t st, int32_t en, int32_t **b_, int32_t *m_b_); +void radix_sort_mg_intv(mg_intv_t *st, mg_intv_t *en); +int32_t mg_wfa_cmp(void *km, int32_t l1, const char *s1, int32_t l2, const char *s2, int32_t max_pen, int32_t *mlen, int32_t *blen); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/asm-call.c b/asm-call.c new file mode 100644 index 0000000..a81b7eb --- /dev/null +++ b/asm-call.c @@ -0,0 +1,147 @@ +#include +#include "mgpriv.h" +#include "ggen.h" +#include "gfa-priv.h" +#include "algo.h" + +int32_t mg_gc_index(void *km, int min_mapq, int min_map_len, int min_depth_len, const gfa_t *g, int32_t n_seq, mg_gchains_t *const* gcs, + double *a_dens, int32_t **soff_, int32_t **qoff_, mg_intv_t **sintv_, mg_intv_t **qintv_); + +typedef struct { + int32_t bid; + uint8_t is_stem:4, is_src:4; +} callaux_t; + +typedef struct { + int32_t t, i; + int32_t st, en, strand; + int32_t qs, qe, glen; +} bbaux_t; + +void mg_call_asm(const gfa_t *g, int32_t n_seq, const mg_bseq1_t *seq, mg_gchains_t *const *gcs, int32_t min_mapq, int32_t min_blen) +{ + int32_t i, j, t, max_acnt, *soff, *qoff, n_bb, m_ovlp = 0, *ovlp = 0; + mg_intv_t *sintv, *qintv; + double a_dens; + gfa_bubble_t *bb; + callaux_t *ca; + bbaux_t *ba; + kstring_t out = {0,0,0}; + + max_acnt = mg_gc_index(0, 
min_mapq, min_blen>>1, min_blen, g, n_seq, gcs, &a_dens, &soff, &qoff, &sintv, &qintv); + if (max_acnt == 0) return; + + bb = gfa_bubble(g, &n_bb); + GFA_CALLOC(ba, n_bb); + GFA_CALLOC(ca, g->n_seg); + for (i = 0; i < n_bb; ++i) { + gfa_bubble_t *b = &bb[i]; + assert(b->n_seg >= 2); + for (j = 0; j < b->n_seg; ++j) + ca[b->v[j]>>1].bid = i; + ca[b->v[0]>>1].is_stem = ca[b->v[b->n_seg-1]>>1].is_stem = 1; + ca[b->v[0]>>1].is_src = 1; + ba[i].t = -1; + } + + for (t = 0; t < n_seq; ++t) { + const mg_gchains_t *gt = gcs[t]; + for (i = 0; i < gt->n_gc; ++i) { + const mg_gchain_t *gc = >->gc[i]; + int32_t st = -1; + for (j = 1; j < gc->cnt; ++j) { + const mg_llchain_t *lc = >->lc[gc->off + j]; + if (!ca[lc->v>>1].is_stem && ca[(lc-1)->v>>1].is_stem) { + st = gc->off + j; + } else if ((ca[lc->v>>1].is_stem && !ca[(lc-1)->v>>1].is_stem && st > 0) || (ca[lc->v>>1].is_stem && ca[(lc-1)->v>>1].is_stem)) { + int32_t n_ovlp, k, en = gc->off + j, qs, qe, span, bid, strand, glen; + bbaux_t *p; + + // determine the source and sink nodes + if (ca[lc->v>>1].is_stem && ca[(lc-1)->v>>1].is_stem) { // two adjacent stems: this is a deletion + st = gc->off + j; + } else { + assert(en > st); + } + + // test overlap on the query + span = gt->a[gt->lc[st].off].y >> 32 & 0xff; + qs = (int32_t)gt->a[gt->lc[st - 1].off + gt->lc[st - 1].cnt - 1].y + 1; // NB: it is fine even if .cnt==0 + qe = (int32_t)gt->a[gt->lc[en].off].y + 1 - span; + n_ovlp = mg_intv_overlap(0, qoff[t+1] - qoff[t], &qintv[qoff[t]], qs, qe, &ovlp, &m_ovlp); + if (n_ovlp > 1) continue; // overlap on the query - not orthologous + + // test overlap on the graph + for (k = st, glen = 0; k < en; ++k) { + const mg_llchain_t *lk = >->lc[k]; + int32_t seg = lk->v>>1; + n_ovlp = mg_intv_overlap(0, soff[seg+1] - soff[seg], &sintv[soff[seg]], 0, g->seg[seg].len, &ovlp, &m_ovlp); + glen += g->seg[seg].len; + if (n_ovlp > 1) break; // overlap on the graph - not orthoologous + } + if (k < en) continue; + + // determine the bubble ID + 
assert(ca[gt->lc[st-1].v>>1].is_stem && ca[gt->lc[en].v>>1].is_stem); + if (ca[gt->lc[st-1].v>>1].bid < ca[gt->lc[en].v>>1].bid) + strand = 1; + else if (ca[gt->lc[st-1].v>>1].bid > ca[gt->lc[en].v>>1].bid) + strand = -1; + else { + if (ca[gt->lc[st-1].v>>1].is_src + ca[gt->lc[en].v>>1].is_src != 1) { + fprintf(stderr, "[W::%s] type-1 folded inversion alignment around %c%s <=> %s:%d-%d\n", + __func__, "><"[gt->lc[st].v&1], g->seg[gt->lc[st].v>>1].name, seq[t].name, qs, qe); + continue; + } + if (ca[gt->lc[st-1].v>>1].is_src) strand = 1; + else strand = -1; + } + bid = strand > 0? ca[gt->lc[st-1].v>>1].bid : ca[gt->lc[en].v>>1].bid; + + // attach the bubble + for (k = st; k < en; ++k) // check consistency + if (ca[gt->lc[k].v>>1].bid != bid) + break; + if (k != en) { // this may happen around an inversion towards the end of an alignment chain + fprintf(stderr, "[W::%s] type-2 folded inversion alignment around %c%s <=> %s:%d-%d\n", + __func__, "><"[gt->lc[st].v&1], g->seg[gt->lc[st].v>>1].name, seq[t].name, qs, qe); + continue; + } + p = &ba[bid]; + p->t = t, p->i = i, p->st = st, p->en = en, p->strand = strand, p->qs = qs, p->qe = qe, p->glen = glen; + } + } + } + } + + for (i = 0; i < n_bb; ++i) { + gfa_bubble_t *b = &bb[i]; + bbaux_t *a = &ba[i]; + const mg_gchains_t *gt = gcs[a->t]; + out.l = 0; + mg_sprintf_lite(&out, "%s\t%d\t%d\t%c%s\t%c%s\t", g->sseq[b->snid].name, b->ss, b->se, "><"[b->v[0]&1], g->seg[b->v[0]>>1].name, + "><"[b->v[b->n_seg-1]&1], g->seg[b->v[b->n_seg-1]>>1].name); + if (a->t >= 0) { + assert(a->strand != 0); + if (a->st == a->en) { + mg_sprintf_lite(&out, "*"); + } else if (a->strand > 0) { + for (j = a->st; j < a->en; ++j) + mg_sprintf_lite(&out, "%c%s", "><"[gt->lc[j].v&1], g->seg[gt->lc[j].v>>1].name); + } else { + for (j = a->en - 1; j >= a->st; --j) + mg_sprintf_lite(&out, "%c%s", "<>"[gt->lc[j].v&1], g->seg[gt->lc[j].v>>1].name); + } + mg_sprintf_lite(&out, ":%d:%c:%s:%d:%d", a->glen, a->strand > 0? 
'+' : '-', seq[a->t].name, a->qs, a->qe); + } else { + mg_sprintf_lite(&out, "."); + } + puts(out.s); + } + + free(ba); free(ca); + free(soff); free(qoff); free(sintv); free(qintv); + for (i = 0; i < n_bb; ++i) free(bb[i].v); + free(bb); + free(out.s); +} diff --git a/bseq.c b/bseq.c new file mode 100644 index 0000000..c4b95c4 --- /dev/null +++ b/bseq.c @@ -0,0 +1,133 @@ +#include +#include +#include +#include +#define __STDC_LIMIT_MACROS +#include "bseq.h" +#include "kvec-km.h" +#include "kseq.h" +KSEQ_INIT(gzFile, gzread) + +#define CHECK_PAIR_THRES 1000000 + +struct mg_bseq_file_s { + gzFile fp; + kseq_t *ks; + mg_bseq1_t s; +}; + +mg_bseq_file_t *mg_bseq_open(const char *fn) +{ + mg_bseq_file_t *fp; + gzFile f; + f = fn && strcmp(fn, "-")? gzopen(fn, "r") : gzdopen(0, "r"); + if (f == 0) return 0; + fp = (mg_bseq_file_t*)calloc(1, sizeof(mg_bseq_file_t)); + fp->fp = f; + fp->ks = kseq_init(fp->fp); + return fp; +} + +void mg_bseq_close(mg_bseq_file_t *fp) +{ + kseq_destroy(fp->ks); + gzclose(fp->fp); + free(fp); +} + +static inline char *kstrdup(const kstring_t *s) +{ + char *t; + t = (char*)malloc(s->l + 1); + memcpy(t, s->s, s->l + 1); + return t; +} + +static inline void kseq2bseq(kseq_t *ks, mg_bseq1_t *s, int with_qual, int with_comment) +{ + int i; + if (ks->name.l == 0) + fprintf(stderr, "[WARNING]\033[1;31m empty sequence name in the input.\033[0m\n"); + s->name = kstrdup(&ks->name); + s->seq = kstrdup(&ks->seq); + for (i = 0; i < (int)ks->seq.l; ++i) // convert U to T + if (s->seq[i] == 'u' || s->seq[i] == 'U') + --s->seq[i]; + s->qual = with_qual && ks->qual.l? kstrdup(&ks->qual) : 0; + s->comment = with_comment && ks->comment.l? 
kstrdup(&ks->comment) : 0; + s->l_seq = ks->seq.l; +} + +mg_bseq1_t *mg_bseq_read(mg_bseq_file_t *fp, int64_t chunk_size, int with_qual, int with_comment, int frag_mode, int *n_) +{ + int64_t size = 0; + int ret; + kvec_t(mg_bseq1_t) a = {0,0,0}; + kseq_t *ks = fp->ks; + *n_ = 0; + if (fp->s.seq) { + kv_resize(mg_bseq1_t, 0, a, 256); + kv_push(mg_bseq1_t, 0, a, fp->s); + size = fp->s.l_seq; + memset(&fp->s, 0, sizeof(mg_bseq1_t)); + } + while ((ret = kseq_read(ks)) >= 0) { + mg_bseq1_t *s; + assert(ks->seq.l <= INT32_MAX); + if (a.m == 0) kv_resize(mg_bseq1_t, 0, a, 256); + kv_pushp(mg_bseq1_t, 0, a, &s); + kseq2bseq(ks, s, with_qual, with_comment); + size += s->l_seq; + if (size >= chunk_size) { + if (frag_mode && a.a[a.n-1].l_seq < CHECK_PAIR_THRES) { + while (kseq_read(ks) >= 0) { + kseq2bseq(ks, &fp->s, with_qual, with_comment); + if (mg_qname_same(fp->s.name, a.a[a.n-1].name)) { + kv_push(mg_bseq1_t, 0, a, fp->s); + memset(&fp->s, 0, sizeof(mg_bseq1_t)); + } else break; + } + } + break; + } + } + if (ret < -1) + fprintf(stderr, "[WARNING]\033[1;31m wrong FASTA/FASTQ record. 
Continue anyway.\033[0m\n"); + *n_ = a.n; + return a.a; +} + +mg_bseq1_t *mg_bseq_read_frag(int n_fp, mg_bseq_file_t **fp, int64_t chunk_size, int with_qual, int with_comment, int *n_) +{ + int i; + int64_t size = 0; + kvec_t(mg_bseq1_t) a = {0,0,0}; + *n_ = 0; + if (n_fp < 1) return 0; + while (1) { + int n_read = 0; + for (i = 0; i < n_fp; ++i) + if (kseq_read(fp[i]->ks) >= 0) + ++n_read; + if (n_read < n_fp) { + if (n_read > 0) + fprintf(stderr, "[W::%s]\033[1;31m query files have different number of records; extra records skipped.\033[0m\n", __func__); + break; // some file reaches the end + } + if (a.m == 0) kv_resize(mg_bseq1_t, 0, a, 256); + for (i = 0; i < n_fp; ++i) { + mg_bseq1_t *s; + kv_pushp(mg_bseq1_t, 0, a, &s); + kseq2bseq(fp[i]->ks, s, with_qual, with_comment); + size += s->l_seq; + } + if (size >= chunk_size) break; + } + *n_ = a.n; + return a.a; +} + +int mg_bseq_eof(mg_bseq_file_t *fp) +{ + return (ks_eof(fp->ks->f) && fp->s.seq == 0); +} diff --git a/bseq.h b/bseq.h new file mode 100644 index 0000000..796a5f1 --- /dev/null +++ b/bseq.h @@ -0,0 +1,76 @@ +#ifndef MM_BSEQ_H +#define MM_BSEQ_H + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +extern unsigned char gfa_comp_table[256]; + +struct mg_bseq_file_s; +typedef struct mg_bseq_file_s mg_bseq_file_t; + +typedef struct { + int32_t l_seq, rid; + char *name, *seq, *qual, *comment; +} mg_bseq1_t; + +mg_bseq_file_t *mg_bseq_open(const char *fn); +void mg_bseq_close(mg_bseq_file_t *fp); +mg_bseq1_t *mg_bseq_read(mg_bseq_file_t *fp, int64_t chunk_size, int with_qual, int with_comment, int frag_mode, int *n_); +mg_bseq1_t *mg_bseq_read_frag(int n_fp, mg_bseq_file_t **fp, int64_t chunk_size, int with_qual, int with_comment, int *n_); +int mg_bseq_eof(mg_bseq_file_t *fp); + +extern unsigned char seq_nt4_table[256]; +extern unsigned char gfa_comp_table[256]; + +static inline int32_t mg_qname_len(const char *s) +{ + int32_t l; + l = strlen(s); + return l >= 3 && s[l-1] >= '0' && s[l-1] 
<= '9' && s[l-2] == '/'? l - 2 : l; +} + +static inline int32_t mg_qname_same(const char *s1, const char *s2) +{ + int32_t l1, l2; + l1 = mg_qname_len(s1); + l2 = mg_qname_len(s2); + return (l1 == l2 && strncmp(s1, s2, l1) == 0); +} + +static inline void mg_toupper(int32_t len, char *seq) +{ + int32_t j; + for (j = 0; j < len; ++j) + seq[j] = seq[j] < 'a' || seq[j] > 'z'? seq[j] : seq[j] - 32; +} + +static inline void mg_revcomp_seq(int32_t len, char *seq) +{ + int32_t i; + for (i = 0; i < len>>1; ++i) { + int32_t t = seq[len - i - 1]; + seq[len - i - 1] = gfa_comp_table[(uint8_t)seq[i]]; + seq[i] = gfa_comp_table[t]; + } + if (len&1) seq[len>>1] = gfa_comp_table[(uint8_t)seq[len>>1]]; +} + +static inline void mg_revcomp_bseq(mg_bseq1_t *s) +{ + int32_t i, t, l = s->l_seq; + mg_revcomp_seq(s->l_seq, s->seq); + if (s->qual) + for (i = 0; i < l>>1; ++i) + t = s->qual[l - i - 1], s->qual[l - i - 1] = s->qual[i], s->qual[i] = t; +} + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/cal_cov.c b/cal_cov.c new file mode 100644 index 0000000..9bcd71a --- /dev/null +++ b/cal_cov.c @@ -0,0 +1,139 @@ +#include +#include +#include "mgpriv.h" +#include "gfa-priv.h" +#include "algo.h" +#include "kalloc.h" + +void mg_cov_map(const gfa_t *g, const mg_gchains_t *gt, int32_t min_mapq, int32_t min_blen, double *c_seg, double *c_link, const char *qname) +{ + int32_t i, j; + if (c_seg == 0 && c_link == 0) return; + if (gt == 0 || gt->n_gc == 0) return; + for (i = 0; i < gt->n_gc; ++i) { + const mg_gchain_t *gc = >->gc[i]; + const mg128_t *last_an; + assert(gc->cnt > 0 && gc->n_anchor > 0); + if (gc->mapq < min_mapq || gc->blen < min_blen) continue; + // count segment coverage + for (j = 0; j < gc->cnt; ++j) { + const mg_llchain_t *lc = >->lc[gc->off + j]; + int32_t s, e; + s = 0, e = g->seg[lc->v>>1].len; + if (j == 0) s = (int32_t)gt->a[lc->off].x + 1 - (int32_t)(gt->a[lc->off].y>>32&0xff); + if (j == gc->cnt - 1) e = (int32_t)gt->a[lc->off + lc->cnt - 1].x + 1; + if (c_seg) 
c_seg[lc->v>>1] += (double)(e - s) / g->seg[lc->v>>1].len; + } + // count link + assert(gt->lc[gc->off].cnt > 0); + last_an = >->a[gt->lc[gc->off].off + gt->lc[gc->off].cnt - 1]; + for (j = 1; j < gc->cnt; ++j) { + const mg_llchain_t *lc0 = >->lc[gc->off + j - 1]; + const mg_llchain_t *lc1 = >->lc[gc->off + j]; + int64_t a01, a10; + if (lc1->cnt > 0) { + const mg128_t *curr_an = >->a[lc1->off]; + int32_t is_skip = (mg_seg_id(*curr_an) != mg_seg_id(*last_an)); + last_an = >->a[lc1->off + lc1->cnt - 1]; + if (is_skip) continue; + } + a01 = gfa_find_arc(g, lc0->v, lc1->v); + a10 = gfa_find_arc(g, lc1->v^1, lc0->v^1); + if (a01 < 0 || a10 < 0) { + if (mg_verbose >= 2) + fprintf(stderr, "[W] Multi/disconnected link: %c%s[%d] -> %c%s[%d] (%s, %ld, %ld). Continue anyway!\n", + "><"[lc0->v&1], g->seg[lc0->v>>1].name, lc0->v, + "><"[lc1->v&1], g->seg[lc1->v>>1].name, lc1->v, qname, (long)a01, (long)a10); + continue; + } + assert((g->arc[a01].comp ^ g->arc[a10].comp) == 1); + if (c_link) c_link[a01] += 1.0, c_link[a10] += 1.0; + } + } +} + +void mg_cov_asm(const gfa_t *g, int32_t n_seq, mg_gchains_t *const *gcs, int32_t min_mapq, int32_t min_blen, double *cov_seg, double *cov_link) +{ + int32_t i, j, t, *soff, *scnt, *cnt_link; + int64_t k; + mg_intv_t *sintv = 0; + void *km = 0; + + // precalculate the size of sintv[] for each segment + KCALLOC(km, scnt, g->n_seg); + for (t = 0; t < n_seq; ++t) { + const mg_gchains_t *gt = gcs[t]; + if (gt == 0 || gt->n_gc == 0) continue; + for (i = 0; i < gt->n_gc; ++i) { + const mg_gchain_t *gc = >->gc[i]; + assert(gc->cnt > 0 && gc->n_anchor > 0); + if (gc->mapq < min_mapq || gc->blen < min_blen) continue; + for (j = 0; j < gc->cnt; ++j) { + const mg_llchain_t *lc = >->lc[gc->off + j]; + ++scnt[lc->v>>1]; + } + } + } + KMALLOC(km, soff, g->n_seg + 1); + for (soff[0] = 0, i = 1; i <= g->n_seg; ++i) + soff[i] = soff[i - 1] + scnt[i - 1]; + memset(scnt, 0, 4 * g->n_seg); + KMALLOC(km, sintv, soff[g->n_seg]); + + // fill sintv[] + 
KCALLOC(km, cnt_link, g->n_arc); + for (t = 0; t < n_seq; ++t) { + const mg_gchains_t *gt = gcs[t]; + if (gt == 0 || gt->n_gc == 0) continue; + for (i = 0; i < gt->n_gc;) { + const mg_gchain_t *gc = >->gc[i]; + if (gc->mapq < min_mapq || gc->blen < min_blen) continue; + // count segment coverage + for (j = 0; j < gc->cnt; ++j) { + const mg_llchain_t *lc = >->lc[gc->off + j]; + int32_t s, e, tmp; + mg_intv_t *p; + s = 0, e = g->seg[lc->v>>1].len; + if (j == 0) s = (int32_t)gt->a[lc->off].x + 1 - (int32_t)(gt->a[lc->off].y>>32&0xff); + if (j == gc->cnt - 1) e = (int32_t)gt->a[lc->off + lc->cnt - 1].x + 1; + if (lc->v&1) // convert to the forward strand of segment lc->v>>1 + tmp = g->seg[lc->v>>1].len - s, s = g->seg[lc->v>>1].len - e, e = tmp; + p = &sintv[soff[lc->v>>1] + scnt[lc->v>>1]]; + ++scnt[lc->v>>1]; + p->st = s, p->en = e, p->rev = lc->v&1, p->far = -1, p->i = -1; + } + // count link + for (j = 1; j < gc->cnt; ++j) { + const mg_llchain_t *lc0 = >->lc[gc->off + j - 1]; + const mg_llchain_t *lc1 = >->lc[gc->off + j]; + int64_t a01, a10; + a01 = gfa_find_arc(g, lc0->v, lc1->v); + a10 = gfa_find_arc(g, lc1->v^1, lc0->v^1); + assert(a01 >= 0 && a10 >= 0); + assert((g->arc[a01].comp ^ g->arc[a10].comp) == 1); + ++cnt_link[a01]; + ++cnt_link[a10]; + } + } + } + + // update cov_link[] and cov_seg[] + for (k = 0; k < g->n_arc; ++k) + if (cnt_link[k] > 0) cov_link[k] += 1.0; + for (i = 0; i < g->n_seg; ++i) { + int32_t st = 0, en = 0, cov = 0; + assert(scnt[i] == soff[i+1] - soff[i]); + radix_sort_mg_intv(&sintv[soff[i]], &sintv[soff[i+1]]); + for (j = soff[i]; j < soff[i+1]; ++j) { + if (sintv[j].st > en) + cov += en - st, st = sintv[j].st, en = sintv[j].en; + else en = sintv[j].en > en? 
sintv[j].en : en; + } + cov += en - st; + cov_seg[i] += (double)cov / g->seg[i].len; + } + + // free + kfree(km, cnt_link); + kfree(km, sintv); kfree(km, soff); kfree(km, scnt); +} diff --git a/doc/example1.png b/doc/example1.png new file mode 100644 index 0000000000000000000000000000000000000000..f03b2bfced79d2c357e7e98e6f4cea8de98c606e GIT binary patch literal 73581 zcmdRW1yEOC*Deh|x}-a$ySr0BB&8dr8v*I=4ru{t1VK_-q&o%aZlpoLyV3vqe(!uU zcjmh@ckaw}eh&Q3*=L_~_FjAKwVvl$M5w9Ap&=6?LqS2IDacE!LqWkXLqWkhBEf(o zL98D_!4GH`bva3>su7Z1@CD*1uj>K@g-84NA6h}3<^)`V@=8O;Rp*(qpqYamtBJXT zsRgU2og-)s1tsh$2!6G*a5bUuw6nE$5%d(H`qM%X{Qh{Djf&z=6IUA%DxGI)6jBb( z78E?J+^phVVZ{`dDe zU0+%L*PZNL{&_9%f^3gx*f?0(+5X)(xK#M@sGz!qi-WD(WA~c&uUthrh5t1E?}z{E zZhx?Z?9(U;OYRz+WD24 zg1xJSGq~A5SO4n)|MxBa=}VaHak&0t`2Nn#pQB)MM3IHr{yov6$e|NeG*D3DPzur# z8lKPx*@!+brty1|>g&e(8Osk^;i12KM^U`ERFi&=E0Iiz9GS=bL|Wh0{7wQF6FQ;w zYN;hURsE^!G0ktI#%s?ng$na@!#ixJ5m$>IN4IU<^xI1=qni7!f=`TS#9^_N$@q{pF3bz6GJmnw&YuN|9un<$vf1?=YxXG#faY~$7_|}pN)zcc^`iMWHESq zeZ4V~-69)9WYFw-bAPq$G-w|b&m-)%H-$wlOCsPbotjo!csT$W{vgvF?wwH<67u(U z(O7DFzBy_d8XC)OZ~5$hSXYo~W^h_;4QI}NdE-*xQOjKGyg7i+W~l$g^Jhl@Y~_UY zz?@3i%Gpsai%YrsGUJm^*ndYP!oMhWECdCAZ>kg)Ety&_cB$2SG+XHEe0Ng&qv2w+ z+kUl4_m|ezk4EiFHDSD5$|we_pzmX2nFIAu%y={Onc(mGI z5f$%?3H_NE73mHt+VerSc5&D@o}LGDHL#cQt0#)7%n<6*m!Dr_VPWmhRPN7y%*~Ik z-LU?EI+iW8!rW>h_Bb{jrI2)O$)7)e8oj-Eg7CdS4x@lbKDzz&bQu~EH^1XbN8}SG z8d|u+kTsp0{!Bi$)Yn_1xqgT9p9NgDaM=t=idF`prb8gDEgr|;dU`hdleUY}3e(G|M#JtjF-JC{U(xywk#GEsBgwu-~=MdC|$c7WLP{@Ze4?>~25%+&GUPfe}_# zT59+EJyE2j^EB}*3@q&P%e$L1HL;_kqxWRi)JVohRgxjdq10U(OGk?>MMU)u%jksW zIYJ&)j;meBH=$_6p}^q?_35`S)O!_`RMc}qUa^6kg3r&S-l!=f=xww z5&Gx;6b5iZdXi&I*Su`%h4PgzKjRwhM-IZM8b&JXcL{s#S2n~y)vh8*h8s%Q*f3}2 zG-&f_a^0N(mJtMjcXQhs%IHs`4+{-NCle-kr);QdYDzVebrRcC46J%}rtR&o6_Ogi z-w(ht3MeX`HVKn_uHojf9;0A3TE!FY12%^Cna_DcK7V?nOZo32n~U++O_?s&W^+aN z>Z&Nr_pc7lLr_e9T4OazreJ)$)LJq*I*Nf~qfUoSMczR3OLl@#&A_oW358tHRbKgH z47r%$0DdawYn@N#5y=r>D|Bn)Bedn^!-bb^-=)UuO5;A-Yyc__`|*30XzWHZ;Tv?n 
z%Mat$c^T1dd6GH(r5ba_b#KnLzQqtLWw3ix7a-xO=1YbYD$`UC%IQmr=frW>=b2*3 z=jb-r8|d)%Wr%pYD-YFbl5BLmBw{!A*KbtK6?Lo-N5{+9xcS#RCHUX$@;@a$$H1|U zG?Y9h74}q1U|?OHU>QCB(Nt@m7w+RpzCxQOL9}Pm6%2_~-o>_uSAVI9O2}yrtgNP{ zM#sHO`(sY>@_QK+1{UnAdP!LCHiSSE&$ z3!!cPghyNivxAI!s3*Ux-flq=z6g#xCRvLn7~AXmBwyI*^lOsU`{y^blmEDBdSgRK z=RInxvb?;wcs<{;&!lvm$uL1HF(zhSUl{(6XhN>)Usm&VujXVls*NQmp+t(~ansln z*?YD(H;WphN+qvDs3#r!6)F3b{<X8>j)Ci;UHP)8FM+jKHJ7!h2#)SN{@Vxb z{z{eR9=N|-(>hMIJf!uc3x!>moxt`cxcK7ddT+=*WoVd4@$fsIU#nr{sKtxXvE%{n zUk?ZmC|^CG)sMWFi2^H#Hj?0-w4;gtEYD1QFq9y>`+?3mVp^`>L({knML{K3(%(69 zLeeP=9y17z)2p{7?AjKGHAz8gk;cMD_V(%LQba;Jt89o?!dR^GQwUOsCdyc~h(~XbVCw7^cwO^+Zcb6~ zS??}pjVK)y3X&-w&0CHlPo!4W`#zfb{43UNd~A~D=3>vci2}JeNxm=FYf+(ObIuAl zudX>)o_^`;lee`P&J}B?iRht+)9rYmv8F2#OxLgO_zFiVOg)k#66IF9H(mbg*DocR z^gk1BMVY7cY<$Q|72kcZ@)cjhx9sVri-4TkL#80br-Pmh$!e*e2Sfiv1nzJ-d--My z8n5}>Vm^$??#;7IljYRgjGNe~^txts#|Ds6KR)m_Ro-Ras^}V`TV*O^ww8x8J!-*J z!~QEgm7&l{R}IOLi*XAFuw&^5`x|1!WR+Jf?(KT6y|4StIvIVOc=}vuIP^_(A9r_p zQ{VM(VuX}2KbmX5$D!|g38Q*QselnNb8I-xn)~inC1~oE!)pw?nIgs@IV(GIyQOyp zA0Z}Ym@sD@Qk30C7`$@U?Fj0{=rh^#7VVecIYG2^2)Tx9arDMK5JK=*h0|7$dRDbd0l5rUednJ_a!zXK?5C&dX?%q{|OJ^?PC;o2ncQ z)wb2X*=r#SoqaCQlN=dJeM|rN`Y+&e1Ya|8tBpI`KWlF0*-d z9lKvZU4V_;oukJkBa20H2)E3JTwk)4wK_}IzDi(S%lX2@Ux8$}f)v9zYX0tVao9?T zd#qOL&0aYV-SxJv$J>57F@rG^WX?Bi^;*5lNupO>^Q6S#5xlNUuF2Nu++GyZ+tMm( zo6-zc@1ev6Tew^-NdRx+=|7I4ngLp=-I333KG;mv>0QtZ8;Zl&=`rz8@z&N>vHSBE zBpLxh7pTXa|2R)sIOfm6d$S+?cE<8?=v4@E`t1sGbjx4fd#UOtvt9?fAA2nx@w|T- zLL011f)U`7c)B_0aX8PDGx?h3y5dhK$~e`1*n;i{1l($y11Z@muqG$(7+}?xD~d9H z7)^-FMy#C7uo^VOAtFA;XeCfwe?!Cyl@2QW!zP#Q_a;L<4DV(|kSC8GUU(dTVC2+K z=~l{ai1&4IxR8C#=h0Ni${^A3o6Qrhge;`g`F5wr@b$h{Qh@GxsJgKly`hzz-0LdR_{lNB zqEQ}bXMPX@4Ia;Rrb3tBNx!+>ygyMcj%@W5g#3dE)M3e^f1RZG79>44w%b)J00!PM znh$-|e*AsNRH5Zz-2AdtX&n)R9L<#5LQ}gK%D72S(b2c>Qy-1AdxW(jHEni8E!$kf zUksG+mc{Rb0C$#jzHqTwpY>qo9=5^$*aqJ(fAKU^*yu_8sM&y^+XL08#k&@HLLJSN zWMh8PFlT;F68c!F)x{PMj2ABN>9TGa(TREu$jQlFrq4wGcyMt+r1Xa1kz6qhv|vW1 
zRAvUvip2)QmOWy9$;}d`R=5`F6wHFfD|}|M#Z^C7H{Md>*2tFTjlwAg*czGwM%|wK z%~{P5#S-i_%xm3Wqw0&_@8>OEEFwOO-;p`?2_0$ZwYP`+EZ} zJ&&JAwiFGity4VjO252yDmD03z{n_B>-woTOl-iWPnjqzyc^M=WI^=pt@#JWRIo&V zcc>2be?wd6ePyo^{P}jRCrXLuwa2dZ^`haM$<}3lhRI6Hv#FA>X8NR8jQPP;B(pXY zpXbf)us<8+6eqHAYi(joJn+hR+m=g0O6`r@e|zTWRTf|3X{hYU*_lx8j*z?hxqQ+q zUsAR4_xLPZU?8OJaZxzctck5wrzWY_V4spaHdkZTYqXI<2o9D1>qEJ#c zEs+KTv)qX3Ruy)Y)$-d9#@WeGeeF$_b7feO(w7FG+g=~)Pp>;>YZd4Odd<6rK4_O| zFnF4T-|#)}1ZfYK1`bNXpX4i@o??>e6_S`|o|uJ}D1Mky9LL#{HU0-x53k(I71=j! zL`#UG4`q!zQwnc?2%@@|Ps(b3fzgPX=WFmTZrDhqs;rt<%h)CMCvj6Flxdq^P)#sP zQQ}bk8d|MrR}uVo7v=^5n7j{lw={c&JPJ8)#mo=kVwBeWE-@*3cn^%L)nyxBGPERk zC86u$Q;3>&2|R{i0|M#v!<#2=z=1J3=NNi@NN~^>TyHW>=O5%**Q}=b8$k5H4KYz- zeUlBoPA;2X6wbY!w4Odxx+VFjUlvGv9y?`WQ_>aqc|l9wRKBrwOh;s${4k*1joy8S zm2BpiSZ3`j`r!bfeXCqc12F}Dy*E)y$XM8hIlkD`;!)Q3KW2mZc!z5%sdJD*Vsjcy zbx)N#l)NV^8`SV1W;Zmb`cTJM@MevZULi7gp0A_znUQ8@o8U7>D*T<;hw{aG^(VxT zk2Y@k*!Dc{7&i_*-+&mY4xL;;=rZ8hV^A-CNog!<8aT&smW3_ohe;$KQd<90X|Xy( zqm74$Qe{L1-`POS0^e88ylheA$AE^%@kK)K5K9?7m#r-N=ajV3(>el6V`Jmgh9QJU z_eLRv&HgbM(~!tmQc_Y~T^){cF}wU0u8UCknAE=yX&y%?INGzNh_NJrH*BQv8_Ld`)q^_|$D5i=ZAlirdN}w> zE$?5Kb^%fbjfJ94Y&zr-+wEyb65(5!`HUv#cUBm?t?PNOMh<%JGJP}^KxlGw>h#08 z{9OMe=Qm^R!tW65*As;iQ$b)z&4O?Z`Hv(9%md+U6^%iFKl$OA)CT&O_>Q7ynP!_b z#&R%Qv%!p772(e$J&u=p@aw80r=~M0%wt83JHO7>J=r&gjb~-ndEzX!nS#@aUZ`oWc9|zId#6RY)rzs!HNeta>|{-i4MT24Q4? 
zSTo5!BqU_9sUTM+nd$j1D-Mk@3QV}k(~o#*FN#%Cw%MP-uu64&p$*P=Kl(v}IJ=_A435ZB@@x(5Eum*gj$o2?D|92ja*Ha}R<8hxTRLFgMnPDK*^{rM2`e0j&B9F2P zs!J)2(l`OT%i8G8W=ae(UykV8X0MAqRxZyQabwsgOj@V`>(%G9A!Dg1U@&xL{XzJr zR!iA}Q*AAX-by0oE4<`)y^7d!H784|)8cMtZM{S6@I?a6oVu$b_0bKWLUdk3^3B_Q z@3kslh9r>=q_H{ZB)r%z{P4l_6NjMSZC40NlZ#lUjO(66&Dn^Oe`@@hsL-Ww>tm## z#-=aR`sIthbF2AkR|pcGsMQF`>%4Pc|EH>t(O3p$Jh~FnFh%++kY)1@n}1M1WR8+w zg+L%xe5{Dl!>qcJpgY>+r|U#;6UD*$WFfieprEk+Ek+7eOH8bt zt^IDoI&6D5Gb~=|Fy~9Kze((P;1DhxX7BAtYx0>k4i2_kQIxmV)6EnndcW?UlM*l) zwBoeMx`DB3WaHZHzm9~)lh8>8r^>ZQtF-~*BS&)X?+;Z%=`_pUTaD`i7Z60hCi~Xy zcqvE&B5&{4unK#2b)41PE%oLgkfs|%7XG?0cMMi|2{=g1I@M6E=T}#~zW|<+o!6@} z-D9y&c6EP(&aI{<5odo0W9TiA=b1o~Fg~x~(nUYqbgUs|Vx`cr3ZOxOAJhsCVU7p4 zmxl`?!oryTPb{!@g?V{r5*67=LA>@EL-e`>d)1Z+2P;GHFbz>KyX*ziZv&=*ArfuR z`Sh*@U#5GnD`Df)h33A+FqZe(cF@i%G{*0sQsQ5UsyKQ8bV;Ym8aM?2=Q$BzbC3Og zxDr#kHE48P<+K>s0I=R;=e5?u>vNuV4_}VBzTu@>E;)Oa^tS8e`7o#&e}D6ob)jpu zLxJK9u8}VJP3UEXM8UW?Bp>nq*A>9bkEhFL55CuOP+x6jI=nl?n+dl6(HH<Ss8gp1z~uZ#aDDU-51_4dR4FR;(c zB!I&J+$wbgo?fp$S=*Yba(d65Bzn<<`C%z2IT+5)Klc|(inE*bk#?gv`kT^V>BC#Q zarzPZ;E>{L&lY^-bL)k#0R3@j8Y2HKEhB5ARj3AV=?)lZW=~$6c3AOU{=Z z%snK)zLg*=a@J~hREf$Z04PY%lRz!^HAu|+vcgo&QKzF4f~wl+@}}Xz?ngvkc^#C5#OWGVFDS$PJ9^3=A_ie1|V!I zlQt`Ao=UbL{}o;&^tUkpYE1|g#)&<60%YUqbg?Qq|F}d8iLfW8RWtSBZfUNU`WHtE zuP8hgN0hcV(`A}LDA$R!N-A0WKLO0#E&2LKV~wPtZ${R8;_LBjVvoeACnMu+rAC!y zO{ZJ#?XDuPMkc~f!lyVUk4&HuP8Ag99}4W{daTQn&%6rcmJ%g$Y9R9lVWw$!Hw2fH zvtl8tB!+5+hxp53Mi3REIUd#SmrG$~f=K)oUgo_+ z*hnZSg9%b%cAw39(G{+v*xJNZ+I(&t@A8r(g7X&(`kH+21?kS!@Q7p#F&Qh>@+H@( zi>ia;b!#n$0Vtp#N=r-2&W?E_6^fd`8ABJF6&pnOyy$TAB#glsIw4MP73=oyzSk^) zb^3g*piiS|?oJb{&tY!8ESZ6s(%gQ5oZ6xbD)BV!qaX7oHHV8Kf3VTsX$8WKLG5lx z`@WEWY<6pwu@-+TOkn+;NGp$84qR4<|LIlsoy%xl5ROi>D?{6P4f|w;E>~%}qpG+d z6%|ZoSfR2K_Lzu{N!iQKd*N8r8>b-4`*qTdu_fAwPHqC?Z`V65Z8;M7=FC1<%R z!e|hoO&$FM-u~tClscP{Lbx~|I!>R4=y$NgAQy-;?>TbGGv@jawsP>*5#jP8r;W)T~#*nB;IHZOhYwy^iR zYVh9f7GoRT7VSAB|iM#_^ zG5{IkaXgRYO70LQ(N&<9tkc+xWa$%j#tXu}G{ljKbkT0@RELxRkn2>BZlbfZv(ZLF 
z^yyP16tVvP{>U|%NL;M5fJmP~wi*m%rgFJ6tR+@>}{bfZ;g8ExEgK?xmv5@UrH_xjS}e)M`Ny5F(wO*(A2F|O zy5z%g`){oc^9Seq8EXyB>(5`Ler+Vp;-#Hr6?9(cDWpFH>Pggec@4tWx=b@|b+t<@ zo^tQHiEt=19Ew*wOTpByL5yofLDML$m`xaTL3FxDa}tWi>PnUQv+`<4h{rV}4u35U3xGKijUXe?+#Qww>)soV zH;*D7C%{#tTw4?^4Z{91=fJnB6x784Qz7j?>4c43tbnwRP@}j|Z)fvEO%7Klq`$5lxj(^EYV-0=I}U618Um1~p+@*_E3$nKEB z$%iBWG!b0^3r)h1ru$VE5?`IFVXnTt35NQroBN9Nm&}(UtM@SNDh!dZ-fDIyi^=;9 z7cbmjh_Q0tR*E-{u1e>UBChiuMt0umv*L=GeKvThxwl6|5CIkVOZDk+8Kr)8^*<}%Z&IH2}x{=ypD$<`f80s2#s9t>l%=3ZFF?(MwnP@8#-+nzio=VAK>!qSM z-Yc%tkA0O!59QGO7@9qO4r8*+t^+rk8X8?)C{LYJ>c)e)HGU(e+_=5 z0&K8WT(dsQ3%(UW;*VL^tx-iPmOMZBjzhmlxecJ8wJ(nRBiTY8tX3-2T2Yg6KYk)~ z?DF?|(;XyKK637n6DFETezpo{IejPSA&1DO}tanX==wSlN9$s^1 zHytqXBxg-?wc1EjVYLTGORZ55L6U?p?U#C{hu@Xb>@yYnipbdzuSlR7^S2#O0^vvk zKzvOt41Y;!gEZ$DCo_ZUU-_4d=Vt?%&-PpQ1A)hEhhGGIEA|T}`2!XGkIps8z*Tp@ z`WlYKpVdY63MIHsTg94}WkP0WJ?cybh6bdys0&F7{wgm>^!(h9mkd6cD>#kBPo4`+ zlJ+d=Wrb(ZB;FFjGgW0E#L_=&=xukt zGyZDPLiFE>oYwvl7}4H1^5rtk@`De-6|yG^GV`S6`2n0p^zW$v75DAb%kiu1%P#&s zhDyC5dgxwyaoUaR(QuJEF6es>WT)rND=71=`))0>Iz@ajL_CuGZ6LX_`oyad$d${f z3yDhdCj|!Z9ZNLR9UTUs&92+%It?GgBiI3f4nk)0PcJnR`-yb2;0}~P*4TDe``)cW zBse(uV7Z+0$>`Sr!Tv9y7g_!V@%NUO0GolSRp?lSc^vwox{}D2WmeKmQ%zkX^Qh7!1Gg z!(ldfgDmqg5!nz7u9g}SWZuZL1zA)P`i~|T#NPPzz6g`Q;%Dn`GE2Ox$?dLaQfRd*U5(XQ7ImFW+s-cW@xBIQX;8(v4M{NOh0StBT#z#0WAoqlWPl3;UNu_$luGF!^Kd?~&@T;PNP#G)nSH;L`XFF<76dqp77* z;m9eiWQl|bBSfvrcgcsx-rt>#j+D9~e+B>t$a+xh{m6#Q(Ez+GWn zYBKGqeY5Wp?c*FP^_}N7G5vUZe=)09XIJPjlF1xN-kf}w;3W5fD5Tp4Q2p({AASMa zGzL4k|9nQzVO6Wic{7j3IF^Q*mJ9^x1&4 zic`Lg`-{La~Gci3iE7&`q9)7Wyy1D`*Cp2_DB>YR8a8St2A{!3-Rs9nX(N=Y#)N93Mq zx~~d`UnbXTG>1hj4Wp|%6?c&S?d|!`ld(}a;{=80E~y4y-rh>(7Q1%1@Xv)?ivUs- z`v~QX=g6mCBF9^dE_8) zDM|>DeVy8Sv7|z{dWLyqjuYkMMDdUYm~K6gFiBY2+A^nRZ+LJ}GPFw*O};f9V5Opx zbYfy+;}R}PT@BO~5UiI0+py!aK4vOyyhi6ueM!Liu*J-GxTAL=`5T-78LTRXIV=#>^?q}iWy`a|ewaoFIOHybU z(HZ%U1Apy07_@WfFBrHPuMAj?+DIB!w4l7@C&c@nU*pU7jC0i7YYkulRYSf(VTiBh z54RyDzk=v!+syKakc-Rq~d1-tlZ`N^+@9wnbe1Cfp9vIY`yM}jw 
zaq#Miw`<$|r5;}cw-|lNDiFxYmC8in0eDu08Owys0brqy8eBEb)I`|FNOjbfqsABt zXAaeFYQ-C*R@T@iO-P2b>bWLRAS~7TYAv1?%?i^`>{h~kx&5*a*%Z^gWTcjhCY}N2 z`o%Vr5T-wD-F2RL0R;@q`z`r`Z&m8668=R*(t#2*Fssan{R&BR3Z}1UPY&4*m1IQH z5pl*BFf)&4jr?Q^LNhgiBXp3lo5N;m4r6yZxs>Jm8cNCU(k%tuU>p;nCS_X7_+@>* z!6DA)pd^GR^jYIX8h6pR=iqT0=6uy#xI+fqUT?zE~ULlDRw$_gdMu(A--X>!z zzp(T#xRW$12XL4X2_Fw6p`73_8(&z5ll$F07qtzB?y1Yfy~Vji3taA_uI~?t`GAEa zC;hdPkB)Aq+Bud0F}xsX5C}XJWuy=IrD$mm2ow_Xa7lV=wsGM(#_3%*5XpV~mGXo0 z=u9RkE#o%y(FvKII$@ILu_0E^CBw-nwRv9^ZJN;GQg6~i_^F|J1hZ#)vv0n~K-~>j zOCaLt>Zm2)s(VpVY%x%oN2!CS;(%cl*X%eOPV{1r3!`?YqNdIl2+p8Vr0gROWTrUc ziRMaotxn4G2AYfdfgBNUyvz(^&j<#fy>V71BuMz6N)VVyMRg%yx&569yN>|6+1e^P z(C$Imha3o;8Y-_1BF|D)L`pxB?Tj(Yy=< z<;lB+5KoeHQIl|1h{~<`cRL)o34KT2=b;Vq;wn&86XCJ9->q^@6rT)l920-X8B5ZO8EXinijK=3H{^Y^_QrvLL zg!jLW+mW1Nj!f~(e2RLp5LejWViSkb;A6|QSEq$91mP-s?!tg5ReyzY{teQJT*n8E z5~?gm#^q0u#>hzHcIS=4LRWJEnZ2R;b;)YYm)HU)nBRG@J!7FradNslqlPd$pi4a| z^U$Ly`O8f9e2!do-r1lg%@v>`Ij1XV#YFgXUK99A=M?&Wr$$WZ8WVX`(4qcLr}I-; z5bF~LbTMOB3qkkeNGQ&=DjfhxY#>zex~)WtTcWrnBpgkuBxE6oGb(YDG!u%Nj{ow^ zm^|7LD*~V{mnMu4?RnP26G;(1dCexS07=k3O32h=W0c0e?f>4*p~JpN zlP}0pW#x0Y8!YwnOVo-A{@9?G4~Q5xs=ARJ=*d z*;MjQVOfPo+c>h%7T}JNMHF#8)S7sr^Rlzw%WU6E-FbjZs9bh4%Z~WhXPY(cPbI}u zv~=Zeoiv6pqPh}`*wp;uRz9e&2-Kx{eK2Cu7qaXpOcv9Ta+Jm((G%rZ*_ZlNw?P4nKi$t0ItT%^U4b+ykF;zT18`~8FIxf51x!!Og!$Fs zOboJDFZs^zk0z-^mqwcV$nigt3JAFFKg@M}IU?RpLmrY-wO{Zt)hP!q`S!}pS?!Ag z3@^>e=8s=v;%-KL7wd%-+tcDu!6Rl!1W2J=H@%L;RA$pO@w{NI)_p%ghY(;k;^%yN zrHxwuZiGa84nFmq0}d6g`+Fl7kx4LW=pZU~wE$SGnbelBRnjYCrJgae(;Kw8??VjC zImlr<9ob8Q=r5{}eGq7%L~vdn12F*<*d;1XYS}pG7*ZYLV@@1T z=}Dkl4$dDP#sii!l&&%q;@2k(D^54E6QPG3xZz(nokh|Bdv$Yz)Yd)7wtXexCrVL+ALofA^mVs)}^ z)P>QS%B|$~pSg0HpsZeC6$g$Qo{TCiej4t_k13-#*5G)X_J`x~3?;8w&SLNUg&pD0 z6LX4KPvqQ5N{cYAN6y~thYn*4PRVnI*%h)kSQf_Dg=6Xb^b0MK=L?AlSU_hvpajBqj;BMRAZD+T=aT@4u$qmtO+{>$Z zHqPOS0s8;tK450b|3wByR{@L%)2mvRG}@D&@g3TFH@&phh%s$AK78NDzeTX-o9&PZ zMpOyDyqe2BP!>Og`T6nF+1dIWX_&Zkr2Sa0hoiiE=N%c7)e|<&q32rq_Dc#!B5Rjm 
zr$942H{@<52%_d6u?iJ|hJGn|M-cDthO{#H_F-0Lf9B-d?AwnR_REdyn3tH%0V8=z zJ9A$?MykW=ZkyENJr3w6GNqa1Lx|_LvmSS)b?dvORjkt1(B)>lEOR589jL$85HwpY zKkzH7_fd4{7qU;BJ*<2tqW#0jygVe4d=^*k!*EfSy0k*WW^oR89K5Jvob%v;KJxeIx4Qe3UTRRqNMF>n*>O z;!G9jcCX)}^W zZz`|D8K3vt8zHX-2{6O;6cl*DG>8Hmmi%<$SFU#5_fj)AsDMQZqqYIMaYqgqWa4le zF@!Pg-6%S!h*haE>5nNdwp?3)SJ6$5NX5*8-!SY`Z%(=i;UZCoWeSxS5* zq}CTig7nmmJLbamx15}kGs3)#`4y`}T0kw66)L_iD5^54y?y*NCH_oIDvVG9lu?j; zV7rawSMmGwfnIkqG9+#;Uy%Ctz~A5#<_DW7r`&iOF{9Yn$0(BI-D8A%zHYMpsJ{8b z#x-4@ZSMT}mj4L8|DT*2o*2JB+ej&T_Re<5={AW+r2n0kGL@hEB)EO^roVyLHzwlZ zM%AapVm>w)_WkZ2q*bTpbsF_XVlx$=X8J+#`+yb|bpKMn=VQ-rzId_x zDqisL*0^VCMgQJ7hs9PI|dXh187w)<2b1@$jkGzBYWzzEbnlU2>63QA`waBMPOjh%6qpp5Se~yj6ujh7 ze;-ZT@;vK(IQr{=7{ zRAW10fs9ChL&BK=MDHW=;%V>R-`J-gUAtRsPuuSiJzIR|t~vXgkv6d^Fa9H~^$ZlE)mYJ$yv%}sc~oT7%%SwrxcW0;5jOY*Ms{J%-w zizn?Dr#|bIi3r-1i#CMrH<;RK*Q zc;ZYJ`o;K!DQaF3&vX9fD6iofRr_8))y4Xo-FN|uDoP{R{G0kzJmQ1@ zKb-8?f=f(F>WwAU+vrDu=+A#q;1!2)r|KvmIe^ODu|LwVsZl2K2=Q?TrvIRE)bpV0} zFHi@$-Wx}4sSL1p0P3F~wH(h>87ws0)$l$pNoD_<^-pJ$e4_5bBL|BuSAfKiS@H!X zP(X35ktYsa;BlBD@b=cVQfmR!34rRN`Em^F*_VsP2mC^!(!AL{AjeT;9C^l`?t_tZg`KMs30lOJ4vgX9@+x0)gotKYmSIt24M34qp=YVhj)MNxi zW988%bJ(-$AoqR%5?2&hZ&0)tiO-fZgJc3Lp}pF%3z#1?5`HyScfdQ43b|+TJFNlX z+Y?5O5}*t!*Q!K*!bp<_h8211901M;NhnFafLiJyA=1iUxfpwtU$N*7ge1hq#Tfx7 z-x-7m(4+nBZG2qZsn0gEzygtL78)FW|Li4~HOF__TA!ZespqCeb#8Udq6}nnL9$prDsliiTV3|%9S+dy==(X1YSYaP!`#jUN2i)|2Ov1gTHpKIkL?d$ zfS+g6Z#)5YvN?io$f1K^POnFVj{BlI_Fj*_hu^yawhg#V1QBRb1e`r!|6w6hjZS7r z-G;9wKMaGKFGHW7J)lB5nMIG6j?NO0d}BD*6TKBHK;o#B#>z6E&1r!goi7oD(D_Y$ zrwa%hk-FtQGr;WYE#cJe#>Zp2Le(ZaB9U;KBMI}6iTR>3VbWrkl{1@GOtNv9(F?0+h6YCO-)TAKG$m4Qfmti?J~X**t8oN0?r%abe5MJ8Sf?Q z&{svgFXbC1EFfs4g19_pzNcWhsiVpJ01~gS7iCXtfNvgcw`vpey!LMK^iwhoC@ceL z-i?u0y^2{Y-1=yTD6tPIJq?X9Dg2(qy9-H89ia?R4cy)?ri_>`u;F7hlpdf-$bdNM zZ%T-OqD1wA`D}$$Q3YEH#zt5tnTXexz;a{b=2g+Ec+HzLOA~`4+9WMe=6ZM(yp^M1 z2##@Ne~)oirw42(H8aZ_(MR*!rRvyibX|3}FtJ8K#jup~LX7A@emc?MVALZ_z-ba? 
zESRc8`1@af3&8&8T92e`8DVo70M;jpQ5Cq)ewuo&$Zrx38qU zV;zIyo;wVY#cmOSCtmXVRl7colpWGRZwpRdh?jyzW7IAq>u#W>v)0O@%BYrp0 zOXcH*$c&d1jLgBzbQ%)v+901mWxAy}z@gm1d%OJm0kMB)XD0%Oz741s9MwCrD1R(W z6fwly;u7#$aUL12WF#1FpuuTd#|%2Oh+ww>K^`YaYM;W6>DMCBwXm~W;|9yHb4Xgl1fK+SVP?9m ztkSh@I{aufsY_<31L_O2^Q!O4Lw%jK=vCGyrX5*J^tnDTm3a90R}a6Ae*?P2GD=Fk z?bKY$@TrD^@u=dWTMupvn=G zkT73vKJw#Cw32y%&XhN9LFK)D8rd=I1=eMs&*A4;+=d}TDqd3kc}LG+No?=uAuobL zmYgEj8gWI!wu#rM`;J4y!lJHG)al$SO^vXUS$WwNGN@`m&8W1~+mjUuAR@tS@C0>> z#*e_xpl(wejYf#0#WzmWE@+BIe|E30efgFrMI^+;C_Mf#CD`Z(Dm@xxG^=3<3o-+} zuDZYpJ3_Ldi)c6X(*0kM(EPr?GOD9JM(~-qJYJ5|Q$Haq46z}fqMxuds&@GLjjqwF z`IckNJHJVkVFM30bk-5c?wRL_Xy%ptW%D99YZ8V}snVDItHhZ}V>k(Hyya`Iq6*52#%XaSgr`80 z-W^L$V@yej412n1;tf>@N_r&;p1dcvMlrum8UW@H4erS$nA&U=BImQ4$WNA5#> zQ-9IIp*9rCG_?vUMnZ*v!_x3>b9?tl5A=R$B_P;S4oN9ZVu2_lga=SU4DTlPAa~0qo{Y*pixa z>`}Y?Z}{?TOn5uDC5h7H2@XRgza=UOxF^&X;#$ae5JFDWgu2^3wuEW@352hU{6OVM zW67#8CBH1OTcy@X*UPBz_bl4-$QiB%l8DAt%E6i;2$FC%z4ajFqwT%mNjJ0n`Zm z9F4IOK+@_92b=$f&EO<9XG9p^Km5UCgqZ58%l9Y3_Ww6F`p63SvplM25?S~Ek^=0H zO=-xM{;AWa1;(j2VX^pl3vn1a&@|&~cjn)3^)vvFQfjVP*hC7S(`V&lMi2jYWrpK7 zB#J@?Q{!T{P(y^hDMNw$?{)I{`C|PG=lftQl|?-UI+lT+wYAT^Bi}yG6EvL}A1r$J zpXxpS4T5yq6%(yMkVvV)Io2_u3zq5D(yjUE6`8#Y$t@2YEmzXe*k-bQFF7ZUg2&t; zAr1Ih98*XHD1|usI3HfUHafslzJ1m&1V4~+?v>hu3s(C)a4r*;-9q2}{oQ+CTT>_@ zGy_?-G~)4cyV%If5Fgiy4})WR|BI`$j*2?$+BFOV3_Y|00z-FqcMC{}lz@~VN_RJc zDBVbhl$3;YqjXA0cSyH@=izyM})|If3Uo5E13;{-A4Ia0wT~(9lQ@g^H{xhWHwQ}n(NX*TI>C5&+Tg!yrbFk zl=>RDPB5;sk`$#-uCKqdXBgDF@!Q9GUMpw;$GxOa*<)^Dmp>Zx@(fGh>Zk#klm|Jy z-Y7o6>L=1dBgzihJOG#csxXd{HFtEO#O&?1At@ir!jd>Szl}rdv1E z+leB9D0?6mySB3mK_{-ZneKeDE*fwGD?*olMLb(C%xqM z5sf^d`KwyS4GAm~3O^?o#KKX+3RLqj*yL8YrV+AX5l!x0T5)mb^55(6H2Vch$|bMK zzdGv?6x8;|wgEJSFR18GC?;$&-h56%^7u$8E>hkE`bkAbY9sY)b%+y0Q{Zaq>(&0n z_L!`XJ^1P`2uKuPuQfj_rrK%O1`Lwlt-ehFLK3Osu(P+<1hS@I!T{JMYS4LWcty@3 z)Tb*9r>nD*&Wuj3&+q>tZ$Ata-Tx0<@@6y}d6HNi zGfT{qXZRnbn8y*Ijcn-BGcW+C3(+B!?^$fh2^iv2W#n8QF5%HiQE+o4hvy0KZDdOa z0~!Si3YKqhP{!p*k`Le#Fc 
z3MtmPdO?z>eF72hD@*V3BDE|8ggih#K>clXR~%;Ge#V=+m52}^=c zNfEJ(M+N$**|YcD4YFJOo4={zzl^zy-uBM^37HkG!s>&I-lRW$jOCV z4fk|@YM^J%8Ik;Fg!nMP*?I*zN$H6C_>sgtlpu&c2abz}cX>2wc%f+=X;{ellr;xD zLFV>wHnB4M)QYW_9!}vMg4uy3bxl5ud_QWr4m#tsOQ(VqdKaHjB&NKL0Pm}Q=!d6F z7W;TX1x7(gg>78%VP6+Oo}39SWKhq9ouIXM(%sefg|M}$&9i=Sq2e9RVd@$POee^4n9@Ys(#wv9yfCO&J7X!Q zd3anwEN}g_M)&Yt$3CGKk_K1aozegq*CZ9!zrGKTQB2-mk!2G>K^OJl)dQ9;z+;)N zbb3wJr4~z@02iBBhbXwxktGgTNZ=@9H_U?!U5iv8MB&YRv*=BHs#)xqfxk)pm_6#n zPJ$@AoI(^xxt%Y#FNH?(d;M}@sLXD#dHi43;e)f3N!6Rl?nxQ;VIf&LvI0o##^sz$yygRsmtrjFSjpI5PpIM3OV3f} zzuv3=_K9(D{PNF+m7mE+wiH44@Bh+3fzECJKE@?7jlEG$ny6kbj_2XlXkb|H*XYCb z?ZOl@2%Sa#`da>U6djoAo?8^UzAqs99n{5l@l1sbCTw%J8;&2m!PGU!l^E_8`w6Ee z%>8XzaA+~W-^j3iLC<$W-;aNY2KtKbv>O{R1vj3V$9E%HSs9xEMk>BE2# zQz5wonkKccDz5{v!|sGWar@9o9YA;i_&nw-nmuS5P`54@lIamc@HPPcKA#-gc4RRk zO^l4s_+E-7IE#+pF!=YUeree%T2nj)Sr)RV3=cTjo2@CLOXbJ7qHSkIUOB<0#mGTn zv< zvzve0prVn=vr#i5LD)JLghEr)ffR(+S@*O%ez?xTj?aOKI3vMKc>|t@Rt+Vx%m|hG zUjIvYpnYZpn=qhST5KQ0q=74HKSHh&g$>J08iuHN#^{z~uuYr1f@?7nM zL7a6E)F=Rn9RUnW(^Qa*Zw@7bxJo@($gZZ?9nWwK{k!!srr+Ytn>$iUPnZ2OtlxV6 z73Vm5S@(6W0W76qz>seF12BUMuW6fS8OI~ZoOb+H8!#jG+l_GiY`M~9OvtUc;Q`%F zS@swS5&u~MRkME-iA@&Yv!+zf>(kJk_R@_ZjlO}NDpD=)qt!RyoNG&Gd z4kQ2DgGY+ZM*t}T>F7njoC{ME#XmDMNoEFo&^d$zVGvYG0^?Kvs9rkwn5V$eZa0H~ zBNB0j5DDi?J7wmd{TS4nD;fhd!^Tg)donhf`hEJj(HwR@U{Wrl-uS_FqBf*#ApTn{ z@IwI+w;o6)kp`8{rH*U8k!mklVD057pOcR2<*1n`{*3{_B@~JCDdCI1_C)HK>T2Vj zo0_DYacl2hV2(5KZ2oD_@jXf9NMC8vk$J4p7er@D2s!#FyY6%yhIofp4dqjRCHLBx zg4XG9QC!ZyJ|AYe4m}7h;yd)FW!4ug^KZTUyLEcvq^dIQ*G9M6XNeEXZrJ_1`mJZ5G5y2 z^L=vBe4i|2p6^1w{i>}_-nz`7rl5i0zpk}X4C>sA?)QW}VTf(I0TerHSH&lJEk{R3 zfMCZ9Qj!bOx2}%rx*&%XMaub+Rv4b$0!;0&5D%bp0&YuJ**<`%Z3}(;AOh%uC%!&D z4wzh4(!@pmBcKkyb-aSHD6X@F=yuC?cC@2i$-nrZF&$8Y$33%pw^?4Y&ks(u%(cLe{I@Gob8~DrV$o5>)s#gRR zQSjg&qO7PPDgncJ`0`7WeWNtmH`qzHUUC!@NY1zO_(YE0KqzpHrVlZZY~;7 zT@s(NR-KxvO}Qnj3N$kP(<|BOnIflBmA}7YthPJdf>Il`%}+I%{fJWweV)wntIVWoQj3X_aiTr8Bzy7dvzx%DO_1pMli+25z!cR5UzFal& 
z8_O$k(7`^Z3y=-&CZ!;FUN%i%8bl9`LPtjqe&jP7+lxhs8Ap)DXG8D~L=lL+%Rxns z3hO+lrKUzgXYV8c<@ORN+h_v|550AwZ>XW(Mvq9R|NZIlp}!-kaKDD~*k0Rmlk%u? zLAvp+oAg!a8;p9HxVk@|JU52YV|FgyRq8M%Xr^wGzHx6IKfPOK_s;#gpmOW$Y>`;! zbv|AF&vm=;x|C+vdQw?(dL*+rzsADC$+efNIr!5#~Ad`zi4wCO^2=VY8jy8kkv{NisK^i zzdc4j5f>Lb5pp}{nwlOL*k+>sCWln{Ad zIzj9+merCGaQhIl{QkGn`UWjEcI?ZBfl-K4WTZN=Wljh+8cxEkoa_6+)YHz`_x??| zeGwGLi*BPh7Wa+xTAyu{y-q8D^r(+UZ~fi}&EoSA7iO6c7%VzKA+NgN_2`!O`pY2- zgP)@xaYIT)a|M{_=-`Yms~`_U4^ZaGE)8ai>Me3iZ)N|;rzoRDMC(hii7KZmH3Om5 zgH%=f#K-fy#j_DGRLAxEv-7NHHA%hbXRPc}eaZ`*#VDdl&MEWI&GB>V0WM^}gLIOF zxuZAA2xiRHyI|>SXAiL_q#UIQ6ZYH86x(BG^d`z^?^P_%u+(3O$62R6llFE+M~8yEs*{Lg2&ekc`DkbO3I_4RU4jI>m5trNj{O=51}JQ54~? z*ck6_L>dF8)iMc~1wXhJEKmbMd!j74Sd%OM>n#eU69 zEQSV@866{U>%*g=gk%*~-wmtrps7z6RG+XK|LGh28mpM|IEA33re2(w#hP+>8 zqaEStUjO~x@_W$!MVpU2miNy$ZhuFby(P5$5)aOP;>4QVJ)$K4%5L)0d9#5=04F(M z7!0BRVpM^-G!C*Y^&3YHqlQWyVzD}85>--eZtgJA$lSd1CbMNn>K}%JRePHI%Z~0< zf6y*Q_ZCi**jMzwH&~C!2!C&NR@IVujW1#|Z7tubK1nHr@4xOMwm4#}e7m&t4EQrE zQVnxmsm3Am=4HqqiP5GVg zrum;ahmDsi6d2u!?A6Djg>S#9?7{kLof8PJa_K2s)h`U>f&-aP?CJD_9?cJVkrSwW zeto({NO=CTGxaO!eAse$3xJF=YZodJ_xAR--rwwLI=ztM`5l^et?(?^okI|cCck>t z1O3w3{~E`tT`ogYZb(`}Y}WL6(BvFePggamv#_J;W!;#Tc>Z#&&cm1YAEM;$YyooL zdQev^SuP^{B$?##CeZ$7QvXw?GBbjd+8xu2O6t{(M-{6U*->B+(957L^wzv@1azmS zm+feCRp#h?nv2bzX4um5me~e1^2?5+WuNh6mYXl-U(ffpqMmLw5XXI>r+QsDH|6w1 zG@Z-ZMoy)q0^m+I_Gmd@YS@nNd7a+09@U3^YVROC2J#>Jrt9I{G{c^*L%8gq9k$ue zPb&(fw0l!$y)3b_+*bdcguDO_j`lieW#iV>@fsKi1Q*1CVMX#wFN{4tg*FP_=}#cf zH&tQmeRZ@F;Z99ay;6Au#PHj*2@Rm64ZX-rN=V}_tZXuzk@S8AV)22^MMd9jo@9NM z;BZ+n7$-MB$PmhM*nRbCPr*;sis-LZN3g45UO{D!rVc2IgvM6o?Q*F{#JFol3r_X2 zo?`!0gc>uo)xJto6n=rvPzuugl@R0^dJvQB6Wx_@$q;ef_3#w0C-234d#i`~JUu(4 z@*JI9k9Xho&crh5p6b-DGLTCYT~g5fvPjf}hDMu~^35{m*5(BtBeycSlO$>?5E()A z_UPDcXo`>pp(-hu0JxF;WDuB0<+}xu!Dh1}o%`GCn!;ywuQ-7YeL*wg2DHs_x&WY? 
z=V$P3cywJq_=NuPuEYN08nVG?*rdywkid1zfL4e+0B8c#fr&zF0tj-^!PtP>tdhu5kg)7}K@EIx;-7gHl_4MHmca`i$QW{G0xp=X zzrX<9HGi>+Q(pzhw5pCZ+ywobP&k5U7d>ab9a&U%u9BmEE?^z{S2!CkRjOS)8bu&EmU`a%AKHYN(9cDex^XsLdl?*88U4yb&okb zE`Psa6pmzEYrLiJL`Me_c9as6;r;9P+UWjj`oG0la@f#@HjQ%ExvxHCzOtG{f~!0(DR$#OoR0PxNUYV>4syUS(?}5C->SbFm<`_K1WQtx3c-Zazd>R{M;fJsSsBD1QhAy zdH&;a-6_M8--Q@#ySHEh4(bIB)eh>T?Ht+g2XxOnY)Y||3UmQq3Jy)-#273wIoysF z*l+)olgp1OZFP#O_$O1gx`|H0d%wc~`N#BgfEc6tlm^IYy}{BNFD7zuxo$@m_9R|G}_vbb#dXgFj< ztk2cH4?rdf5Z=ny?x|u_ulp2mbAXEwVjSZH!7YBMD%Ar`(HUic<#jeIjvtef2bwnQV0b^h(5pIHDb*asZ1x~)FJF9A>?Je_B~yUjp01vxj< zyJi3axk3rIhxmmtFf!7j$J?g_1=AY!+FCIKu$gKiTq3l^>pTegU~O#zIG(_NL);J+ z2LIQ^B$l#U3~V|E@H|y)$rl`INYj$dcvirw9#_JmH|~59LX-G&-DUl_Vu_63%j<kftzEirXc*?M(2;ys~7XrlxKo*7^)3g0gAJ2;Ehx zrSe<}k#HD!fO&|fK%@b<-!O(k-md{(C!0yDkz+D&#gaj}Pz7(JM$7-2b5qQatc9V? zxnK9mlr-V>;ZnNmEq3VNZ!3>o zu1@vr$g7(XDpuAI66PF$m-auO(&KXE;sgr-9P^rcfdCA`s2Un0M@J6#5-w`8`@bTmKRE6VM+(cab))UVvos#JQ)H^gRtn)8YMJkow)e4Xe<4I66 z2=LxuQ2hoGNIc-gB9I*h)!j7qq9#a%^fE4_!Su5g=dD*3znDy{vkL>O>vfRehv429mu!6(VCa z_ur2M!)D*AahjIo0M8J7Q$zTA&D4Mf?j&j1uIOF~Bhnc)|1tQ=GE($)x4@UKu=i$! 
zvVh=Wvowi!?-b^a<9t*UxK>}_6Ve)nMW6$+@1L?gj&Q~47UUsI1U7fZ13kcgj`~1c zUpfKl=ni`-53@pIb`_n-Zz^=zTy&X%{P_SMKs}z2t9vyA;B(X(pSQ%^Ozoa#2;pZr zg+9DzbM%(cSWc?=Zk9VxW-Ml?E-&Wo&FjdnM-_zV9^C=YVoq*Dax!Tl5onG%NBVA8 zz_iD)KToU%aJzvOE4W!K-i82%qp@={T>ckNn>F#+KrHWt70tm6{$|TjLeiZ*9o_Eq zAp1LkQHuK+U?a2rUhn7Ucjd|0((uTRjP$S&U|9{L9J=Kwa3{!MIHnxxi`Z<~GbA&H zNa+LiF&wMWA%$(Si+r(Vt^P^IS#~cHh@u{EqqEY6xV6iUXE>)!)d0I>6r zUJ{SW#>i1+k7!ba;RnT~fREXUAXp(d_#q@<@P?qU_k@y7U9#t_+zYm_30mh-E^9_+)` z%SCS{*f~<(OFvRv0Zr8g)F#EB_OsRWP7*iUw16=zCs7-kK=r8j@~C48g+%oM8L%Py z13@(>|5FOfsiY zDinM2MMOh@uu_orl5Oc^ysMAdCK`1Z1_a-GfHntS8_chI-Cbc(0t`!B8!^xEH{+$O zV8w$Zy~0OVwhyB8kjhWg%2h}n6ctFz%p;^bKn4VH2Kd#&dy0McH;c`})6qnofPN%F zmjf&C$qA|Sa0ZeHGvRx%27-mIX8~X?-rSt+yhtWU2J`Rm7vPxNu#~^)qw?=&7y3-L z_)7hPj%&A6^L4gJ=GW!Dd43o+nJ+o5LEZf`gRVgHF(BkSQpg%Wz_@Ae0k~R84>K8Z zfGg>B!O8-(JUU3Y*&-fqF$(IYpz}7fIvqt}A1uGREX^U{nWO=j3-w1DDIHx2L1Sqv z#+cl!kXY8j*WahAuy)43ZYMsZL4 z!Wu+i@uMN@noh1GjYVe7f@WEJ#izd}z8K2>b9dji@cFLtECp((*setnWkt}__qa$d zcf8l<0c55+UGv8iuknRYVZVf~yfBf~x;vtf_e2~t0)mkWPfYjP^^r=Y0bk$)G2Ywo z4T>@X>0=$hNW0Gi4yepK|BDI~39zYQ$(Us)P!obIbJ%jpk?oQXi)502wvHk#uLG4w zC4XwiB)^jl8}~kJAyiVLg)1Fy{JdPk=IBdYcXD+rwNlEX^ix$trq0Ugv z57~nqk`R~WHuNEE)CP9y+1x5TQNo_(2Av%`*ljZdH$oZJIt2eN5`Tf&$6?usQfC}h zab=}nZ^c=aLIedOkA(;+LWbSCb(J^5TbFCwZy!%JIBzPOS-`t6P0sHE==Uq ze-Md5SbfTB?u79_0xg_zEGoOx%BC4LG7%RhHRl+M;=p*kntf^GF8cKX3H)_51#h3o z$?LPfTG0vcnOatUaoL4t>GxsZgx3;wqETEprS@XURH?Au42NF&DQm~U(0shj>))GzfcH*=x+=k;s@K8e#TdkZ%{AvUJ&7KMoyw= zzSHb-2g5^JEw5#g@pbBCWRp*?QNnQ4$%EoLcJYILhG)tkze-e^3Qme~gGp~R$yOrb z5}pOP_59)`eG6l34I_^6LXQRnRnM?1L%5jIe6W|2+;a>B=2!Gdsg0m|RFn5i1|)em zJ)|Z_3tyH)U=~(iY+YL3d!()FjG?1FHj>z+7F=7Q@u& zmO1Mz;H{B5&wO^D(9QH7HsD@zg>uJ>%*kqZxZyh|1^kwZuyOr!HZbS$>eW6mv7i0(qCj25b#U;e*6yO?;yH>9_(cL5ET4lzh2! 
z$D(6`_a}7Gh1{>Z@6RCDklQZ@y$-Ania&NSpfRMuQhH?}O_;>ZBn$V0+m{iv;%!^1 z!B$9cy+g${y;U)^a9k><@_8vTE*-=OIHjejHh@inLO>)l$z{h5cH4@;ZyU?<;nbH$ zX2doNBD8*198)7c2g&?O;d8v&-3`%d@1P0Ku)A6T<*vZ^Pn&-SM>luB_JX1KT&X^9cM$5)Evk-DeYBBc=!tlHWvC<-j_0q8 zDiiQ$Mi1LB?whe!0ClVYLugHCz94^E>?3@pIZ^cwH8h_wkZbVUkH2j1t+CI3RZw`| z0d1iZm99ztEFuC$_JTiwh~lsqK@(KepE<-H2-4EVM-`khAn1Nuk2VU$ALtD=PRClk z;fE&t*5ISyr+-tf@^p(2AxTw5`N85_B7;n)R6s}!*K}Y592?Rq@jE$awhY>p!2h^I zx{6w_^@D|AOBn{qShYBWF2wE=cQiE%GzcPWpBX~8V7@8O|GqON+P578kE`kY zUWo-hw-4dPp`m>EoT#phQ=qQ8m|glXv-3Z!*owbdJDQrR2x=)AC0MDH7z~DNRTQ_Y zjGn4*o3XZhz`$u7cM$?Tn-u0{r3=aMJhwSOgBKe|i|#Bxm#YZ$Uj6#rQ4l@87=M?e zg_fneO-Nk?g+NCA`w`z;bJ%1_Cq_=Cccaqyo3dz+y|1B;GR4yDI~l~Ri{Ge+AA>K( z=0)&=hapNy(>f5bytSv*`r(aw1%sGD5aR(CLdQe~Jqc_77~vKf9k3lb3g}plp9Klb zNxgR!L|#v_OjRWAYKuny_BV(=WsdobyTyoTdSZf={kuT)Umt6MdC zqt|1jST&-a+^SyA&0gsfAxbc1d&?eCT4$#|4BI-Pv2ze|c$&s1?PXueRoLjE)UwL! z`G+KOx&@0dA~*qfJ_?T;0Rn;U4 ztPZsr1DZHKyeu>fBSX5=wXJ%Ywv=Ozp5Y@Flp=|q2P%_wOERo_4uY|4{LVrUxXkj( zWDlDimNK(z=TOtkcHC6S=E@3(7hyyf6~U`^16T}vHD+>s<7eF3)M%g^M{m|{PPU(u*0r0cmV()0Tdlv6S z{>%&dpM-|%5tyJsgy>1w0oCnAKfb?6{`UB1So3Jl;TFHLC|=PTzYG~k<8QI^oq-Iv z?Jpyd@0sFM7-R#_VJRM~tQn&&x${0gJ|C$hhdleK)WU?o~R;b@p z#ca7TZE_l>JM_=x1tR)axnX!!p?c=eISQfJkEe0z7dR7#1f*HvhF9(vDh&RlTfIEb zclaDB36V)eu$!VnMO}AxIx%9DC-gi4Ij84*{UF7BKoGX-lV7 z_^=I>KnI5-uypN#L!Yp)fU{pOYe)vb^mA)!?_d~(AIo00+<<+1GW2q|wG>x1D-;L( zZ>F>Tz?d5a4f_%Ba{sll@&|17rf@u}+>IE7?8AN8k;a^8aMig;& z9Ru6}2fx%h4bEb=wrZGL33LNS!AD5n_M+HGzhHE>@E4dki8-%96CNv5B{ZeR4v9{!@`%3RyxWy_acz(C#ug{L{h z;*&~25JymoCWF(A$!lRCm{qvpnDiEofzXGie&=o#O30HZPh8RKA0Vi0j*gB#?#~1A z^{Xu~>?dUD)jT!ZZtw1LWY~=wG@HaPiox`+z!;?yq?3|zpK74^vxJxYa zXJzenB|i7CkJY!Ch?g;G?A?2D-s0C+qhT=uY~cM>x<6J@rT4sUGfOcPf+Zb-k=iKEc58^hC%Z8}yJqIA=&Y z6(((;zm9RV+ivX*crw}eQ>Q-c4kYtnef_9;)mH)IdW3j0c9)! 
zRHa5R5dWU|`!TB7k5O=R$?{L9io-Q~KKB9v8-(_+^GEhlU?oYCC44OEK!d`L&}NqD zm+LN0U4S!rX~4YMmkYtP;YO?j1IJq?hF+u|Q~4#*KarFjFn)9Zng)o`y?^jeWM-?zmFa@pxeg zOf256XFbO<{l(F+bgOgueVw{H>PxAMgZ#hG86$P6849AM@MV|;1qFHI=SM;e(qAH92mWJj^O1pbai2-k~TM@czk~~PJ4s;D7fF?7XBoiW_go^YMRmX zN7nncvZpuaSXG!g>pD}%Sg9Nw)t{dz!Di1q5v!Zjnjylt)#w-^(l7i+&=Mog$Mk|V zKfNV?@)D#!LmFt5sF{>C!zyh;MyVdHR+fSdkIb39`{Oe&aVc;{srmROO*zw_2hn%4 zf3ulDlUBMxxoZ@@E>Crgk&18@Yxus~e;-H}c=n9IcxbyLkn-T9rH|)8F7h0J(-kLl zS@fH%7Lom2VluZWrFM9YLf{Z22dQ?G;E%(VPg5h{Zo#HKof5ZTJLvqkH$~+Q`9;oQ zkUKn!MDUZmXsL?P4K^;*#Yl#JGkB|sGw03byBXwx&1Y>pOJBp9fXyV+Q%j;M*EbD= zqJGEq{p|fk#)`r4lO^#@zgCHC_=fNHMCc^-6Vq)iIE;urZbwAH10&6vlHNczpf5I-Lnc&I8lQa7p0G42~0Y+1AXg zIyvEYP7;&Z_<&KMc(K-u*ruh2_O|I@)|Hk2orocmy*iz9r;!y^)X|D0u-2k>zHqjP zBD`M}rVj@X5x04-ln1o}IUHIEtQ(E-c5~a|9g(un$=DS2*HUo*p#Z37v?;|3R zP=jyaJU8u8fkk*GXxNx&%Tq&gwEhFQsE<}nKL$rNCMO>(wja>$D7opgv|CP6$J4=$ zrecSXsrpSt9n@uCL?9Wd-ngCy%kLHG(rAWJGO{2zubB@e(mG~W)4c!wjbs0)7>Wsz z$$njmpZhju3~3zl&gqC&N=wL&$V?CxFNO9*4xgL7=t%e+Go)?=syNrJaOf+L^^7!hx0(=p-wB-Z7Of!i;hAjkXt#Q*IKGNyF=?O|dc z9c53R!U&WgOv*<>NE&RGMurnw|2c?{|R-N!^qsh$96`1IC}M^+TzZ@EWOx9OTVz; z^1x>I{*S1?JRdXCk2p;!=b3oJZHUxr2c%H~h;e7vW4hxttRi0{V8SW;*;Z!k)ESkC z5Ifg?c%-UDkHRi>K1^mkChvAAChs)}N5f?7C=%r}_PTQ}=kEpmQ}xqsJxdT-(nXJg}gPcE}835^u!lctnXl;&d^`1O8$_TTx& z__rKv@m@KMkX{&~GrXuBGe&{yY)9uLS?BV6!7+Arxt!NZ5c0*C%gbR!pMI{M|R zZ1V49`_mx{MhL5~fEKTibLVEu7wtai+qs_E)v&hq4PR5L3}WwF9}***@85iHSUyt9mxjOAifGE&A4%;iVe zt+#XHI?BMtHq;taRNWTl|k0zJllYWB_}S)`87@rvla_g#6jQET%8gs&;< zCeIk>Kkd~!GasT!Y}H>ZB|H_?<}}*+ZGGc*n)&aUCn;%Tm9Uohe>4XYnIqZxR$tLN zYlkGJVe0nqs)#?28(5a9sm%2!_r(awKEf+eAbDtL74u0>j68?+=gRhN1Qy}=QKE9Y z{^&m>)wODZorn`#VUbXQl$ zjW|7+drlPuBY;>vz_KI>>Yz#D{KJ8O@^2P-K2zM=RT;Oq18dceGF>t0Z zI03ObG%LWdz2$)I?WBk1-tBtB}NVRJE>`Y#?nJM9G*d?k%EmhJk+gSvqE_H$?#?;%;= z3mOc%jDoREIp2*Gk=sbOXx?OHnDFWD#P1=+_9@-_p)*s9qL%8oN80$XNGvauR3nViDyJuJR&V|b%LsS>XjQV5U!L8wOs~# z;yF2^nkB3aIno7&+#(GaFBmcWqvq7pZ}F;dk}=%)Wj9pC; zHcLs=CNt(4<5}0AiZF|`FxgM*9+Y^0?Al154PJ~4lBl(kE|`vX9Z9PeP9DJxz$w{o 
z5Id3cAR%6PDel`_>xum(XPfWEm|1{WcCFhtodLR#{O{F#oUGLCcM604hR%Nf2n`nR zBi(#At^$jvHPdfyEkDJkP&d@>Rcz?qQh2#h81>Hc9p{#b&6Uy~RhtPz3y*gv9(|Pv z-WY1yX}f{>X!X4M6rx$RoZG7&9o)*ijKio6d5S2h*uP)*n7Lr>vii>^W&Ez;sqd0YTPUf#h z^KN2qvz z*d$cK{d=p*2!H=u1 zo`B_=+1N{9V))c8Y;%|%2a3n{Sv1*VR^iiF{I7QhK~+Q?0_Z_#l`N3&BZ-qk?|R$w z#1ZEGn&OCd#L>Dj@yEG2s3_tdqX8pQq9!z| zb&VOf$S-d7m37|B80=EzcKV*%2g^YEGSQo$+Ryv+iumc3?o;Uf%fP)rKhESYlFSBT zMJAl%X<*g^_9Fw6=J3Ek2fm&F>d|{#BC0<+$5Or2^au;=$0hD=P&QA!(1|qLoxZd# ze`L8~BY}HX&+C#+wA|ike(A3lf#xdW) z>^%3gDq@h9Ibm>KgK@4b=3=~6UPftdPql`Z5V^Fuv}EMq8T((I1CbE9|6u_n-70Oa zGT1K|j(OMSdeIU-9$$9eE4vwGpmUMlrLNH1!8|F!;VW_%&(W+(+9OThIEXt*42~jjfEE2=!XS-Thukvl8sY255(cYNnX);S&rCp zG_3j3>}B3qRMGQZ)$E!}#dqz)<_*TsxpwsjA_0sCX{w@Y{m)H_pO1#K53AnJ=E}_! ziKi0Iw3G@+5ne8V_PF|SC9F!qlkS&iV zp0b5nyH41Gd)X(NaYX0y{d?b}FW&t@OB>|dNMR@rv>b6g!JDhk*HKqenrZgrm81nD z|BMWZP?<5n*9Sh4y!NHsAU-(w<)U5l^&_y6VRRh!{_pD0{)&4FbA5GuH%&kABl=%| z$yZ7}Uv5mTpRS8;jrCdm3`P`gV9j-6W+o0Hg+RFNbQwm6Gg$ov58wfMNYQvfCQZGI zBNCyzp;tNl@&^BsMji=;WQHR0fNnUF!u*}1T2N@3WyAlg9Au{ zUm&2Evh5RwWB-Oa$&@xIGiuDDf@yng}J`|;z)GxuLW zIst%LR8%4j%EKH^E-rwCUl$dHB{vBWKHL-8+0^*uv{faakT4LwYQJ5 zi)Plz*%`zfK$G?B2FWo1pVqDJal{;=P5~d@Y0iqzj>|jNn6+S$+%!B0}iWn+Bm8gOnOFep@ICsSk;k zwzeZyx&l*nnEv3H!5{UdlXaKmNh%(5r6zs)gWPgAE3@uEvt_vM%r)ATy zTEQdxUi5jEQ95eBBSJisC$-T(6SLNpzVT3z!~}mZ`#85TkQ{y&^9(Y7q#4jB@WIs~ zBX0>7|dlV0};5hGe_yThi^TE&HYhvt**+$ z)d&iC1wYvL8KJNPzCx2S@AXP7LJTVq_mRhQ!}w{1^il({L`F4afo5Y zs~Xuf%_>A5Zb-ljctC%LA6_lhel|58xjR+}w8%#CcGCW)S_J-7`7DOdVYNGK3 z8$o!I#tsJNl^ib0uo+}zC$&nHemAmJ(3JNnvDMwyFZtC>6sz;9DZIQX^~VW=vaIY* zwQ6l4yN2;R*&c0a`8Uta!7exzsbrdE1#Bn|NL}ctr5=d-xAPZZSUedkwftDv8|>9m zBIu7CP|S}EN80Z>i$#LK?gOlXvtvLscNIhG3ZYfEJ%V-Eynh|Q(0xF&iYMG16Q|n7 zJ@uNla=E*Cx^41FL!qM1iFoV%c(AjvpZ&+1!%zM{^bB5JEeGS(w)HmDW)$*j229~= zc=-9X@~S=Oai)8!^ml+`^#p{!k2NJOe>>AEew5=NT!#j`4kui<;|5b09sTAnsG6vI z_GU8OzM}7pu`bPknL42_=UJl_$}Vot#y7>}Yn1^kklqOwz|U0uV<^C~c5No?XD7F$ zV6kHB2(U*$V401Oi2a5NNeG)JDV|Gb7-kKIcNTwbKEX6{s=96lTd7{G18<%o9jjrT 
zT_8y)PCg(IycX=WI)5KD0?ZRD5Ui*nEY9WtszOkbkW#kDhC8)2gOhSbLuQv~#wod>w5aYH*J|yB@@UN|2UBjs6v53&3x)P`OT$V{ zneiNaf)@PkIFS#vAE*c&*8A~) z&sCarfq&m!b^vj$&1H8|d8^(DKn`L;=ZLVA)_5t^<|G6}__ek;>Rz_)-D$@!Za=;{pH!by*E44WFaO0qhM9&| zCOldAe-1RAzj3}bw)#qRq%W0Q2mFt+dUfX!)46zh8qJ~|cn-j=}a_oM(Ppyy}i&|R9nmh#3J#$%jZgYC^w2HTKxhTPZ3U<+WrHuH$dv> zw`aEbKeWA7KveDb1`0U9&_g%O&<)Zd2m=Dr(hU;QjdV96rP3`W9g@=BAl)6(-3Xq| z_y3)nbAK*}nAv;w-tW8Cvz}1vD#!CFKegclawY&m^O_dIgDNqHzIKfzE-zi&gng;n zsMfaP${7lsisxz^hlUp~(=1&(57#SZ+q=-LoIW4V+pp1G?$sPN<)r~2=lCW}2d-js zSrKKevE3=}gYu`yUh3nt=J#{hf(jSiIStKMCzgWP%`;noMS+cULC8O!4=}LO@*RVI ziLdD26$gGE-b!5)&CUf@R{K}2zs@IAW#jkrt4 zk}7vijsWQIvl$35Qwgwm{g>?0B@*2+Mm9bi70;St?s&E$>l7!B5E|z8Au=Q`1F`8Sz4 zLvLSnKy)bG`Unh6Xh<=SJJD|dmzfT#AoZ92z<6&R4n{)2>l_+6n0+dd^Gx{AO`c$* zTV@t5P#^P2F)D1YG*1h%A4@1GjZ=Oz(LFP%%`^Ag*oC~cb!@X##==y&S;Yog(r1)> zl@0e@eXvm0QeqKcU+o<0`uFv5vJ=lC^j72E^wj9LmGxh<3sQL)$9{6J>N85J#Q@57fBmZct62nSJ{HvL^MM=n&>eb$XkuB zlZMx!N38CW%=LVdpXtq|DNvELRmp8q96C;f0FXW1qojXCoPv;=}uVOZPiWI9ss16-6??uJ|_3# z&PE||YA=h$160~;sC77BKwvSw^WB|I zM<0A<4+?J<#%v8+HKfj$bx_6H{SqR5-^^l$-xx2)g9(F(tm~O>!+U#!s&jfHgWg+w z_%iIYVd(3T+d&p4l#S4acr*ajBH{S2b4@=h^(~3xCfnq23x?`mrjh2o7$Uz8ydqCC z3|0UYn#OiT9~qtlc24WeLId}{xD;fWuI@5(um5Z)ZBkQA7blitKcc{8Wsq_z!b=Lp zkTuo%*-Z^D5A^;rUZ6}j{jj=!7T(6F4_)BErKen2bc69wN)?n?=*+_Mx6x4By zlG6M>*XInCgOj9dJWe5jJiLY^RSl%lmGLObKPJRVwYc#I?26LSvHb=5sLd`#sE zyvjNAUnW=Wnp=gH6HtRHi#vdxW`05I_?2fLkDp~_v+u-RRBJ5?OtNDjIrr2Yto$q3 zpS--RSg)XCc8q=I3cAtYhLM?Ethx=~`88auSs>_Jj} z($A$8Zsj#JYO{HbE;=59GX-SGJ;JJ*T!45yz>fgRnJ)1zXEk(B_>^;^EtIUmp1f3` z$@#7)OUp>-xY*IVWV?xT>Tt{lBCc4ZNtQP+*g*QEn5d{csMHK`zs^b{AMML%J=@(! 
z^=5eFH!kbo&@B3UF>#``!Rdy6v`{z$r3|{QD$)Ce7y>Dc5U!{5k3`@TWP_sBM=B?6 zA?Bc^1_HG|k2tRDYMpmz+({wfc3a;s!XV*T3rQVy9H8BZ_ydS{JugnVZ z$58fbIo_AMt&?EOQEf^h9FoNfKVpiOF5?X0l9R)>xC;pnB9tLOE&|+CX%k7sm?P*r zyBN?jBj4u0#ADFldhsTwUA9g@JIKcrC=Ny}9L^eXy#a*;tCtVchs@w`1$7Bba1jT$ zeNSbXzGIWF)l{L(sN=E|)uYX7u)|ERbD-DvuQNmu>bCw??31CV*&GrInHufR4Cu|3 zBx=Mf?=_M(&zQh+wSW7B_rw_|UQ|>hn~q5B3#ym%8|0NJp%sVs@5e1fLU{>^goV8> zElnk>FH9YY+i>eq^-O%uqd5Xn&)l4&3+bCi-e?pil(0O@wUY_QMz-2_3|71p%}zHD9+jpclh9mi1-%F zHK_H+zvje1it`NQK)>YSaI>f~HRzFaK(dff;r<1Q4g${u8dz)>3X}Br6w>c7CBX)j38Z2|57WYfiDcMxx?yVTcNy_8$fyciZnT&l6Gsge`C6g75yH-+G_vcsNR#Z@K;6eTpY3 z?!bC~Q82aZF2$*c_(Vt&SxgxIg7=Y_pXmYX?+}#IW6cWN+zG?{pXWaz2cLLfaiGt- zKSdd{o3Y8;OQ8ErS&eQ{ZW1dN;nc()6$lo6O=oR~)lJ(wqr>8$F z)q$cl7(%KGe&aDyZAbNkJCt{_HB^C7(3kM7SKL?fmoy$-p^em?w$IJ*8%v#EmtUt) z7Fvc`+g5BlS)==COglG+Z@c0j*1gN(h6-yQA$x0(4)jzNk?*QBD&oSy^uAiI=tl+d zR^_jnD_@%k_-su$tFE&@`c`B)Ri}GmCa+*% z#qd+go>~4lUfUtS#p3Ct24>}@4RD^3K*{De67tU^AN~4##3tSk5@-tfRWbbO$Q#kO z!Bqbv)wls%7uk5@9@zU)U{fFHSH!_8{j}utYCD|NNjAWMuksiC>Cz5uVIuf*!?!~^ zvyDLc`o}9AZEWhiRsZz6AL+_mB*rm#uf{Z)cbhsfCVTT=>qrUH+Ce8+p?%8h6*Bj} zb_T&bHF1}S+;~ly9v#(Jj#8^qK<}0U{)W#N{91rF6Idt=7)izajMmz*PVBaZ>&oAa zMz-_W6>kv23r5D?mq953hKR$f0l@|-#*XGL59j;9tX5-TQ36mqKFD}U276Lj><%P0 z|BA^YDg+6Hre2^xnn)be_68ZAEekr;4X&)CNt^$L_{v86QxT9QNsHYv(}nA+27xN} z6MRcA!MgT~dqM5yGWXlb3!<89q6bd6T|tU4@_!_>cODImShiplRRr(lc6N1DHO|#o zsbzFGxXEaR&9bF;d@`V+lg(=uyY4fxkivrt2mGcFM=%k~zG-i$z(TckdUo(2oveLX z^-jkN4#opJO%7Kf!@=MC13Su8&enwk4}YC*NTxcHv(v=$;R3KH1Jd^A5y(X#&Zs%S zDOWp(OG<{;O|wmF#AiY}<=!*y05PDaPcn!}vez%(h2(BfR!0Ds8$gU;-`c*~GGh*} zln6!w-sh48f)pev{skV9gNYXM(;fCHIg3~79bl#H7?8O(6F^c;{7mZ%<&BmECw~qA zma#4bH=zg`v&R4?uk}!33d>X;PsJZOWdK@g#%dPH0W;=D`a%rHB;Da=tXV?;P(k*Q z7YF#DoptFJFd{k48XdeJ8U!8+{Ss0WJ{QdER-Vuiqt2`fTHi?*pGZh-Mlr4Gl}J{bbeG}y@(Wa(E-by2w=S!Ir3YX; zZ95Yu9w!+m4THfnUyI^IDpv6!6aNN^eDNC=l%y$j9K>ij=C8DUnrF^Xf{NOeY;_6n z90elXMdmBd^AsM7Qob+3he6hT&Wa}Es5yRsM9hO)hRpdWhv(s*< zzt!cPP<5i$Nb`nLb0uiFC;!cn-KTbdr-lPm^GUTp!senYGza(JX6$Q7dB_ElZk??a 
z;fnguhXI<3A=cT%By^4ts9~3rtubJGXXtllKEoy=^Ekh8d$p*>ms~cYc_eLFqIq;3=O1|7xDvK}sYzhIBN^t^Q*5^QVXb z7ncOSvJ;KMR2f(Om35w!a3%B1CcN6C`rm_uONuagyH8OFc&&UawoEq(518sWezV#58hq&$XuY^~Z=!nJrdZ`g5#Urn+M~|7S^E*)J zLNGhw318p=i@N;^(^4xc)XU$6ng_ZY*$oC;=Fjdr{dF_IXj=!l+WZ1}p23Xouj^@@ zjr;_JVLE)CN=rLgaK$`@k%*aJa}-F_c?NJVcGX4-*y%vO%%~827nz<+T>*(v1H$+!t}Y~ z0F#0zdqk3WL#f8}=Hy%R)f7tiJ!vJmRF?L!eC=En(;?gd`m*O_-79pN@vfiPbt2kF zj(Jo=Y8G)~F$A4~*!`5@O_q_LBbe4gHUQLiedNU_0?f*OPWQ8oHze&<5`4z(2-1I^ z`6x;m@Rs^|fx|A@euEaTk4G@am6z#qK$7N*jk#`kfqF9nP+4IU7#m2Dg3baLkvCkb zaHosZ7>|y72U8C*ug=Pu08rCgB5+*N5vPAkQE)6y%V~6$?<3eAj2Yc_8@s zKdmMiwB=X)9_)A*0Rso_{7%A8NB7W33^qC+(<8tDa|rELg0nK@5ozu+rA(nI#ZQMy z|4ri6rMaTZ30bS({YZ^XC2lB)$EWplF8r?1Q$LB=`@u#)veDPYLDPkuEu8b9CPzMv z1|dEG=aLFvrbA5>uv(iT!HY^b8x5p@GX-N~vxSY5D>E;wd9X)7UR``;Q0%12xrTTE z+{g1FU_^Aj^4anQIaCUR%Jg$5*+0N94QRU!fiXe^&sFO5^T2I4N{y!0YCmb37^jHn zQ91E(iuMK0cdto!l0d7fD^=%0Nvy5rBewy{pPC??o4n&ENlliykJl)kDxR7}x?*|- zD*Xp_>V7Zfu5ZOkz8x80>1=a*R_)g-^QM2>5YxXo8sg>r?Br=KEfG5Yx}5soz^a;^ z?vIt4h&DFqK>UD9Tz({vuM9(;C z*F}(%G?r~tO`MdvfsMxycLNAZs*gi<1|Ve>skj0vu@Sab9I784v#dUv!1MK<;502p zKxFsj?G6_{C@TFoG5mI7;|M1Um=m=ZBZ#8BZ3k$&82nr`N{D>|5F_>G7u7Y!DrFv2 z$q*#(%AQSRA`9uiYNry2tM63;7V_9dQXnSjYclVFYzd%;3&94x4C@7m?%T|1+Dxk8 z_1}mpz4N}WI?^eobG!d`n;nZ(T+IJ^oq(1z<(far{*&|NuhBkR)v0k3Bn7n&dBW8I zG}x{{97m;qc$r20TnS00hL0L>pZodfUKwi%q z4d7t}>?$CsBV^^H{)1`G%kDqAUr*;3Rz&J=TD3F|(xzejO1%sZiF?Zx`3qM`q=y#c zL@+}K2tCKWs+3#Dl7Ao?c4rehE6nmMHmES_sRCqAu2C2om8k+@sLTDQarKz0(?X(zL)7YeyNap59Ccy zxNwu&CM;aNNsJzodTh?7I4`=1MjQR(G&R*kh*PzD+{^1j|5b#KiQF>;u^ z*FHK^#jLbubvm1Xo={7L^D)WvDB)?!%#Z2wxczgMt2NhbMK!~!g6sU!wenZ8C+dvu zV-~6N=RqNH?Blhfc9yRr82bbrn$Nq4GxS8Lq49l+=(1ZlmJ? 
z3+l#_w1Z~5bGJt`4vtj$@F~{UTB9V{6tkF%vZ*pv|7!0~{2Oyt|H7%S{&_9z`=7`B zB&@#9AtxdV3#>fp7l{_q4}wfW7?MryU%a8c;Bdq%dZTHH<8w5*Kpod$Dnl0 zZjwJcWYf6^fCIO>=>6pj5qdFy@ko#dE8zXQA+ZDc{STWGC zT8zf7dM2w)>GECQUw5s=IoqS?spiY#{^mzpOADRFCCNWk*e6Fxjz*+HoieSWTCo-e7Mg&Ahjjl*(-MFE11RKqR~CFF<18&9W4^$lH6aIaY)`@{dfgmkR*^PQX${ms2ZI8emZLaZ zBfgoVpdG(T=igimXST9_P2KabDIdE_Y>Y`0yj-M80JeN>duWH2ndSJ=bmirOnm9d& z&5X}XZ~k&;b+vVmtt_-B;>O@!)MlUSTPrwG!1HH0_upWgo8_VOOF4P<6T`jp1-7Av zN;?P1;uJd>!1X{Xpl1;FGg0FE>2*gUj=fbHv8yh(fsB7w7+=(( z4y|`6ejWf1`kv>HUzSV5n# z$C3y6boNr`3Q@$TPk1i+>eZ=t^sjDX0Up$w*!YI2SG<1-cl3mt0eEa(-Wd1AA>r^` zIgW`|O1UwYPdL`_=LHp!xZj51g#9l(sCU)Lnz}X7?K{d4-s77OC2ptf-4dT(8GjC4 zJ?AS$P^0R7qr*sKP?*BbQH5LUcBCt7zH^?()4gK4$3yHm;WRo`E&oA_iBwz@L>WTo z9a#yxeZSd+fTLk}*E@N>U{kSvgyrDTyQ0hXrh8I;XYh#6x--j9fRB{9n7W|<+*xqL zvvFxvt~UGZN8MwL-jaC-Y9ULd;W=6KzY_f2f0u9Yb~}^%HGwfSB&Ewb zkO;^G>}?&D6ZpdLDeLsI8`?Eo0SC4P0)R~xx&&I_>m8Jh?{q7SyAm(NLhAWweLDoi z1?d_o2c3tY#HBXtQLe7c03p|ZtD!S;Mr{K_v%@;)V9eoU6)&^4LhsJH50SeVh=@Kt z5HoZOC7+x%IrJ9P3SY;rKoEQd0_b$#)ve)Ep`J5j#FMDko>G2{$~O0nK5@^VxrQqh%(sz%NtnxIqc zha()#X4v_4#v&LY_^H2oWIH7XI=rCZM-n%Td(`g`?L+L=*P$Le0Z1Zm&4JjXXIcON zYEt4Lp`Eyg9xk1}$_~iz`5+oB*4=!i66r4ed#h#hQiG;}tNkGL4=FkXgn)qwl3#H1 zJgTzQ3S-9>TSf7`cB6h98#Ba7xW;V_c?(@E3vguK912~tx!r`mCFIsxf5hZK^S!_9 z!2TY2w>q4bHk5X;RpvDiN4nnKwtv)S!$F-mYlUbe2yi92L!%fDgLaPT8R~OF1Ky$(u`NVf8OW0PL1moB3e`Wne$&%$O5I zUbz5*QmwIsEo+f<-2GgxAy3wt!G-VO#=CLi!6yujQZx9GwkQju!@ef2vqTTiJ9@Um zgquYW+|08PB#>aX_;-031Y~K7L%k>?4sK{tUt|7$aoFBC{#|S}~^0 zQ_Khoi9|@*3~A)J-a+TUguD^7k@q&9+lc$VxaQ=!F!D){!G!|SMTN73T;;E+f9bkF ztBU7ZYe1RkAuw>ilZd{|h4a<1dv`w5u zaL-HH0X2TD<=&x?LIMMU0ue&O{bs(KIE5Y41%3)8;Uj_q-!`a$sr$=Ms7im)n*wp; z_wH(8+58~^Ww=n6^fFwA`;dTlAjo3Wmh`>O)n~WgzYvppp1H5NxwXYbvS5HD3Ybf*tkcrYktvPL-!AcTz$DpNJpHEC&Dj9^$SP zfH09rEMOeb8Jxb>=$Od-g<*@0Rq=v=4e7%!?*~`h!^_B*j4}Y(fzcnq@;*i;y8%&> zfNJZOj~2T$9Ki}c)AfZ37wH2mK%3gEl&gg`6g4G=u$Dj!IH!;{>F%6MT&6_NXw@s( ze}Rq#q+f(aQ#8|dSkyrZI3(Xsp*iKSGjj%jIF^D&@sSn;Kx8z2K+>!Ux&&|TDO4@s 
zJHLqpaA#<1e!Bf8h{T|~rC`h8RB7pj2M-u_!qZhq{nU@k#Ql8qQmvict%$&d;6^~# zz(vBA$d-)_rXd#$u_OBisRRLE5Sv8oJs#4GRQ72A3>~MMVMa=x3mnHlmz)okPNxd& zd=UnbmnuXRdgP&zKmdqaj_<7GHr`}I+JfQsk?yVCLNM_iK`#_AA|lL~?fD}?HL~5G zG;jfgzSt4(CA^&*^cflRokyH`gQ~+i4PZ6UU?Z6>#E674VvoX_flr-U+YJG(XE)Ke z0$ML1d{{A>jIT!d`D+;GWKnDv1il8+zOZ8mQj9o4`JC{XxNuU8@uyxokZmaNMc2U- zcb}yy*_2#T>k=QjWGUYS0zdw)&0sRo2>Ffu{62qlz5k@Kqrv{@xujN+K-NTYZ-Yo0 z7^u(!O=!h;8cZamDZz*$&~~ISuHOI!eeV!O1VKqs5FJrV21{WR4=F?(20lfUqzgos zh_ru;03hdh+ES@Oytq)wbl%y74_9o!edA4p^NR)q3PwN*TO(s6af`r(w=+Bsf6+>% zzCoZMFe1F20MsusO7QIm7EBxrK?(^_d`9T;wIiAS_nH~_dv0WUYsu%|BEXFC0R(;? zq)8+9zt`x%?V}(Ox$xg_#ldqR;86Gj6C!{^L;&6*IS9C`)l|Oo|Mv=l#Q%JgC>qm$ z|KIBZz~})OoBaRy@s!pU=yFqGkV+vW>u6!I&G!n{X6l124VMo^LIYNfMMXl)ORG!Y z^v%oldzFYgjXdLi+43khS+snoAJ(X{uhFyqpsE zR6$=#tf^YA$;9{L!X9A$ykHpweCic>qRqs2I5~kn;pKi27ES`?XW%qYIT5f;qoc~-l1DAgLBl%@r28*$RNTD4{$TQku^0&;aa1vF(b?FDBG)ywa!7i?(JiBw2$cD+j|p2Ad)pLKK>;T=k&VmJ9D?i zs|J8maM!>C@H8X@&t6EZ?vpH(saXohN`$8SeDg5=VXXChcLIT*!=#Y@cOR&K)jP<{ zCA!=wcesWzdpCH&FD#83U7F-Fj9q?0K-${?F}^^vZQV&3V|4$7RBK{t%AKg~7y;Bf;<7TmKk(1FBCE~Z_ zJ+r%54cEibitY3w`eW$kb`5xG0bZYgNL?e{Z>%PihpIu8qVeBpi?7pO)ge{hyIAWM z@7Z0gCf61V-AQDaG0*b}VCvEw@`BrOjg4;0v1T>jx@=K8(n>}1@(RKKVWgi-l z6c<+s*i^N*w-X!rRK&Y94Zwkn;C7y)EOPRvUsXiqs6WncFn5i-WwUf&u-f?BU zUWRdCdgOhoXuOACs(4H4wby6`d821<|5>(m$i4>MN}%$%@;X8I>%UGO+3`^1=hYA4 z)&9O2m}zz2HxFX;EhhV^3U_K75giD!qxqV*>@}nKu{_s zIy(1n8C?xXi%3_U{jXLAes(gR3s-i;KT$bXMq$LkH4Jh;t5|^*^blPDi$N9i)LP{? 
zUQ`i(2%W3%^geoRrB?MKtsemV0SSg~zFY!8){Jts?lC7%YzLw@W{e*3JETh;4&tsQ z$(x5wRrQ0WRt5ki&(;xSW)hto2&|>W=}T{V11jYHscGhQLl283V!%z*zd0 zvDx+Y7>c_4Kq+>lSzGu-_Z+7T(8PgD>jv;d|CMjq80Sj*9QWFkP+5p}n#H$2dj>ILo=#e`=~CBcR9PP3e|K3`rjKp4illX_J zI_~8@hYv^}NJORg;fR!kw~vDL{awvB>SpnO;>RtyIAyKMlPceaT?)lXMn*>Fql?w; zTmY7ofF(;uuR@t_t*vO{-X)=PQfj_6PO(U{eF{n}&t|R~Z3RH5*juoUINhhWx$~OS z=T94DsU;u-N&z&xHxbl*v;nj`z5jT)MLDq$UuL=gEAJ1M%gKMW&mM7pZEBV`oWJ}zYknyFkSJ8nk}eMcNY1-FoIl9Y37ZZ(mA#McWvbN$ zLKg#sC5;^T5yRs#KD{;RT?hY$(O?@PnTU&n*9gL_npOc!T*pdt))W3!Tpaz$+ z9OzYBB%r$0l~;dG`%Vwx=W{CX*8jShHUsTm2nFmX4Ymw$RwyMM5JvjfwFP_{o|cg8kz?-^VvL3=i%onNQz+KJn)0fBg{N zrk7{#xuJ=s?AZvN)zuIMOrL)kP5v}g9LsLs$fzxrT%Ad399+^!^8dFvG4V=+s(I!0 z-e2>{b9P%%rg38K=X$zXxH~Na63};%cVs%+lW|P2V0Y{iHB3e5s#&UySa<{&Pp3Mc zx&gEBfhZkDPP6rsT&eughGYBlFNhqs!~|=)-$N+e`N&fIBR$-cN*AjVs2G{gl#Nt0 z%GYLcTO4BD#76d$9G%v^&`$AFD8wGt!r5?FPf>CiYSM$$ERp*etISZJw-Q7%GWaeS z`sT7Z_vd;;?T7xEc079a_rw;N8(WjlF=izemm!CXxQ%)@g?1uwM3hNmv4jM>s`X(O zBP>*WlFK&gXFqhum&Xb%)yRikt8gwVRtK1GlcL)8NUf!Ley<{b-<_5Sgbu~hq$BAQ zYU=+r?+G#624_{6+Yac;9n5f zaL32TKaXN>765q$;U^0U|Lgd^gV6@sM=O<+&0?{9~gll~@xjv&t&nvPMTy z@71%_ebM9ataDdeak;|03$G3zbA7zgrmg@uzSPWtJ!zG)4@xYPMcb2yC5g&2ECg}B_A`Mv zO1%^s{e%v6$$#ZGUuweSrMjQX;HpxBGb?Wxn^$*UBY$vFY1E-r2-ghif92p8u{#S_}-1^1hjw74aV_7_>5{q&xxcav66@ zDQn!Fa;{hdj}Ld${%xnUD)5O$t0S>6@#;Zc2=4mxMs;f=MX>-0q*n56x*v27sbuqb z(z3w{{z@4)!3#vC{C&51<=s$YWk|mznT6xwD{I_i+-KXynve>ZWD%9h2GG8s*cbEp zIn4S3IjM2YaqB7Np|HaS`xy4K$!~Mr8u=lwcIMme95$lYawTP*l+G^L?2mi~Vb z-_LC`*7MZ({7Ct7^dNK%y6&+i(;y}L(x<(=>XadYyZ4wi* z=6~oPjwffZMt|5m?MZ)TBm>-WC($!GNQ3-*Pv5nW6Ic%g0F;)V_( zk-zTBq-5)Rnf-`?akPP7e(RyG^}O2N9R`?0H1xc{BJkh-Bm%44o?-YrRk$NQWO_AbG(^S=JMYa{ z`VFaXOH}uJNPsR18R}tBn%(H|bAt8=%R{%D1#SXs`*ov~DjhGI)n&YziR$FE=)!~TzcRU8VR|+? 
z%lc#b7y4R02IGD5RhugJfnQ18$Y;9)9r3!q6Z>5_Ux-mOp7imorp-mAMq&d6!goDx zlR0$o+V0p^IvvHl3eZEy_ru6sI~i|WE_eY)puc}Tkt^_M9^j#24a%OPV6e|GsF;A3 zwzQC=27TpyDTm8D?T?=GGkX3y)v8lkmEed&5n64E$d^tqN49+k$+1df6uqs(eA@lE zJI=H z``ibI`jV-)`IP_709;ISrngpVrHgyu_5%l<_)&=OQ&)F#3-p2toy-vQ9~(N5m}xR}^V= znRfJb&@7`)7zJIAGzO@omlgWUL-`z4ljpbPxh{qp-`spTK@VXy>2e0{MV%PfI|wOK zJ-&T_=|#xz9`bpATP3%T@xB-DE5pNGTgBMPya00$d4Xz|kUzA2uMIOwF2!T)Y_Uf~ zxuZTizhX1tl>MVecbx!zItpv|hyREnrR6Y831}E_>XM%+V(8wK8%f4J*f=>*ZK)P< zYwXO;qzJX(XHIkTDcSy}YA6A4(Cr&*#|vKIIyJ$7{lH#jQanf;!E<2keW%t@!xQ8n&e4c2BKI}`jZ$lYez8xtO-#sWg0K6hNb4AIP4s&DT49-G1f(_4(G7PrVxh z*jlARUfxb^DDy%=@+-NoF_Kkv$U?KmIHmv-5kP?&64x*j)lf?in0F`@C+@Ygf%nTX z8e_|LdTyxajp0)3cgvIG-Vu4rzB004d)dc$dU?>Yk59C@G55^p{m^Ia^?C)alcgdk z8N#CTeJ&F@{-%-AyCF{JD>-c31P&h14(HGRe&!!W1ii4cj@g4*`>_5cCIz`($5T0+7!;Ezjk9A^Df3r{IlIblcny{hytT^6to?2rb-TYoQ3zMVh#@#6`K4 z0RdJnW@a=zz>yOps1We{0Ooc_0DBy8#X~1unbt5YVYwmq-D5}^P&c72K;!#CuL%p; z2GA;uCsi@EanY1Kv`G7bi1!z`r|6f$#q+@~6f66@J*%s$%Pk&}YYV(*ov~Mu7&>-o zCKj@)StZCjf@jun?au&)Roe9H+;S=h(3B-iEy((E=b^pz4Yl&q1%ncqJZ0Qy7wEsc z?oIS0h(jmqiin>w8`>%D81t2V;C}EggJUwV{matmLB6YxH(j3Z0CA?OF6F>jXh_^v zQZmCWK_J4IxPE0bnzD@|N3CTr4 zeiIWP2HV0cQE>4ILg0mcQ+56_A_=mHt+U-l4nG21^oDbx!-j<%=7CiVS90L5qLYo* z+U3k-M!p^cmmE_Q>(5K(Egs~>wPgFy2styS%eVPki*PmWyYLQ zdap;`w2e<^+nOqXu$?=*QyRDVE#i(#wZjXW@I;@y+uXO z(sjJ;nGh#X!U*;ASW6G-XjRp-5uBxw*`qc@9B-&^P`{|4SyIxuA_21KJ^3CQ_v!ZCe#ny|lKor^|;@4TtlcT0c4N4vIZgTf=P1LJNgiW8fm zmw?f3NZjqrXTds9pmXc?xF`qvj4jRMue){3!^xzdC(=8qic(O@_@>=CA$!Rm!M6e- zBzZx2aO21U7-)hf3sQ|fGAFPds*%wu${#-L`d91!@IC;fZca4Gd}*~4{-s@}+#g4Y zRJTRra`1PwNMj^(@!CZBG)okpP5P)BIhjgXV5yEKTT-Yjl)68s!fRNDjeV0*RN$zB z+{S42)#qVXOb8wv$x|f~O9ZAe-o`Cm&6Vk7_74GaXX9?|7Sn)JIscK45`S-Ep-*Xd z(J$G#wPVEWXfWTUf~@n7%QqrP^7G3%%#;-S{cSR zlj6hX5g9Kxgyxj*aFtENh@VWERrVjp7_MG%8jjsrI4sEf3I)-yeP3-&TP}+aXzAc|dJMHl zhImI~q$q?74LzdTveY%RC@$F8ZRPUnB~eaNnPQZvGfNF=E}Z;r>H+d4()z+QRz5yU zjp7{l|L^q_zxOgGZ=BdirBAtsETR-A8Bo?zTLEuE$>a{0}}4r-jpz-y!fDlk;MA 
zpB;gi{~RUN!#6nnUwVrpjr{&9?zk0vxMz^-z+5rWlgC)J1DJ`_rt^||({TMLTn*Yr z6FAucjH1VNPQ4!rIW5fp6j!D7Kz}(gqFU>#WXqTIPE_^GyId(!%gdlTFSJmbMvQ01 zy)^rg31LD)1d7qQ9DUv6yCM8fmMj5_%vQyK7oDf{lW2P*-Q|7=n-pG6&)KA_N8oIr zv73!v(I)Y7+v?N#y4BHZH~y1gI8jiD3T*8+J3Jc!z6zZ?8|m4w38wObO~fZUvrrJx&32nn2;wEwfpIESL}pdzl9G zNxXeh@}-q4Gw=OzJ;=P{#2c5A!d-;jAmdxHu*WBnkd4#5;qcgm9GE>j{T6YbaJ;;KH6*+IW_h0tBEGOR=#{sZE;O9WHlMe54LX|M2&em7|nmUteAgHMnSvy!!A~ z#RzXKC^lQ0<^#HfcnoG(=u&~6DjdofF8?Fc;YS7|{hiYp{r*2`0_*{xHB_DKMW0%( z8=LcX-;UgXY6t>B z(iC6YxH^<@c+=Q4a8p{U+StjnE~uGpkA+7W7TPB^j7sk=pkvs2m=K5k+Lj!i zER=Pt)E3wrW)ccL^~eKpR+x?s$^<=F@I^Rg3hbCg5AX9V$YgJ^&@O3p*4f@9-RnKi z@1oXnM?(-|#Z|HozRdQhu zsVE3nK>Lrtz}jLjslIK_JD%}&yFP%>Dwv6^gyhShs2uVWncA;F9U=%+s-RZ!8MCo> z4iTd~9M!3#p7m@qm^zA1DnWe2W8Av^^SO%Rop|8Mke@VS3m1g)I{#+~sNGOYltmX( z8kz^*72L-fFY2{hX+HpvHo1hu$3~oxuaYp0Aqqx2G`gUZ94Dgfk9tJ{oC8@i@>eIS zfNWzEi~b|PM9xe6c{%|6`#ZVK)bsLtz)2Ujy#)DoF{GPAvhCqQ@ix1&jW(u*)%~MO z%HQB-eeuJUre7_K-$*>Z= zwINCQ^p>5gFCOH43nv(8#}}F^O;gC7(HH!Y`mgRhdZySsOKwg<2L7YJFo+C-q>RgT zW>9OVc9L|D0=2bV&C(JY4k;X>jl{e8tf)idu<|%y%dIx;tyv^S#-zbR zi3WT7s*V>;gvNNIX+Ip01#@nC-D!?~Pl$PK$HXf00_~E*e7kDX#biK5IrF6<_DkI= z_srzIooJ1$1!htn%8XL$fn2z61_9!Ad9g0nI~RLuREdtJV$6yX7#~3f3acJb(E6lzOymlk@W7h;3wpVm_<9`-XUtkk=EP=8(zR` znM&Q@71PDFR?Pux--TvbPD^jyfM6cl^NgIu^VyBjPG{_6XTPd-t?hX_suk&&;M+wk zYQ+gj!7vaLD5<-{4eYU0y;rSZ}Mgv0+NpV|i>ayVTVC7~KqWn3=x9 z@}_l28%U9xey6jjxII?mMrJP7jK}-u=72wmbt_tgSs3e!zyX8%#qSo@P8;sNNXPN+ z-s)UKc}l+WYXDBIzUfU`c`bV3+@MhVzIEs~^MX4&r_B_Lj=of2bKqJ>8+rBNWuckm zL1Q!j!(P72w?lvJ$Z4l-I>_f1VLWi}N=z^TC2HrK%@r=CED))J8su=2LwOa(H0`A5 zujT7=S?P7wL{G^4iTK~2^9?$AqRhpZn8pkB*~nJ)owuI2 z?{%+xEuz6C-idG%7a`mHVzXBg=$qk;1Q+}J>^^ffb#0oMsCPz;-^Ioh2(TOmBA;U+ zb3w??j7h4NnnH4DM?WX)S2_yWg)NlFWg4%0DMgA#Kg3VRuX{^vqKR&j*6*l3Qn-2E zs@utPqy<0yB1e0^&rTnc)R6urKhlvTMPp@EY5p@!Son>ed>d@GxS?^AHz%Wcb&nvf zH zDo|QN{>XAUz|ck+!PJL7EBqJ;C6aG7P3I8(@CFgFFra7WSgx#+(vOH`ATUwk$n9is z{VuE7Ol?$YCU2kUFp_JWSWy1#+mh;3u}cmMiX437(xg(K84=3PX6CgQvwg_Lt?l6! 
z%K2EuNdms5oRy(p%TPCEpxGYFT8!+_SN{f0ZG8{JAgO@EMdddl^c?{$Vd~j2-3})% zvaW)-$b4qXo1Sf&bZ3n@$sJHB`IFAXC&zN==v|LZ+va45R0R!{Q(nT7Yws7aBv1@KEq;PIT~3Dt=#Pp8oTZF$O6m8I1-z zNA4eTQW-6b(T445?v(=^Vq;^YnlIk$G#-Yct~TV$p8*|uh>|Y<*xW6JmrjEi66(7# z@p@v(0cb%2Li*a%N&{2-tL z_yaEopgotNtP5mtK+{v;(J%`jgYld2Gz&tIPt~>$pB|?xZ8NjnWWzj)9TX}L0 zJZ3|#P8<@?@9nHaoaUARa&h?FU6ciZO;IciiZRQE1R1!KK@OhmM!cYmt_Hib%827a0o?;rxbRA@UgJ4Y#F#Z zI=;%u@dK75fXqT#qN~6kp;$ron(gPu7=n84)$6G}#z+{j>F5l;1qKPcJN2$hQptF( zX7|n^XS=OI*WSED7@G=7)a_n7dUxz3;9ZA8S&7kDnIF5_X-6WPO@t)Uc|0 zo&R_>Ib~~bii%$J#S(q1GY5}ni_*1C6I>Rs)ASDrz_Gm-=I3h%Dq_9AdJ307NuDn% z%8*q~639Ix5|qy#FaQb4%jNC}(DN$gcVq)-@(vCT_1#~;p7!EP+xTBh7=Lgn z!l@_s9Mvxlz3b6IH~>Yu5}>;X93FwXhx0-UU)q#QlmYAFvkKbOx{Kr!=GMI>eymLsq;!OGU>AFBN0AaO^wNM7y+bkGxuVU{Wf%g3uMZ9KL zz$%<{c$wG}oro68N~04LrTf!?!sYTdXnKK3>*?vK5<2xc9h{z$EXHTtdv(TynIf{x z+I)fVDA6gU*Zv|EBc{@x9YmL0Pp`At(!YX)jy4Ewh)C;ihp{rA0kUa-75QS+nUwXF zDI+g<&G&{U8Nj}phR;e4jN?B`o%a^5pgpaI2F|z(N>^0z|NM zS-KV7JG1yI0#Zac?@b2q=NKFq5PmjkadUB~`&LIuRrLlaWpxqxZdvQNZG9;NcE(oc z`9S#7O0G+#LUFR$45Mbm;<8p zz3b;tsg&Jj8v)imlQ25;#4nXokPt=E|D=lsDKFt>d(a}iyxC19&Ll_Sk?al${IKA&>sVj9g>BC0c;=jpT0*?f};hr{5YV+xJz$3EP^48`TrKhf?$ACJ495=$CS zPaY2oIhFwyhSlTx(Kdt6Jp2J={WwP5+RTj3=aNwB3r(Jio`qpBc1=x8mc@BDpeR?M zHQ!1vs;--t_El7OA7Pcvw8RD4@Szo}#Y@aIt$Q~*g>}TKUj_lp{a{&U!}J$Zn!cpO zd(i&9vJHIhmgBJcm-FbnQC~7ha9K*;54=KJuwa|WObYD1x4`j4a(NQ`DXb6s8Ncs= z#gr%+`%1ipsLR?wqH5GP+}h_qKMIA6V8__PkWtNd-vPAI%?f70s+tS(s5&#UmfkpW zIr4glq~>v(`V>?X-_P``SrRW=%d-PlId|-5w)XbXY#e)ITkK$luRlaSLL9|sUk){y zUu7dET(jnE2uq-+@2d-j5A;)T6BA7ig|^Wz6o7G2s0|$6#<+)fI@ESTwJ%uTMx_?}945_$yhM=y9ob z0RcPDz^n=%`zi<6TA(NcvS8Emol`ayXgs2QN1+EySTy2mI&aS?Ul`WUM#nbj88E$V zAgP*gtXH=t(z`rZk(K+@Enr6u?L7MC-)1nvv1dYSR^zp)&@_P z7i?T~5k&&gQH9u~BMA;)*XrErTR!U$f?2! 
zRa$gi1Wd{X1{EIESUlO9){KC@Sm7kb*ffw7Uo4i#=}_wUylku)2I7c2S)?UzaZYmN zYF-jMkuGDhQGLUAg4Y}}wN#=a^fAfg$cCu2q0r|DE(@C^Je#67V3UO;uKhZ0)*vQ_ z&Z+V3o&5QM$ptU<8r{$?r1H1R{wf+kfy-VJ)+739Mv5l2hx}39E zSmcnA?r<_v>z5EGix1Oqcs4Xw+>4!t`6UYB14~kDhpd!D*NL`4m zxG#bm!W9JM_%ZUZU!!nP79w(?D9m9<&du1dn z+Rx4Pobzxv;qdwn@uScN{!Bd*TmGg7zpfz0T}b!|g1U!(nTkHo zl*TVn8MmTTZ4VbIF*lnPOhD$VSHUH|NO#h8$q7hkXOT_ou=aXSk0}!5oL*=N(X| zk0k$uB#5pZ@cZ_cnEC&Nx?sA{flH8w2>*=x&-s?U!0*ilBgX$Zniee%M6gBlXs=v! z-$Cm84AI-{i&)tT@n-U%yw>2CpD$KOL%t;^5^q%<&)AT@7MO4ySII~Bk<|9In`nY+Mr9|^Y-BDuOqjAZ5$exY9 zb{EK$tI%omFq<9Qer&{MljZUTEz)+}i1boI)Or@pHBnq%IhCI<`gTf$o$1Tse39jM z<}xZt9`x4{ES}q!Q=yyJl-DVa$}E)zW8SL)zTy*~-kD%CwJm zMT91%zdKQIbp~1&QeCPzS}yr7jd^ZaZUmSm_&oFf))5&6*3=@N-{&olK{`}QMf1_< zGsI?sJonVSclW#OiGAVb<|c@?tUC2~{V{jC1p#{bg{dYuFLxAH@vb5B>EFJl+}B(q zEIdqAK3b)6m0Ev->WQq@a@AMKA?04}lxJ_nkRm^&e3;1q{ABPRkm@la%<9@EErlmF1QJopd8w35D*+;ME99@8e;QROQ@8gEPdakY0oo~Ip zy*uCP@GW<7#$8QEL@d!o1%dL1$gT`WwhlN6*ea3}Gh!L#%a8bj8`I@Fv`%lhg;6)2 z3Xuk1?}_Y|hSQ+g@My1c{MZ@8Q~@;V-a@H{^U!>;U)AW8SeIsc8`f zGz}U-uupHvyHsQR0nu}nE)X?4LT12r{Rf?^XO4)JGOahml3!9m^4$4$`GaJ`wpvH- z!zh&qmNtz}MlHe9{xn;uLcpKftqo1B~kA^I`UWCr2!+kt{ETdOpt7#dWiE~>C4;JW-R zG&B^*?tm`s5AyM(g&a;tl0XVtF6&+cDTzj!^sTarisSSfJSia4q9VNR^=Cm0P4=~P zNC%LzbMkR3l0a$hP6ypQ`wOiCk2vQ*&*Gk1Hg8x%u0~6!t-%psKi;2Or&3GxYt*IG zcjcV;65n;hN*&YAEbN@;!Ax=@y87YEANbS;;``kE={7&#g-Wt|R?W-avVR4uWT*=o zqk5`Mfj8QL-Jrn0gm-!e_y+pW8IOxi=qrhhhP>cKYi|cKM3GJv^wGHZ;s;7zW4W9)W>eb5M zo^}p);T<_1FRR1eDjJlhn)WSOQb_p)JriqphtieRpAf z+F#H%Kc{c_dICGYRS;g8ju$w1`oq28hDmSo$#7rhG^v*<-(?MX;)rk%>AB5DESo_e z`xn`NrXK$NqK23chcNqtM63k}@eqNArvglaO;72YFAT+52LgvVYpEKb=!dCb}-=aBXF-Mz_G zX>WlDjKQry$X#GH{n62p6<$;0-RL`J9ds5pNlDZEo5m(EYUydWc6K<@d2;zx+`4%1 z2^R&~g&j9bm!l`X{4}lEP(9k79gz3cH5gYJ-gvd+@?_!J+K;=RB3F=Kni$T2EsjSR zr-=RVKjHCYEYcX(7DDkON_JSZyW3+Z30EUwVCW{Qrmb0dHhMP`f+P@V$4Q_TDG-}? 
zi9{1cY_Z;@^H{LrpZBwoem=^O*Y1*=0j<{EBL=)29~s~c(zz5U7;iPp`c@KyjmQ+^ zQxPW@FL=}o{DW0X^YS~CYFuqo=an83dD3?OleIHFpe2y{uOkC92f8(fQxwm2R{ z2jxcCK+Oy^Z-gT?Qpfap|5+#e`b+|7>KB?NAxr95O>xlZm2+E{Qt(1q(U60;Ba%Pi z4=iCR%5HzDk=8w6tsFc3R@^ty&d=FN=z&a~RDm;;;l{E`%I@epL;7F09{PU2bbNqO zqYYw#RK^W~L6ji5!A`5YFXIDE*m-sFOZG!Tj&|Nv>pXq3KijcN3*-8L8ZXfD zZV19-Nm75HW1!&1K;2#+H3AS;I)F36@l_%!Ey96J(ap*9(;?+h@_b-eVa*O1SIZao z0{$<9{0z^Yo%xfb-|UC{m9-MVwWkAyHgFV_^?Ddd_re?$`HJ8q;cFeO@a%&rcY~NF zzhgVj5r@gbu!sm$(pvuFp7hUMVW{T%t|tP0477zvbX&#!S}lC51O@2!g$HZ774kcR zgV$cpdu`8*47rpi7slsZ{9k=v`%4~cN{O+zt=VcL#KW>biAZK?KyG>NXnQSN-e2pz zFd*+w4bYw^yfFnmP+7gMHiUe`sb8$7Bpt09+A~r0+zEN}s(Qc8V)=L&#!yTI0)1#z z`@NIlZpjOv{;omru0GIx{_9vapEZEq4)S=zqi?1a z**wm=E@6a)Ks4Wywc4opIS5FiT}!2C2Sfz5=<1%KEc`j^74)8d=c%jwb|Ykqd_i2B zOF28r-4wmUon$A(v-B8D zJzj%Gj9Y|sw3=wV2k_|cw4u$f1I=NC7q5C*%uCusKYxd~!Nh6MI!?kr;btJG0~;!t!y1)0Mp*qo;+|62<4L8 zlp!%;P{IJ%!$u4H5%BOvQX`K7jIkul7j#3C?C(%>hfZ8|$G75*;FtM@arWz>U7 zIhD@py=EGz!ut9<5^0WV0^%ChhUPijyVYl7mQsU0>wph1Eeu*%uRBI4a}hHnqd>rZ z92h>}NQa8xJVxzCMnMJ9^O4f=sZKI63N8LALgBIzIu;vt6iJ4q0!7AFODa;OQC>HS=k5K zi))bg;=+)_;^*kTg+|RFpx|!g;HmzPY!dqELQPHZ;u{n+=zq-4uF{q~c=Zr`w{a>_ zDZ(6sq_tDGZectopu_p1sEiwNb5TxjuH>7~gKI}W@n^a2Gn*8o~Lqnic?adw)& ziA}W_eex2z>^q5}M}M(7KRjK>GW^q2UZ8@8Tz%^Btv3H|VDgCewt-%0>mHZCwU;B! 
zl}5RwFzX$N*hqq$0tdgs714OVnHfzihQhz$bVk&SD}#z}$nT--s6qv2NLY_NlA}5Z zYU`5JW2R7agF@-aweRa1It9ev9X24FV)W(Z**OvNKLr@ToElG{X7=juBbaQph{?pv zxgcUb7NUg}Bx7h)#6ej}(L5C%Z4$GRHf_B@z+|AbPb+9QHG$s?FTrHT{Q)G*v!yEh zkYpCdoF_6-1`;Uq4{nI5%&sGoW}9mUa{$qgR&mmp!A99#NA_c=A| zGTT+zc42}0OaDyG>NU7dpi!x#Tct9eM1;KD&(xqqrQh7*fN{wj<% zeiM2APS}ux6i8qY82 z{YM3)|3=W|hqwf&4l=RaImqBP><*xnI2*A@czrp)UzC}(zeK_t{7$$lN?=7>)%%OLVOfrs&af~#{X(%CkDei0fmDN1TXgx!XzdL9D+3H?;*qc z-uROHViU1}5*ye@@1tue@ON7ggv0;f5ikhZeVkF!rK}8)+4}=d?zdW+bj7Iu01W@j z+2!C@g>wA=;u{M0V~MpsRqNlg1%c@43jhM>{&O+B_XReEu#EJ7`{)5skdiFLnu7c9 z#c1A_L$Rw%|83)k(SUn7k%_hW7tTls2S3-eU@H6fY**l3K*0Ea-FkIm@jrhKlOpiV zNZ#A7W<9xWn{ND~#h=z1LO#se%enxk`yCuK(n^1Z|C_yh)Ny`5_4jjMs<+2^7!`-NgWe*Oy-uP#{=j!L$C4dJm7JXH@diyR z1vuPa0{fsAP82Rn7=A!rK!$;w^58=CtK{>q2fj_2Pxh{CDSP7yu_oF6E0mpNf)~y~ zeJRrKI?D*`>&n(`Wui&dk2{XI_r?<|Fr)1QZ`VeT@ABNTX4P@tq`SM1(44P(J=B!M zIHylD-*D67m3loOpWcd3BIZ|1iEa5cpRJJ=r&Dpt67ed=FIKjo+rX`w5VqBNSHH+qr%s26?SjyrL#L7KXJ z-^b@VI6NyOL)dSvDE9Cp(ZmCq5N}c1E2&TMb;iGfn-}+unxjj&(8DS5eTk;?qzOmH z#}_+8{(?GGo$05I{rx1?SD@2(wnB3)?WvT*OhrnpS3ao8$5Dpu_-OwXCGOm}K^d5| zh;PoQHhocEKxH~F?B(I9Q9Vb|WZ^~J)Brbp#k7=OV;+y=I{kYti1qH3JkPX%PQI}i z*LAIl*~-tW3icMVA78LmPe20}juyQM&T$X}3M@w6&N#?k#ta?ZAEca?C|zG`KgA=AnHF;&UwLH@_?1 ztGn$gX>(jD93^|>EX_pe9qnVu%sv@=V?thOv=jOwfI)yLf3UAFolV;g2u9IT5GF(MOz;r#QXu4jfiSo4>+_tL#O2d(}@qf>;EM4i9bpPe^cLz~DjWLxV}j ze|uuy*3Grr(2&_aJ#H%}eJ=j>IPd51xZdgCUHN_WT3P>x%wjzdq2zVJx@bLA|n#5xsfl+qiej=Xo2ApAX zrO~>ZcjtHNCeEwVR(jVorh9SXjV2|QI)-wPP{wyO*+*e)v$M0lj*eNX6Jm{xjlK<8 zJm%hi77W{20FL|;s5jp#e#Z!u#P4SUtf%-39eXd z1FQmoG2$=fk-&7gxw+|AfNJrZva(plt#q)vg&E^(MX5bFa=u0S``ib|piPQh>acS^ z5*Zt3as6m){q3Gu0NCwVTi(OJxpAFneNb~1GS+Q6f3J?Ld5&W$&6duI!kJPee9Zi` zAR)^Ol;q!GIea7~A^Ekpm-zm`f!82VOoyZ6(Bl5$y#V^iKyVuva|VGysj;=Z zOlva<0J&m9X8@%Fue$<(a!%xlO@GgMb-nkc4CfGvFGPY1qvaQW@g+|~LAv=P8m9@- zC3{i!R`h;80sM(?KV{28; zPVWRtkq3ByP~fuI#^!|MkHod|1r(eIzxixVA`6)ZUox5ZVpmf;b$=5e^ziYiGiX=^ zYW){iKo(J^7l@^Fe=AJR&Niqu5u0HM%=;QqOSZK8^OHX7<;j+I!af#Lq<*C;!1HMt 
zv|Bxl1!FLIuAWm-_pHRkXJOwS3``hOBOfp4p+}V-XMABRwJ)0x7sHd7@po=l>HhuC z#Y&)gJ`J-7Mz6C|omkfXi`w*C(< zjU0lK5iA^4?tAPvgaHBEav8CQ;uHCgqYd0taoR68UKxYIwsy>h_7Km2#SMYCzx4Zy0pNCF%itCFPD~^z^&}Dr7xB|2KSX(oGW{tD z*Q7P^hqlXlS1RVW>*nX2r`6BKU+TT+TxWKCSQ8&io%gDduI>S;{w=_6lMe(X9|w)3 zOsr~&`EK#*qn$9(`C{edw8j!2_n;`PLst{dAEQCf6P_kEOjSJ5Kw|dN=t40FT!kTt z(3kSHMF}az(paB8$GLB`<(Lw+t@IBL>kI zG1Cze#kmZYrodXa>@FdxKoJHTwXYJ0Ixpps_#oO35tec|f0@Uz)YON^>!Pf(fF5UH z6UReY_ipo#Ytd7~Df=!i5$>%^F4b(_eM3kLbcNT^#(=U>rCSyCP(*sG$84u~Vk!6< zkGYue5dFeN4JPb#p~9HZT%}6Jjl*r(hg$#)dGX*?z!)-Cli#dBjQhsH44qRAJz%9v z5I?Qk=1V9tDz9L)(EAF&bR(qv9`PwkyR=PPKO;96+wYNhkksjW9cwWNBeOjNAM2TN zSAitX8(*GWz4vo3A}El8*+h04B3k7aq^o1B%oR}6$v!EuVJ5l$81FZoz5Gk@=yfMK zvh@#zD;zxJpdn{Kx7rP!Yn0i1bKuNpFt`z1kDZ}QUyXEV_YD2PJ*^=AdG7_KC~8Jb zVT29X0sWBmkfQmR#3kuKJ0yk-){sIen$YKq?g5$?tF<$d#BS)zfFxiS)yt8wtm2i2AF?jYjz@+UMU;j8B$dD;x;v@UozJ))B#7 z`8sg&)j@dti!a-uyd%tmIP1TcR3?Z+?hgbM~7 zvJ2JjyvbZH5(sN`#dVlh(j$)wHDg5kI>f;1y=%}`@;!{W%%CHV*p0ITDMPV7@w{aS zf*aY;N;AX&L%RH)-HW)01nAyj4_SPKBMMVRG%lPFI)^cV0rt!5U4fa1*U6(ujuDfL zKp=Bg6paE#CWA-In9y$-&oJX~-wAhS!r+C*KDupY}UOF*TOC$>P_T0Q2Vn zA}420k3w0uOI=bhsKShe;#y#dj&N+sDGEsP2w|ppGdNe{z4#rMkmBr#wDnTV$wNt$ zt5j$cF55Tgd=oUOOZ(=B8=uP!|rikdL>3eTbxwie1McA z1kjF9+Z&WKUATD&!{JV6afh&#bV36>f8MW*AF0C6>m>;%3MZ3}KoODWGb7`3czI?5 zlG$v2loUE@`|*@OlO^pL%$_SCHaO_kx+VR!tC8LO<5$KJ(c>5e>5cg|uaF@_Q&C_| z{oD5rs<7^|)RFj5gxMo7C@6A2DAAu+9+6%GmBm0SNS_n7lmoyh(uy7ds{qI z{aXJ{e7itJQc#*eT1rWoh9{XTzTI~G zDxk{9_yuz97YXl$OXscrOHgaC-fb|^YVY}$u5vYPmSJx$A%S1{W6oMl zTEQXvg-P)nrelnb669~tk$=+78&OO%!Mk<|x-va2Xsam593S2aVvK?S+cZd6FM1Q;%(4brojJ}sysaG=bgdAS(O-=Y(bp*1mReW1NH znfD@;+wAvQaK_Q0dUFBq{^VL`nG@spOml$F9L19j8$(e>g$E_@hpRX2AG@%AfOMNA zZD**6K4ycP*;z5Tv7I*r=L}FkDBwx!dEMtx$+pFd4-WmjCZ@%MO%s-_Z@3fkTg!g@ zmC3-Y!bgEK^nEe!YZsGOA3R|DNJx)H@3jC_SOb4$mgdt{pFi83M)5YoYt=X~OT&l- z*Up%NDPyPjAWZ17mG-m_m67Pzy8iaUvFm5a(}SOqrkNJU27a~V+twn?(p&Ov3hwR! 
zuOt#BBPa@6<;Ylw$MDQx67TOY(YsiB0@0B$I>Mjd!#<>?OpdvQl*Rq%OMM^!1E3Zz zHN98S0fDHu6dRN7SDgz7H`KJ@Xt4lY=6V-V6dn7J1*TP zaItlr_*X=^!_x?U2^AxTuh~z@T`EjRi!_x~m5KG>7LthalC0MOwyx!kMuL-N@F&ZQ zYl54=8i?L5!uMoX27dUxl9o=SLp=^1ACr4cs)IpG@TJp;Gz#-m_}tVfaIgX#5FSCo zkM7E??wM`=*7=Nx#B{(|7xLQH8Gw=X<}`C!{YKsuIb3@2?8j&Kd4q|!9SiF66!
dzkP&CL)|ZpaWYbwFOG$%`_Ly%zJ&dN z3JVLpK@zX1L_m4{2-!_Ib+H=0ck)bW{|21&djCIw?iv*c*Ye)y##6 z@-S7V2rA6?amVphLA>k2B>0&P5GL`BsA|9l7Vv*s%6JZxPM{{6FLyikwV)vtd~ zSG-3}!}ZO)>+UPuY}q|*>OHGHAQ85=O-c2-jBV2H^gc}8}yk!E43ZXN9 zm*bEg{_1r!ly$TVno@5)4DsiORvUft=8^(NAF&gC>$`ZLS|byQ6w)^Uwa8V6{qX1S zg^Sa%Ylk|*7MgFxvK46Y`)5*fSYhsJEQh#=>5q$e0bd`5_WT?&F5d8b&WJX*-d2{P zfxEi_*1{g%=8ni>4@BN5= zKiB>VWMTKR?Pr72ten;9Kg5POzHR`Cv8hHfZuiDa1_lPy7n;0Sl&x^w9GYo?mXwp= zpMAZyu0kVf?Sbc-6y|oh{mQEmUz)xg>^;|Q$lsim;(qkr5{s_KzbmH3PE!ET=eVxX zuwQ8yK1FM66zvZ2(Vb~ZafoNjU{`2Yf1t%ehvZyVKdxoB`Z_UVL(^qCm@Lyy5{G~G z5}0Uy4uN#Ldw7TgcEEjF>vwbE{w8d%JyA_ZMp~jSG;!{$*o%z=O&hrGGH+XoA0;Sw zMpgkX#~Mg0>4;^?jbmj6cw*rohtTNs_@$hX5R2#z{NVDF5PL^^{3^g}pi4Pt32%nJ z&3f>`f+O(LR^8}}lygd6D7?AdMrDB{~#h}-fTV#&0lCsi3s*!{xloXT^$^9+(aTlYLOH(a!2f^IN zIAb*vLo-8R7K@@S)u4(rWm*R2-34R7-n$@tlj`&tyHUW(T3YMq;)FR~Ax%(+Dl&{(Q@j(=LEV~vX@ zq2j(2KQ#9!dWcLUxnR6YgIgbd$uImxAd!>b2kqSp;##SotMp4jKz<>`wpJYb@#9rD z!w~|8O2VNh@nR!Eqri`y7T=O-q&gT7{{tnD5ruw3$#jN>hJitLcFfN;_E%R4iHLf) zb660{xK9sLSvd_;Dc7TrVV3RVjp|P!s0nV^-7m#;nV9XHcmI+}^FC3>A;sF!Y1|pl z?En4y<)tGe`0&38Pyi;oVfzggPoXCl7s3s@tfD`d$#hBJuWn^~KOb~J@=?OYF6Jb0 z;`4Y{R`IXx`7a_FoHLlnYMqPZ4L8?Ve}>1$qj>m(ZyYTJ?`PyLU^bGql5)$HpRq0SjEgguk1Q$E-Cd2)Z?OiAveNxR$FDj@1H!I`IIsjVs%C@5?{6GHZ_Pzx{lNgG>oReXe~)s)9oExC zeGEsdH`~C}DB8OGkbBDAP=NHwBZ;wF14p9?%aw1ofo=V3yW$yzTt7t6Bc+5eU0OOk zPMdA7QO-PTkJL-@)Y=6XbJ=ll!jykG|1=>^;WO-BA^+vYWX4|oinNaU#v&Y9QBb$d zZu7-?A1`90fYKHYwd;+Y>HSJ9y-{*4$2;{sq_>DuM1^Gz)&IAS66RE_`LB{ zdp?!{V?b+y)T?&k{lse<1v0YDz4zlRAM_{|rcD`8f0fxBk9noxCs~KN(f^v%o{lgc zCQc#d{wP;5HB9K)b20#QhS z47r#g+f1-zl5~yzPMct5iG2$YQLv*Di*v`u=63#GW=|Y33^G&E6B1^tR+b}T%}`Es znVfnN3ux8L3Qa72Tz}15Q(gQ%X8W;q>+dYYrIcA(NT zuk1Z8O}6Ez~AC=3QqeEQ?{Xx<`#c_aF z92fah)(-JHu~(=c!6a6S4H~#^Zg~v*F}V~e#I&SQs}qU z=AG2)i=oISgj6djDV4eH5`;q21OB*wy5(ipy`E`?b5TxiD-WTc2E`xNd?M<`vz{F~ zj6Tv*KaGqDeHE{}!~9aY%qe7?v?{W~F#Ij|FJE|ePkN#AHgmi0>7xvipMsyYs&KRu zPYA$bKofLAFof#pSzq`YR9c*h(TT(iD&uq@**~Ys(>~|JIq2%zd=F6^TtNpN1|G#N 
zoz001F=`!618xf&E=)(HMBz@0fS4B=og#?Hek>Z(Srt9Oh!JU<>Cr?n|AZ2?Tain7 zUVnRaO^Z%DN9=a4FBw7G@dVJno=V*~80ozQiHGCTS)kYxCyow*^nQ=Yz5wC1Scr6h9!tZ00?IN=DC0BnHdfXy4!_UKIaulu-04T}RjKuZ}YJOWAnBXBGNyR?d*G8zS?y-uROR?Rze}^Cma< zym@lk@d@Kjd$j36$P;#AWEA1qqa@7CzQo|4M%gN3aa~7&Kf}3LHfIL}4TSl{=$Qoi z$Hq-BA~?-&nJg>mXeajfpOucv1tQ1KDdeE5mK6SOc@*4cP5j{ddU8nGQ@gmYa9T0k z@7-f4Fj^K>2%Rw%KVp^Gupvbp!WLfdp=jM^NMI57NFD%ss8}S3zbE>sWuJR6;F`lJ zi|V#*c_XwwK?El$F2KP~`E8^8UzWpIPn%1IKmq zDt6Luu7%+~j68VPFIkqg0HJ`n8^(L8gexMxw=*DrIB7`@*`$6cD=PHfqU(*2Q$E`g zpuM}6P?v90av$*P(4whQg{7dX_y-sOR_KqQWMc*gb<8v9ynE=5&Qk4aWKinfMorc? zOJnD{kEzYcxT8EMAC(|PVlnZ(#X6i{dzZbLT3q{`cOTxI#^5KJ24gq| zG3ou^yFUZf;|oC7_TM-6AJWgJ8GdW~t+R9%)OzMTZvmFsvZoB&DB4 zocp@mtBxt2-K+{b*hlWQ?C|wZA(kxmmb-9A#o=|2sKY02H}NeqWIoqgf?8wNRv3|i zXw0Er^&^;;C}f4!)0CWQUPUFLihtg{6J-6}+l+HdTAF;{iawX${w*6+XX)WZm9aLY zP_KjT>30+ve@8#Y*C-f0@Qo9ceCVxz$XXfTVo+N1Ox?A!E$nhCx1jm^Uh>TI@hw*Z zDe(`}sl|Muf3(mb>uX8)Qvdfy%< zYIjfjxlgg1aPp|hLL;1VYJP7sz2aT9iARR7&yYkSxJm~^Bs&)J7zB4`ZLy>TXCY>4 zDT=wARR~9^?(E8xyM3hwbAcs%b#=9A=j%JiMOfW#Z~q*nFTCaPvyJtp5_=ED&6>D9 z-+(`I3wiE=M@56z$v!D{rQyT(3}TO@1YB-Av<}|e_fL|=ADC6 zPYrDJES_B*aV-#!+RLgu%;LLRdiVma>U*89Wy#G48{T-V?`I!)T$#$7p1_KP zatWE@?lP@lqqES1Tx5K>{T1>{DwnX$!`O-Z>8ql4uU!V+wtRft;h4mUu~Roz`?aV_ zYe~&mwqj#^?Za4IjX5RnS>OHDX6@TQPU!53?#PJ_34V-j>wk#0B^GF}xuQSVO8ycN@X^+$Lh4R@eUTv+1N-{798-ml17KW8as=|ds|Z&5KLK0^ zqxd2<{G1SP4>q~~`Rz5dqa6>385D?w%-|tX7mSX8^7c99)7u(v^S>{=y0mt}#&?4z z1YySqjWWepnV6jY=(%`Fle>%6x_Le`KJ$|JHbPlBA=*58E+t%-zP;MhNpf>nyoFRS z00AN)13;rD7KebmW>e+f<(Xf0&EB_0_3E{VI4djpvhT`uZsmkAuE7(7Za?ILp`~-& zyq|^vL*db^zQe%{zQX7@;Z8?mgZr)>8FRWyBxFuc zllo#T1mxB>R^xw>dskF$otHJXMb?6km6s2xvNG8HY^>$WXO`>M#yK%b`N72Cn>lMj zjb%zc^$MdB6bjkIk4kU-sk~R|BAwFLLgCtb!;_Ge+-h#8rL{ckXqT7ZRfGF3!M+&D zB&(S-XBtU0sU-x=2?4pnO5UWa>cshN#M1M5}K zZp*A~W!@D~pO@LJ{``y6_oMEhJhT1$Hhx8s!e6^R23$3`?*`n4-K4r$ zAY=2fsa=8YLyZXmb3s6rm6^R{oa)*!afjpzdA&BWR()q*sk>=!f?O{9=2OoP`xob4 z2?5X2dyEeJ_EXOfdP!nAt!C(asw&#_RkZ3QcjLBE7ans?e^c7Qb9Cd+g_R~+&TrpM 
zw3`N)O(bLhXvN$lYu-nyo2QD|2)ex%1RP!0x_-65zphLsAJsVc>xG@P$Qt$1(OzmpvFyl;VDl4ki7t0&rTooIkdUi+k{H`oHL8UU8ohFQc1gSI`1jrLInMXy2 zN&x}QBPy8HXx&HAy0`kQM)sO-l*hNroZPJcw@Y5G@YlGgVqdDB-39A#pU>{d;-fi~ zY;3K+|3mIuQwoTQY|xX!MXk)^)ROzIa-2NE4tqvt8w^%qr2;WWP#E(Mzl%LYsCyLJqT zgtX&BR|$b!1pa%8XqrOT0+T8uEqmb@)$Ow~dnc>UmM9whw>PMkR{SMZLX0Zlx^~v# zg;9{V?Pon^zOY6jWdkT&B9<523`_32PV|U`bV5V|guuV^KoLQC1ruw>h3 zP`!AVwQPbc?BP&f#vGSig@Hz^OS&5bE=_mbCZt*F;lUaOhh z$xU<*l~O4Sd&(sTdU8v7=e+D?V6K3>m!iv1)xC?VJLg!9qWdVT!Ba%d>68$VJ_1BS z(tk!|p&258Xayyb&K+1Miwga(dnN2;Zk}>?51GPRmGla0ePjtS$WZs6fNLp5<1X@M z-H@9ZDF(7WZotXFsjAV$>f0^IQTCrCl?VY7L4ZieL>?5GX{HFMf&!E`PpNL6M#!}+ zEm=b^J<4I=PF|^wyjlb9ANFI&IaI-qvL<)qif$g(^Y*DPQDS2BNeDH2SFm3B8I^#YM@KS#n2LEOC`NxytPvHA0{axW?5htgOEHnHWeT zL{w=K2oMRG#A6~OEdl|KZ@eAl-oE^KKW78MXDnNObG!BOfaRV zMYa+G3<0J8ae3*o@`^PG5JJGz5g-yW^(RR7TOtCgv=q6WgC+Ky`fPRx{2xOZ^KPzq R;IRMz002ovPDHLkV1m#Sf)@Y) literal 0 HcmV?d00001 diff --git a/doc/example2.png b/doc/example2.png new file mode 100644 index 0000000000000000000000000000000000000000..d2484b7a69784e1542c4cf38ca0ac4efa90b19af GIT binary patch literal 108251 zcmeFZWmH{D(=G~&6+F1RyAufRZo%C(xVr?`;O-jS2~G&^8eD@r1P>D6OtSa;erKF> z@1OhoGA4sHd$m+|RaaF%RZWDVg5+CxJa`BQh_}*GV#*K@pa%#DFdZxiC}~ZUO9lQw zIx9$$$D0Gjld^vUG5vH{X%l8j&{+0sF5rF4o{x72i;6r~@Q$RonK}d@Ut9n45WW)HN zOWqDPCPW_&Ec(|Sj=RYH`X)*Phk^zRm%8qKPu)5yeDq~siV3c)2;1w|x8LjD_Rxr& zIJJ$BFT+ryUW~&DmGudJ4_qTBC$qm`PiMUC*QFv0((cG0(MbMz0;VVpk=Du_%_)RH z|2~G~LXKg%Q2(ugLM`M--G`kvL;jyuc`dz@5O8t-JVN2k-L&ur zOa1eZ^uZ~v!d<-CyZ*PpA53$0&i>!wP_}^g;5(4B*MCXCaKFR-mp(`|SVs_syNFK! 
zH~)Vqf{f(-gZS_05OBd1{BFLn&i|Y z@Y$GEdDHmTzBQ{NOQ0Z&?_(qb$(#+H)}4ndZk7SO6aSyU^z46z0cUl!s%TBRxNyNs z@kR4!LG#o1Q*tqEZ-kPK&QIp5s4JlVCYNC3OlI28V`zX9_1{u3Y-!N=OK^ZcYH1t= zCXGjhySS3_WX43cDpu^IHaqW@pvo6EOuIE#(k~3z9}`{SF*%ygtm*J<=`$%QYgbT6_ZyEfdbKKJo9%qVPZs4J`^a8b?!@|rF^_!o{Jog#?j@z!4B2j% z`mRg{?%5=bI-Qd3lDcoV0aYTnDj~s7X{l+Is?TqK|Xcw-QrU{LAyKX-phmQ>Y#E{nUfz zPXc(Q0iQ?oMP@2-thnZ?q`@bR=O^Dp%AOyODgMSIBO_kcrL^U>yGzW98cWSuIal-; zBJ;$OQpDpaW1Oj&2OO35$fCt%rX8&>+j~dDO+*V5>R2(!pZp{vxPjH z+)}8m#2A$Rt4THpK_dZ-wFGNc$46$nC#*K3^_~QhgBkYl=$CDtBJ&X*-hmjX5tqs$ zn9jXOTDb!Gl{u7?_v35_xXwXtL#ggH?}b;Tt!&QTX}&v_(Q}2k3}!C649kbE=AOM;Wmi<|Lh%g&1k;(q0{8(?9WA}hYsuEcP z+id+cFH7)ED#^|eyI)^>>k3P@Xsp>e0$U(`204kL5!qR^)~SFL8isqIX+D?SKkASf zHEv_Jz)Et8>N9@{QfADr#zULPfiz+t1f;>H6Y_T3UxU9#u_q3^Lo1j&tQ3et3uB+v zZyR-7zi5x=ttqW2#VkK?h+~f2im5j=plhYy<#w1fYLTBwVOs4nn6hEg+Kr+<_S>i| zhS570Dzq8L`7ylUC zq?EL9Pi2+$Xv9JHpRM9YVQ9CkR=t2{H*^{LW{*k_g$!4xV^hdPVy6rAzA5`S6pGqF zZz5Xu{jm(|7w4}*En~IPbC$GgE~~?i*v*lHM^0IK8I14G%(>rE?d+vx-+UbM#&Ab$ zWgx)#YeD>f!bvq|h_14B*S?7dt+RX??bUr_VOpm%a8D|^lZQ|@WZwA zIGcyglJ@~)Yx;}x$f2lm1eYzVm}Ju6CnTL5&nfmM6%M}ankM<)Vq2Zr|FOINiOeK4 zOPmZ8MH%wvOf_lcWTJqHG9>k^G5#M&*-3j?LsvX@0I+ z$jo6~yfCd78=2`$}*6PLD%WZ z-HHAB+sZ*WG_6V3fFExr_%CBPicP#7M)^)S*8H5Pdr_Ll=J4tG!$#Q$H3dH@87MP=}Cu_Y!TY3N$M0vh#>x5nAK9Ykl&F=XM~4){h!fxHZpK^w)1u^^^hj4 z&0=xx+F6!`_Ss{qYGM4%WNeae=D7tufl}A1ZyKYEq8nITiC@sfO~Bv(=WeahMQ7bN_AP{{BH>)tl}zb7nuX>e_f|e3L(eA z{||MzR)fv`>m2`f6vqs@+@qYgH)tk=n=Ptn?S%-A7}apDlmk(qd) z8;XKZ;9$$<_p4C7r*M9R4F6HgOLL(PSqzAtND@SUHj196LKIl4%XFlpqMQ&hJTh~Y z$SHQVB@siZO99dxaBPAJUY)n*qAV1+xGs6nZGU#AyS%t;n$GVp@v^(!V7OqG*9sji zS$J^4ya5DTF-yh?RksmSLuR=M&Ot%`PWK74{Bx?MA9Dsh-VFNqu*;l{UB1<5k^xZ| zvVc$xs#$(sWt90OWIGbBF}^(5&2LxL3wTKB^UDsm*_9wRNOhHhckVx#^3Ny{367LP zz3XdfVIM^q?GldjcLrS!h&K^R?0?{99%xs9?c&ER;otti-6F65PG4W3|29bTF`;TC z^v(QRgpWHvl#MdLB0<2kr!*xe$N;Z*{$dMc!rS)6N$;vwBl!klr++(HKI%rI`dZcV zcJeNWE}XZuLn=o;UI^mVMJDlalLs-O=jjicsoo-_d|uc!`H(W(5!UL7Ovxdx&xz|4 zdw5`{hr81?MEUY8;O=xde4YUgj0Q$D)M1TzKL)G&-S6q{Vq4PcmWxP-o9f9Xb$q)A 
z(f@t14%_J`GASaJ7)uu?*n&eh5(cnha2?B9BS=!=v zMB8L7teXK%M~AJFlx>}?XU*9A6J?wZ_mDH{XqsQr6@%vU|SZnDw+mt#LjP-tYU7Bk`yXfCbIMB$!-d z-Z_=g^sqrn_dgt>kEEC_FJ=CEDyeslu6K*s{rM@wnxmib*#6h3%J(|HOJbD}isSI^ zV!2F&j=SI1R|EJrpJ~bURsy9H7-Bmt&RC>O%?0L1TsFV%&auuDud`$e6fjy6-F@}! znn|FDkULJAzWha;*1lF)q^GE^u#l33=UU&vrOoE7lMt~?cnT9_i3Up%V7*W+91lec zQ}g*VA}fYZVKS`>m1F&(rf358!e>V<1X*GvPG%X4)@{H@|h_t z58%c>3Q|$Jvxtk)jh2jD`33#{R&Sg!TU^s*>QQXZ`gZc(d7c7A6EwA&#p|vR&Sf!? zrG&{X9*H3>C&ysW>C@`83y(n)cmL=dKxlz;d)5|jZMRKY_N9VKy*w)}&R>mQx5WWx z3qF^Yp59{7H2cRc`8okLW>k0ImrIVKP{O>bX-jo5x@QkIjUnzuiE43AbXIf}Nu^ZP zV>zcf?K*|p<2&mW_?a)x#pLaB%e`x3d*SgWUxG)PV$Kb!%xf*Q1U+?gZV~~%SNb?9 z#jX4D+G0=LSB$yX>TS~3EBHS@<|o+!KS8P8dE2@BCEnivPWPLan%WKt;{#{r_{GJA zNMA&B^v$vVV!8>)^Vd$jxn5^ZEA?ZDxv{Y^$+eI|zm1-jDhMuj62 z@7;090Ee))Rm;8AT%`qa`0BZKwd|fWN%J(NlKiNtW4z6#{*VAlbJ0Puz0fZ>jRf*1 zn*SaZC8fC9PH8qKqc%pYgcP@j#N}bBF0-u$41+gCj8JL zn`vq$CZsJJ6!}O|S23Z0+c#SZ*(q(Hb4ku*8neKkjuWY1U%7j{Wj^PiCaOPX-)+`8 z2Y=H`y7eZQy|2YmaGROOv@Dax4(A=I`?jp3+^@YXtJC(;v4eKOUUT5az4s(zOCgPs ztK`@nblwv%$f}RxvXM>xpn=I9=ah$d{+PFzj}ImYrBq!BkBMskz5B!UXq$5M@^Gf4 zZWYCsWKHCu+xqF@2E~+gB2vTUDu4L0^JoZ7?{`pE4Jn`)|L>;UViX(HLmpCs>RC@WIh{Kzs@?&I&!LQ92=ma+!l3MT8!mYZCpmHwPB8g0`=8MMR< zvE+hRsT_7kF!TGnd5K&LI{S^gw~@68I_?_QZHxVCHu|Fv&RJ`+o_Z*L%*=$0?Ds{W zr!pI?XFrJuWPK&MhLt3i>GCtMZRSma$HKy*FSRIItY3a;I_Jh#uy(d9a^$p~NOc{P z8sga_W!8BRNZ5{H*l!u@R8%;@er)~PoRk=5lX`(^`^%?Q!hb8qI!iMWOn{;ojXkw% z7@MqJ?s2gt(ic#ZB!@&el zme9;x51Li#QhYtjZwjeRNjm1M^$;546z_!|HerS_%byucaPM5^%W)T-?W%HY4qd+= zXzR0L;!mJbv)D(#JmiOinuLZQo4d9(RW*L7R2g)7^wJ+ZyhZ+w-`vE|5c5=HHpTv) z$*jWlx`MN0FX3V@21=ajryeOlFs2NHD2Nl}!PSj`sZ9{h74B6j5)dJyw$tVEWzU;E z-~f-0lU`8BdHSkgdSu&ibB1h|`=c;89i!lbwsywR0XgRxoMdj`8w9UaY ztmG+q!9s!U3&ZFy7WL-o_A(-96U8`!GWx9I<hy#*<6*Cj@!i3Uvk~1xYHYtEbQeM01M7tK^Hk;fp&_y|TjUiN8>Eu}8fr112CW zAug^q?3e%MBEEP3?Fi8Us=L|uIAlrU&F#-ZE9;GMUVJC#l^+isb5#YTbN(dPf2yeW zguB(>gn+sFR-^N?14ig_mDJ4h+P7k~b4U9R+)sr;L_rvSh)+(g!VSCud}o zD-dQ5NFW==hDi~D?A-W=XH2Hm=8zj3o7Q%?k>q9Hu?3l8$ENSsIT 
z0Tga{i~yuAm5ypxPd^I;gZlh7*Y1&A--i|5CH6N^Sl2vF32LFIS96%Jjd0{lAI&eW zet=7+5F_?Un5$j;2%Y(zUSkZmHVv9`LyK8VokzV2-yTh0$gjN3v4ue)ogAe#2)ezm%|^bJEfW; zB=PsEyfaJN%~Zy!g>zM?d*qY^zJ}2u2T%>zWFip*B*#Ka-dc7HAuBv^d(~p&8Eb!seNs=*x(soLuUx@rjquwW48|3iNL#h-MmL00A^Rg16#sU z5Y(LTu>?arN9+)h1h0x7>K==Ooo%b>F=YZ0|wumPKKuLNt5&EtMk8E`V!lK|%i7t5}L+)hk(M&<#NTS zo{cxtgUI>4bAIV0kOnAuE!zm?HZvOF+DR;f5f9;1s95myTR(r}imF;Q;rQ)}&Lt+e zU`WUV$&e8kPUh@FWIu7Cvw%Wm1(vr^Xj#UkfKPyKMUUrog}P!LQU!nH4$uHGc=sHV z6jWpA_Y%6^<}3k84Iv+k;=6GG^)?PCmIWPQIxGMm)G0yWe+)39Wg?RDNe05vBm@iQ zmsC_IDh9QHfW8!WILX!mWF0#itOlrO8s-I!^r>rF@@Y6gXtHM(kgFa{03x_n5<;_Y`xql%tpI)z&0D`1Lf=j@QBo`(Bq+Qg@atq#|ybi}YieA*L} zQ9Wn6{}>Vh=AgljdTYiB=A?uSDO981T-axzIH&U~rRcy|LNdjj@G3P*nFHh>SN93E zK!7Ho19rop`qj!Q`5r35>9oy;3;gj-!0vZwB|aY{=qD8QnXNQgPqKJ2#h0nWaRzYW zTc{#M!9Q3>jvxCk^x$Zz0k2aJTFeWR45^bTpHAa1p)72W!5x5FK$+1(RI?%XSH@xn z-|oOMO6Ut4#4>Kga!2xZ>KmsnPj=Mg94<%uv%aTOm11iq{%AL&5s6e*C9!x6i7MGK zOJhMc8y4q;ayUn6${+?D$fZDcltGIqBG=PrjQ$xg?QOx`Q4iXpOxgA@n1AX0$y6Iv zwIbMzo9yTUfWn++9|{$^t#{B2KJ}1Gdix+#;BUHoW)`M%(*09fivtHng>+1bLh)tN z?ZA3uX!M7u=&c!`YhIJ$KHFVSn z=2{ib*q5mT!kGTeup2tC)#E*bTKJh51~TP^v58MP815a7DEC&>4zb+4TMBlJDxG@+ zBLd_b2;`|;D3Uf+fl&24Zh!oM=~w@WQ>JGVehq}^Q$rx69vu`u6^m%P93yX%-v8ZZv8371nm<7`e|tX%*LI7zUI#QZ*mgwn`E9-IPKH`rt)DGJcDj3l8LOXcd%&!sj+%9fJBG2C{7!)$o7^sd z^2-7nenSiOII$!a^y5ofYK&d>1n*V$RgjRx?VBi+FL#~2qd8mbNApP^={}8f0>EoI zOYD=_PN}c0YlM+y;fD{deY|$dUoCHr=1`dj{fzb!0L}}Yqx(~D{kgv)JytDM%s>3| z0)7eZ@q%U&*c%JVZH|iAiqT^bPAt=vi%*r0$BtzuT(bQ%#bs9@%)63)B+ny@`UcwI z8;|vTWrM|}-D(T!ejGWd{z-d`5{RA$Y*5#lkK++22P~GHt~96mAwJ&+?;@`Hj&TQy zIS!)Yitg{1jWb`Y;0&h74Tp{%TW5 z0^&#=c821sbeenQezrPW_}rL8~^LVVic0S*+Ff;0AeJsgck27{ymqgZ~R{tYRNt3`m2vx z5#6>O^Hz1R%>pn?m2NAVJO@CZyo*tOn9pl~kcA}R=6FGLK44TK42keAbKDNJLqBWYk6}HfBXjpc zJs7bh(`yW1K?m=2yH$;ijVWaFZ0hn0UXL=p29ii#z`%MpSPI$nEBVjQ)bz4Hrs_Vdbk%vE{jj;Ig|cS{%A<92Hj$N zFEu>pZ**#fDKqND)E|P|B`w>;6T+(qKrdVUS5H%DTu zq;172-^bgFyK5G0*FlwU9v5WBS+J-u>w2vSTX0|#nj9gIK?`VRDRz%CI5D2C9HUbN 
zW0f;EMM9?!OZ8^k+uKGOW+bJn;XRx%GK8gWB$_77;sD&b=(N$N_X_s7svGH{8PIcS zAh4!hOl@5p8r>=R4@Hp|#}aTm-JKhS^U^{35Xq!4z-dtcU}pgw7^4G#gWl-M2}Qu} zDq305?NDFD=*N|*hgGVyxmXVI$N7yU(W0Yvqm+AbaR)3&%1?aRBeWOEFj7Wkxi`|X{mZ5frw14 zwjHya+wpu=g?g2U=puXo`s}wUvP>2V$d-+bjYYk7x9)9CL>3;NHjxncIF>x!ix+n2 z6lPd{k?VkFlpIfERmFSN5kDi3Q1Gy@XF9|m*NsLfWOuQp{Lz@y%}^IWzD2w|ec8Eh z?p$a*w7-4~$X)I{46B0n_gCB(8VQ(|3CHM<#H`S2pi-qBh^Nl8=s8Cs>sO0cy*C)ET0yPWC&Be@7I=$ero!$r3u9#P{oA1(D#_@ST>F;D_yv?jSA!!e;`H zwy(A;^Dp0Tp7?$`93aJ_z@pr(F^oyX8Rrj`sa0V85qe8MZEj}PJrZivTLybJX`FtJ zAtxi#v*BxMh7g7v#JUgrmRQ#<&s|PI0WiB|tT4k$xG6xgRxeZZGz|0#x5(v&CzLEG z_QU%JD{jJE6c8EB(dtnXhOfEfy2+^#|mp zIrcse+A|zu+(37fql-h2Qj!z4&d-7FOq-leC%^UUjNc@YrEy_!17LX}dPiW4-F@vB z1QBF0V$^9EjCAE;c2zdpEP*H?@q*PVv>UAeQxfKXmVCPu z{tMRd)O<4MZJ0~2U{`+sSE&ai*P&)PN9vu_#SiFx%q)G+J>AlGYlsnH$Y_F&?j`tl zjSPBi80|-ja7Z1t%U`?6`?8?}J5SQtEl94rvT>1>#hcvCccxput)?8)N`ey2a92Q$~s(QKu5>P1C}=iCDrdZl`w?j zHOkyt37LtZOSw0hi`)yDQ?}OTDk~)gyQsK$ArJB>HA%c5h{lhHjCB%w>py<6H-7#k z*fH*JHkrfkks)%~yY9_pz3nh_PH9}5Ucto6!pmvDzG_&w?GP9X?4=DI_MliCWDQ3j z9$f=KJAeLY4^8aw5g9oHk;c`qF8~iJ)_KvcN4?C%J>L<9_IC5v&+o+Ujwdu7X`9Fh zvF7a|>4zTxZ=f5Q8uq~V;d(P{K6gj7kvWn#zpXwX3)_#W7uRhthR~z0$^IwN^g4(; zgXIkW0|NN_fP#%p@7ZD+$@xAi_|aA!)H(dZ2+kOSlR@K^!IQumZrgoq9Yoic?K%MYsMM?Kt3R*$ zMX@t;FGQmUOCDkP3HbIWvJo)f_>}Ot9QgO5|CzC;xdB}f^R?#_1#Q3z=1|FJkZ_T% zN&ac*TU?6>lF!pPUTFs6pkbbA*CLM8a#%cZGdmZDeuc<`I%0w%u)<=3J?m>NEnMGS z3B3qOZT3cz=^d#ki$ht&YlnZ_4=+?OFw~;3qMC(0!uZPbt}d6fpxeND2Z+<1oNEt}iFc~h^>i5% z$)Xj3B@BG|nlI{iP*;r)p52~7M@PSK9Z1AmGu#(ZV$|pIF#gbpkgVB92|}6K3<*^k zs8pv1T>-i{<+*_=z_|n^Fsa83#+b5+L#WuLgl;s!U-lKt3XTZr5D`)nJil|PIakH+ zF55fz$h(w#vlRsSP0g0-w;paY{ejN)?2oNzYdoCK3mH~ig2TCO@lLo>XYR2 zt#gHSG{(W56t}ylg~m27^+dskjy5de;gnSwPbAJMA5}B^Gv-1QZC@tH1Gm4V7$Xoq|gJY$kfSQxcGkPmJPA-fXYXa zCK!S`s@q3_$RQc9^8fal>Fy+7IOjWqqS9FLDRJ48XNdGF_~RLZ1+wG}x!w zB$l&?k)hmzSaRj^qmR(jo~qbP^Z_9JkIOMc{24%)oxJ%Er~e-pQ*aNLIcRkjA;>)W;_Jt~YoDf>(?Xt9aRILBzg}-JX zwL%t+QUSQ>B-Cn%_c3@Ji?)@2hp5bvnzq0{7S?;7!BmkT#c87pQdM#X=Z+a)cNa^u 
zr;EdJOYGz{EXp^N)0WRL$YS*5Au%qZPtcCHP?U_?SI#o_ene>&l z0Kqa{4vtBk2msgLz+wCNym^cwwkPObk_)q0K$$1Y?v6vER()(YD!}+1aBBfZ3sW$v z6eHf^;&Ruz8p2KH3KAEP>bdzmSekD006V}%$OD%J1gJo+MKq&T5GN8g&%{M(+!&R3 zla6%x`c(0W<&qAa?wB2mXwslhky=4!2%zY6JU)JU?lb-8=oACPx0{$ zZ6+JVVhj*!K2K1z#T9)r~|W*h@+xLJYc*$a#JyKq;=U-Hxi>3BP7+Bx$;?e z+)L^W0xw{6nmWDc(T zCYBkZmYFdR@$VH;-6E`x=>aKNp>Odwau5yTd_t+=XnV?NFYAZ>ZFt%qsmilnkoy#v z&?Z`;o2vmU8ZvGj=d@8DP%&X2~H5z$;!Ye%7t9 zNUt_%G+^sYU@Vn;jNG{TyE zQ_*{Bl%804%owMwybh17@u83^2{!}r$V{D^c*C7?ZQ_wnX%HwmMW0@t?;V$v9^nvr zAUT}kYfpPqWfA$+pGD$pmhIgNJJPJ$=2}QU``pEPyl9 z9=GYylq9xKezt|Bh3P~(Kl`dPur_v1ipb`P-&(NE@Zn`ackn9PuJu0A0f}nb@e$_5 z`|x=?&It(oZ4Eo7VpD8LzTU2Y&D7f!C#fXD1cfH#&@pF%$Wsre+z4}iE(@mOI`)=- zY2UptGah-a&K!(9A~k1n=@WCb+lj2fxF};pL~Q!LL{Mc%)%S%-#a-i@Dmsn9)>?aE zO%}D0@Pq^Z=BIi;5(D%zW5J-6@%6REsJx zwV!Y}sXbShFid4&75+5l)9-9-3*DAYJ$I&;A5m|iT~$V{ZvrcUY}$GS#Tt{3h&fvd zA1lc%P))&kaFK2gMTKwXM+rS9P%;*-GHy&5>sBAO~lik*& zG-ZLjLKY+dCi>ifMIb$eGKI%L)~QX)oc%Eh-DAuD2~JExdc1ALC+_%bpPR*_Sz2bI z5K@mp4d=%2KKCHLj|K&SkM#iR{_Dx(H9%iW0c`j*iU#TUQ{%XDj#@(B5U5&s>;@hG z^cdJ35}T$p(~74A$amUH#EMfp2@X>Xm@r3_AOPBfqsWBQ0kxWCA8t?lNpIJ!oY0fzfQV0xhj+tQnm=l@|7R4T+|N~31qH8AM%`N zTFA5rYD*l03kHis0CXv8%TM$hfMqB%CWyYzX@z7sjI%Z$Qe*y}Xx(krMp{XVVTo4P zf1!x0U5Kw2HbKw(6;rnmR>)2P>3M~=g2X1y`u@=BO1N5gFs>JIOL&)p54hs=KAhTQ zC_k~5G&LmXk`z$k0>PujuMvp9&u1b3731hycmk;ZW>6!)S`|`VPWiugR$k#?FffbH z|9>3-$$ka1|DVuGZ~c@Q)y32mftT)qR& zEA}k}f~KzBJ7|KT?sVmO(%l>ie2botK{Zc{r{=L%M`#^uFX+B&3@N+AyOz_^ylD%a z`z8!K6gc?=Zx%R@y!NmG7ruUQ5wMqQ_FWd0?S|7bsYCjKh- z?88%`qu>n|`YYpI)Hr+S91vd{qkzHd`ge?`*|*8~vBVGY>hs-GjOL?Z{JGvub0|fE zx)06|jr77S#7L}Ju$mH|aZnYmc6I-eG(OVWgRY0zl&8Q9!M8pQHg#O>^6%(1_M7F| zRjP|5(im;&C=17$W**H?wEP&4y;=|0mgy{hOvbKp{`z=ySfq^Ec#mCEJh}MoWRkI= z+GkPGU;Yrqudy(rl1NpT zjqICXhbj4`AKFg8pSUO?&ElSR|9*;gI0f?co^ z!wy4{SmrXFkXF1LBPBHYrlvT87wfsjZ{$ZWepS70qiO3JZCJ^veuf`op-6-W#uXF# zw003UXnQG*U&Q6xbDx4#7RQ!TEhafQddXWO@H|(?+?J;fpJ`vs7>yDRedNNa6NNhx zdiafAa?pS7FTv6T)UE90y)da|F3{;MO$!x}!Sk)2#5803{a%>ouK7wFV~mopu6=j< 
z6#dA-Z16$ncq#)~aN$hOd72V)$3K6feD#-%mb-i1$4F*J4rKWH1R+ zFX~Tk!wyE%aeY7e-DYs0-|rTV{_%cZXp~XYzn*F?NPBo5zBB+9_%l|UnthqM6?$rQ zG4k`(3(4R|c0uijD+Jp!&(<{u4tDaZW&Ep7jAEYn`}kp1ZYswueqw9N?@6O&v2tGC zOPlgm{kfcSw%XMS%~p)-TGvk-UTeXy2YOTmji21TJh}rKNow4p!4FTCQ<>(6DYzCB z>&YJ!e2EJ_YoA1^eVU`yyP0fX5BgeUzSoLkHB2G{?3;rAEych4#t?j)%^TX;54AE( zvaD_u-ag2j9T2Wa!!{xg zyB|t&KYhBuNtJts10cclu5dNGyfXp{s!nM9@cVbYx*)1O42y=-py7cnK# zZ9!MS)!uuQd8a_69!UGMC}8 z(Y#D1G0az+c5lG&{5+@DxBJfa%G$k+@~q1=G2XRG@<#nj)N&tcdrd#g0{JJgeaU4t zI|_KiF%7mF-$pM6L>-aUfFu&rY^~n!;a?YtGUkj&Maw-5dxk9AQ)09PcfYv(CcTOs zZ+J~cW4~FS+-ivSZIsRAQMJ@%Vv{}D!ezJO zBZRVrr0wlK{k!GS=h!=V>+Kg;;R4S1y!r9m>;wQtSqVh(2Z z(1-Ght_Zi0Z&hC4*Ht9lG07)QC;N`V&zxUJiU<+nJ>7$dtom0Yoce9g1ZdJNzhR2P z9)0or{plulz*qdfX5q8w*4*t2N`1`oQ20bF+1pcCF)+o(OptrpX>esBR(pf~`ytz- zFUN4rX|v%Kc_TapQGSg)B>CFVn@V78vfx0U@;SX_Rv{Pfn#UQOTaIJbo%Xr1Cc^EK zvNhIcJjdX_%GwY#RVftH8s~UusvUWiPtfiH-QKJSjZ`}!YxS$A1Sgfn@2Sl)l#|#J zb@2hj@711#DaSi(!MSqSubQw+T_$T&q?`LpeZk@!^*K_Ieljz6(7=5zKmz8)en8Q;Qe!(~u2{!FGOeGjiq1 zo5$oS`=hycrUj0+;B5mkrNs#SVyL+?c6z)k&=gv+jtopM*Zz+FVOy(VW)XkNl}G=|%OWg*X^l&+ly1 zXqb~Y$#G@Rs2AfMr6}I@n4Ux?6qI`QBy`iut*|Xs48)_wAPj;8h7eT|LC1S$i^S95 zHYpu6J0|Je)%BfEq9K{i6_GcN8e9z4QQf27J-7GE1fSTH9x>OY!r@%rmE=Xh#xalo zL7e^^3_n7Tknd(y3hpO-JwS-Q%F+O(|Hiw;i?69mp~I@%)2WTZ2D*^*#-VANIbtw&FWd9fqDdJJ+_v>&^FcR6JX&YVTetoiIA z3=w#j;C3n#i&tg0$NlHiapYT+2mAHTx$kxvNMWnbjl5h88NP+a5%r4yi@CS(sw&#M zKmiYO2r23A4nd^5OH#VKkv^0((v5)9B`MwAAdP}_gLDW=gKy!z_Z#p12X71p0|tk^ z*Is+AU(PvKoiP>v-`%64OQW$CGK+{lG8Y`la?(DQfgf%VYfa*i@bmqQiS-}BWBaBx zDu`mH5)^&xTK8Y@f=@3``x-geUm+nsKB+^7cA43S&sD>e@%coSm_AEYwpMJ}GP|}* zQYMuwAqyqB*j$!`wtTz<|2!2NydY||#O=c`YOFML`GmfW!t|6fQc?}K^OE-c=QtQj zlF&ChaLRfhD9g!GKK4$W~4U*%J@LZVuINaPGUG*8Q<<2AK@dH>ixo5E44-#kcf5I~drD z!M2mqK_O{#Lepz;I>`MD>q3GekKX)=#fZ-MJ9fV*Ly`4tng+zFtbBD?w!}-`UwWV9 zy|W*`(HcaWx*bAXr6}eakL>hhB~|D3-=-30r!voDcn zCIN$2=sRpL`&J`W{deUe(OX;Q0bhKv#QB@x%Mo6??u37-#}slh!yB|f^PIXSr&mX^ z`*PvbPKW*I4&g-YNefg?=ojvP+uhMO=8HL=w!X1@?>SgSvi|n`tB^_{MLGGQDU>HV z7roBum@EYeqC_7RQ&!2Zjw~& 
zal5=?^V=V>skn%S11$hlK-*6%S#3)@Lf+(rCbZF2Qa{*8{mC>G+rr4!=fu_FeW`{M zrexIFpT^9U)}!C>XlNGZI?`M_>^E(Bf6}EnFuTlQ*+OV6r<;sq8e8Wbl{7d}!7^BG zQbI|(QQYlA5KgL$a$>sa>7zgy;{%a~(QnwXtojJ(%P%ZVj9;)H^SWX#WG)mk`_Es# zaxl=s6Q_MHi7ZE)%osi}miq`;gF}X-z?m-E^VOd$eRyO4TD*?G8viCW<~tU-mr6TZ z_JyCSfS@%N1#MT1md{XfLjd=?jc+RjBHdmk1OTevR1^4z*;Zzo1DlauDR zZr`1`-SD}g@F8zRJ?BG~u2{Ch5IcAxk(NZwnR68=8`#^{ca1Glk$vZ1D>69sEdQph zobF9XxMW3v1{H73DpXl#dFg;yRRUDpd%Sf)*kR)=!eQ?=1a9Qg?OLZ`?JXV;S zGQJqjtQ`4;@!X)ODFlKXF}W^Q`3Z%LJ>>JVd>YW2x1i-kf81EmX9x@NeGeb8NW#|H z(Vl;U9nf{lU1x0>KJ2Bo1L$z1fLNV23DbY_TvCL}v1{Llhx${^ZXr){0iMaZl8JI( z%q~avE3@rnPWKRW)A;et_|t!D5wcy+5Dff1(FFdg!WWgA{QaBb zZD8&$buOi68~SSOoRCw zQuWets@wglx1)B^4o0BgDS-a@MNfJ0GROZ?+Q+Om8RE8(x#apHJRxdB>A27Y{om6+ zCi)7HGaQ&CbI%ZB+)qO}2L#itJdTmVZF9=Usr>g9hrAoD&l}k{A<$gB{LAx5JK?{N z(vukw1+JQAycJY~rN> zzrp%{V>`iyMstj`x*FR(F0p@Kxv#s-w6>X?G%Ap^wkweP6|Uj2c=B%HIa~ktK-g7S z0dY#5x<~8jhdVL5Ky~Q)KN<$$1E;L;GuAV;(g)%RsvSq8h6gyXLsgUilNbu%frel4 z2k+l}wcfTK!hw+_mz@PC7?z&bt8#jJR?ugMrG(5MZX0ssBe8eZUrs?>Yh%L@Z8^Twx}1x~Rqf|Z7U z;r;J!aLBvRp-B6bptX>vkpeBDo3#J?i)b+x6d{6TI^I?I|L*fZ3(B^gZS%zc`w$Sj z00l4s7mt5|+~eogj6ebGZX~_^KS9Q0oRJBt`<>KQVgEn73??@ON9y`w@vUB+wJOVP zD1)W&qG9xfcm4^U988^AioaJa4>lA7VfEc* zCbbR{oD(WyPcXy^DBc)D!L*@Oqmcm30)oJk&c7?y(Pu1cZTu(y?x6><9(6&*M+K{) zNfNH&Dgr1H;HFsA((>o^`Hn&=+fgKRBcG#6=W+W$S>SJuAt@q06Zr5@{>x>U(f05M zq-R9GhnXwyZdd;fSkzjM0q9RX;8yv*IWyR2yUEJ~C|AvV>1Pb>fZZClJMch` z;Y0HXvxdXY;b0CpopyE~-=5FNbTCUS zh?=hhwB1%KZ6h(S=PAWL8X+g71|1u-8S@eYnW1aRF$(t|tab=lrSW3~-Z#y%&us>5Kl|EZ(6h@0`v)k*| zewz4b^2M{^mPoIt0*z?`NVTsn=UZWK?T1_}?zAfOy|xl$-2N7n0c0L?TtiD#f7L1q z_J@jkFvQ;E_oM5*VNu+FuQoCC#nIGS8Qv6Q`i8BWe@W_HGkMZ2%laO|?bXdEH`{NO z{muh4#;IMuCekkSG=D!`j);g5ljD79;`#%q+(mIgJ}=R}OUNf~SXMClJ;ME1ChKB@ zPg>TuObqyi+y4 z0prs>_{=;u?$Gt{x7M$Jj($8}a-9%!2o~BIg0UUrFsQ)ItP2Zz9BS`o``!H-1gN4M zytXg7<5<*U_%TGU#OX*J{ciS(02TkZWf_?Zrju`>It~c7J#4)_Ze<5_Demoq5Q2hpm{DBd(c=aU zkO#oS$e@TE8mHg8Q#>o4`2cK)7bW$sNqhuB3VPvCqHep|A#|*iEo?A`6(JFZPD$P& 
zEdFJ~lq39gF=Fe7idXuPdR~zN{(tWqD;zfmVx~UZPGF}OQMBwuijj$Pw1MzK`0w30 zAUfw2ggb?oAgi)7GovoBq_w!Q`do7QM0>S)HOe$dKYBhT=%fZ;+*|V%ZeI9eZE4HS zwNvK1DEil*Aj;<2J02K4Zfm({>B$u>$Jnx_Uh z9st~$>eFcyd&h1#s(u_LNs}5H@p7n&DbFXR<7EvGpJ|7z*$>^ulK5aHx9By6Gz>I1 zJ+V3Y(zqhdB6fI(EuRk@lehtw_it;0DK&NVDX4ysOXDcXKO3z9e2cidg@mu++%JgH zCV&nI{hpL)E<%|>%(~=lfxjWUx)T=-EJ6KQsw{jBA-+W=*8fE?A!zW@8t5Mj(JCXLHc!#nPRFYST(Jx>=!l2-874o!CT@#j;9WS2 zr#IFLN`68cf6yNybFO9duFwI+Cc{6Lf~Q4sf=vn^?Di>*!Ot%aC%#$ivu$o{#8bEV z*EN#8`>l%QgDJ2cDyQI+YV&WBEfAEJ(!8P|XbNy~zE}!KXWJv8uA>U< z?KEfW5z$tHK^t9@`6(fhva+};Sj`$}9Dc^6!Ey(^7$R@h zoCai`ZeSA;RnMX^5!R!I+no+^q&sw~WgLzI%r+K^v1fQ|{*TZPfIHwQ(ZNfOGd?n; zdg3h<7<=aH)vnI{FDi?LI#IV!bB^-x!`t@?1 zh-hiZ`o;qZm6}*3hMucMJ$FWO2Q(3;p)Lu+rz~&qA4H2$^U0LXuzmY3eB6%>6~k9D zJAJAJxc|kcCp0EsM@a&UCIXRPeb0GW(1=H-5bsjl zdVd)B{hpGE-^$q(Gii(WJF1_>bVKSl?g=*H=5+2Vtw?EPtGylF9MgMOP5Sr>hmryu zg5}6o<=S2&4A8+T;7Q_1Vh&u`d+v+6QwqKzi6H`MDK2IZ21gXB!?d^oepI&+lJE#G zg(q{b2OWC3(+Z25@oCn3*F>jXXT!NDm9Oa zGeTG+@P%y(z;434YF=TVqQf@{Qln9@IQB@%Ik*8wGN+8bhZ2@o_VKAAkm*{>bCRXO z9Dmf$%FxPogRTfrJj-LH-Ho6t~3cCG5gCVD3IzeHpcE5~Ed)2;_A zA>b>7J{_2D41Th!r@%F%Jjl(Vzi;1m>Ui&A1sD&dNQoRas;y?D?(4;}6!zf7ATJi~ z^u;K_3CqoS#j5|H+y=Tn=!5EOZz@fC-*Kp(H$Befa-pFSse{G~_85~(f=;JyAQ@4^fLEm+8U zp7YTEB@4%ocujQHlt%*Gzb~jb0@_Q0kSM%e%FVeDKq_B@JRymUsNQ9JBE(<$C;QVU z?}t0D4A$*0oA^%O5c7VnL7^FA$2SZTxRnPG?SC=InKFQB+`U9=#RI|b`%NZiw5GJ? 
zV@`7?-}5n^w4Xx>)a|eGfuGM4BbnIK49<<5WreZ;29GGhHBd+coE@G2Xv94R4!a;W z=e$87^!H`eYq;A^H8RfUcu;vF(1Rgzhl!NF>89NHUaajAdieMNRgZ7nfMwJzShW8z zl^D0D-WtSRf@%D}yy0Rr9-}%@8Sn{oDEl&96#zOlyY7hFfZz;6^}6+>qn8i?L-4c< z;7Wgvr2}kzO zAtB)@DZp}XIy5e87OPOIj@s7yfk25@>MqOYSj-!^-6(u^X*&=@%+}|(_jzpsM$2-Y zjd}u=RH;VU`<9+-T|Manjws-3viRLmzJsXp8pJFKtgW|7>FAlJ%#Pp%T<4J#4OK^i0)vd z19{V9%+l$^ifGKxq{PI@@tZ#fQc_^(NGCu}MF*45t8QFi#lrhzB8`zc^tj;kH_gXI49-wKdJqKmS;QLrNxZgRwe zrQ^t!t4w;4pVJ*K)Y}b26M~im;O^~uR?QmV_*~3e6)9(<>%i9xfvDLTzd{^9gyV@- zv>fDVR=_&~48fT%da$e0br9Ci0)5A;)Jd>30K~3$br)n)OI0RMJVeCs5lgl~J~1d; zSO0Kxz@F?2j9!kv_z3}80hP6PPN`~C^DFUsJZv_#Z<<&(T1-KLlKc$4Szu~FY9|r# zryt))RP2@cQ2f#}&ai*MdPaZ&W%x&$wU7X%)tpvIsa(}!qs|AAJkEGr94*p6upTWn ztD7rj2@>+fyH^->R^Um97h65tpZG_OsujxT5)_a)5DGRYl-Q<)c+hilelx8WWeSok zWGz>a2ohTk2??Q=iM4+FjF=nkI=)Ww^W-rF((`G&;4##XD%ZsyA772D9c)r5D_O7? z6(LXHO9zTsz~mG8-P8qA_iLbjg$+dl`onAK+3eYvS34qqOWeByO;CT*|At6>*gZ*7 z78XOeA@~KSiS%nqhMe*q_}R6O??8@}YUJlNU95WgMHQ8TlIo9`LBUwa_>&v7UEMXl zg0a~`k=w(UTV3*qo2eH+1Www{x000U=;`x~UXioOz?R)py(2=Z97<}P>_QeiB>9!) 
zo6DNioI%PAXd&-|(La|s+UA{TU%iQv4IM=s(Y|8U7E z6+sjRWp@lHdU{DjPtEId!4hjN3a_C#WI%8%fC*buduchEw(58LthLyfRDwgh94K~W z?Y8Lh?^rJz=;x{1$BFm!i;9g~Q7*vjLRu)BzjahZHn|!8n#`48czUFC+!E`Nz0LOZ zKVc`~W7uiQ7;I0)8-=2Vdco6OP`D^mFb>(rYx-mCf14*2bs9fFRsr^y7#y%Kc-&e5 zW~F<=d6!B#ChF_V^$`?maBI->{*>!ADtSVp662E&H3Oj*JVs+7X&Qj< zw;MC*{*lvS+Ly-I!Uq`s^%@@2KBx?KT0@&G_4^n->e1yOb{7hqJe}4ZR}=bA?B3?- zkABJ69NaOwjh|AO+cQPG`Gn}G zw7W6?`ZH@p&6TI8k%2F5kFbL{-HIW6wU!?zTx$5AmwNnI_F>~lxg-kozdtYn7rzDO zC%S#Nan-NY8lRiaJ}xNi88>G4ScCc3#G%0pn=8gL4~CHT?rZfS{0hNibg14ExGCWqb_?rY7^h za~4cr4~#k|uR6M0hOJ6fh8LkRBJI2sr41$s0B-Ze671&UC~A=V*bx+KL~baNn_mh4 zeoQMwD~R~r7|_@HG#!cw*EW3t%aiM2zpS!l<`bC4A8;hN4{N~~6f)wiPw*K?eBm|l z$~~*hO|&}mna0l#rb{+kga^FT9haqwb{D=d{l}&M7puRgg%EG-M1m`R)J^kyM$72+ zCoW{w04z+e^`6iTW5$sEz*}@*Sd|?q*16LQybT2EQcm%w67z7X$O7N$OP;5*|0nKtyE$=`77T;>$-T?Zwv z#CC{VN$TSYo!v%Y*YJDOuPd=L>R*|%52<;=_3-ZP+8D(shqFphb2iKU6pv?8z_exwvoB7?F4BmN&n z^EIYGz>w{VA!@BQ_6w5XW>Gm^3*1q6bK^o!=vwj9A|BR3w-kUBNDJ`QzHKldi+~2S}S3oYFRL?I9NGKD=CA30fut=E6a=)OfFU|riY4e(ckmpBLC9jMbH`)g zZas>!GqbZK*Cm0_f!@{YBtXW7RtDyamqPR?_Q{J;I(Td+ML6fbPRU`Lx7Y2#1BALS zIx%&Ctb!;Yc*Q<_ngm`9+w;?s>aRsHt%6c`kF!P!K(?Xdx-N7yYsv^6sE?O@(3iQ8 zLNEc8i_8}~)=Ei`HpMO&yeEO<;D|#1PcUXhI$PKkBXFFj@d0rNr!SUb!w$M_!Np@i zHE}cT*$D`3cxl(7-~`bcH*xshf=Ts!W6=xHY+}^6F6}HUuNKk zH9q*0r)wrnq!U^}uRwgC0SdN*jAScdkWTVlt^e-NX#e=@X$EKqOw3n5hEPz?sk^QI z&rMwaU2|qBDNnzHpBa0~!t|NdE43LZ;8LImc4$ zzYIf=hJyKQKYtMb-Mlxx%_oLNrN+g+HSBOy%-ta4PVKa{nPtmvb*Z)v>~WTo->NKZ zvr}(P;-O9fv*4uE#r$u0eC8{(fp^adTHTN7NBjkx9?}|00Gdl^vgt4A)8fUl6pvj> z+zkFn`up}`C-}gfcxK1K2qtqBTZvrz4jcLofeLN{s7nZDG4&pKM#tjHbx9$JIHX;i zl~1S;>}SMkd`e-#dH3_|dOM8gi(rk#^sq>*z93)AUl9X)CW0u{CFB8(%`~|#C&-#b z2Dx*X((rpdRdj|rC`qT=&%c&&JsB$}zeZ1`-%WpQpT1c6Du5@%xtA#?fahMaUoUAm%Ss98X5CSQkfGMZ8jkS80PgUxpnOYCtygnryE_#~>c2w#QfRr-rTp&Gm zoFv_oLYi&)_9i|h*MnXbKem?+-V6&9-V?A@ce<;nVLM++Aq%eD&_(;Z?fwakCq1yVSM!7bd#YIJSJ|;E`wa9$)xt+yT_?gDJMI zmh~iWwdR|Uhru|F1~#EO=gqIn0uv-Gs>RwxOs`wD-hOOl+{%y>5pi5>6JnHMv$9NP 
z!v0QE?Qv4T-Ar7kH!DZiPfG9P`6vke4B>^W7v1q>;`UPua2ie=bedHi_YZgWLha4& zkDuEIdked|U!-Q~^>#UPF};1yeCWT8N8YXSOo(s9C}9XqnO5m?{s)qI=e6nf3Q3QP zd5}h_ZD+Hle`9*||AopNS0?_<&8k(T_v&4d(`P@A8yTG%4c6=X7CGd8`Pc22wJsMl zto7Pi$j76@&#@l$N}43QYX5BDN8bE@ppqmw zQYz<)t(TpXV>txosUE9>RsV08aO;MWJq6(Fm(pI1EIlE;`cXx4chIU04nq z72%!$Oce_ZF0NnmOxX*CuSrX}-uLX4l#quH^b621^%roxEqN0>ztLOy0+3~mPSr1fA9{)KLZO-XZF~xgm8jPQ0ffTbeA|~2$li|Ik5?}v zp^SrCw_&pjBC<|O{Yjb(=okTgkh86m$m`H}TO-GeR2IKhP$SNHjy*ofL^>qB)` zB$3Gi5jT-y=u5n_q!GH3npo>EhQ5E`-TD94yN`ze%<3=S_JN;em$tY5P#u{gXMhK# zJQmIK&OLg;1zNptmRdTUMJNx7`;W_K9e#g8UQ{|!cO8~Wl_ug77$+$`vGwNEyD4mY0hl>SQDc4sB4V7Aq$#$e-81nsh-(TGcuNV z{vzoP{r>Hn<#+~iE+k0dSe46u4wKy2;Q?g&nicvbV^g2aOikT3&>@-0$vEM>3#L@h zoROY<-=rn2D2l|<2GQ-q=HFF$F)j`u?>JV|vZ6X@fAM)P80AYJ=$~pk<=>lfojZ1^>PG$sbK* z0b!-2Rt#PfhzOI-qP~GW5|`qBfQxQwW>)WzA>!}1(H9YKU-!|Li1T$69-p8UrIUV# zug(xZ7N1dgmP&`OSAB(S7uFX*Opnhuo(t8_dna>UV==NnEO8Uaf!{8wCU$Kfu{}V% zzfs6+1R4OzvliYS5st>5J*x(PT6;-%KIr5RIHn6&_9hFzTC6WgZsXAt4U9$d_gD-& z;CN80{JefVgqVMnB1Sii!>R&V;gjuM=7m-FZSY-qiTT&A1x{hMeLm6+A~m^^z$K`9 zQEH|8tVD%M7=~K011hJPxC+2CZ@q@Ya?A36@bcZ<(MewSIFrN~N6hgHpnH0Q(H0-T z1!P!5^Q2sx9Q8rI13FmE^%OzGkZ>SpWJ8F@qK%2ikQ^5s-8ZOwO9l!>wIsPzJ)1K?Li4g@1 zhk!)Ov1$iQn7n!GP?vg<@|TO2o(;|lC%5Y2o=M#9PNb|6MC$xQJOu_rl>WXiW!$+% zX%*ViKQ#={-(OmEYKiRsS*npB)*_p4IXsn4o5?Z%RP95k|DkN)LBwYV9Q++I+$JeKBVk7`;>!7uW2{12 z5M4C}OJA20r%xUj&+&WwiH8 zI>d`jQr}340~5z+5U}C(u(~8I6cEU4zSiKXz#PM}*oB43lHzNOeN0I+Uu$b?_r8A? 
zLq9n1oc_UGKeVY!H<`9<19HRZdD z`Ap7IWWk=d`SvCgl4QEKiTAFC$@aGFNP2U2soa-zcl>VXPi39YP!o@@_s!owg5H@& z%8LEMGc~5md|^K}s+KBTs5dPMrhZCQyjnRsi+q~`S~j-Xx$e(&viil>_{$HXSpBnF z$j&FYWgZk-Hlem>1Q7v}T_^~ki9Z+t-}su@!CLes^Rq2B9&xx6ygM(|y0l&K7sO*} z98Uz2=bO#{a^Z_*5!DDr}_~=0CnCjv-Gd2zpkExOtk+;)Y1kfZ5i(h>TOZku_!2*v!&J)GG?3GgWwu`0 z_(!(})x_(EHsP!7H!Pa(apl7td7J4uwz>7KZYJZ>k%;RASgNwc{5x@QVJRU(*3R5YSFD4PmYA8y z2Xx=drAy=sW+If83L5PiH|5-AW!n-hPc6^BbodL);CP#?1{sKd!Ycfb=7K~|7Bqx_ zibwB&u^b5$!;;+w;U~8!vEd^`xuKHM(mzVn2j%lY{}-#?u`qDyLoL*JxhtEx3=MSC zWfIJ4GE<5@cys596g|FHwun=3a^D;{e8(f7hn%ZG&c_bES89e;k3D5BaqMHmd2v#4 z?L0O0I<2k`Z|*7XG}7!|ImyUy%JM9&2*OA3z`X#tmwd0vf* z0;}ptpj;AN3^C96A^a-nqJzSxjYsOzt|^3yiVB@fFg(ubhjWDeKBztaIC6=S>>|aA zcN-*~_>`8I=Am_5WbgqQA(CAI0nww0vw@PLR5{29DcGk&ZUUdW9HtpfR^o~A2k4<{z{6q_|OVL6#WXqKHfljx;Vp}QpTwaA|4Eu^m}h-jHW zbHh{+BKY3+HSY!{UubX4FSZ}5*K1y8HL89epq^w1-_o|dcHWH3Qsv-7WCzuN!`_4l zTI0XSOf$EdifTHhM!HEKNo7ALBqEY$6E^G4&(Hsb8TKQ`Ab8^xc{U+{Sb}EOl^#K@ zHA!HiHKSg#3m1BtT$q>Zj6l{gEK~VpGRZ5>o(E`40KAl_n1#YU8Gc5q{!g5!5as^9KFkbqVq%5TbWn~b=Aq`j109w@ z1ljVYUmneDz^%=N25?!Aib(jt7KqKMJ?)Wa;03jP5N zsxk;SyP%I9#AdGnw0H!3E)K`idBAtM2VyT$S#TM?sDSC?EtUrtw+PV+Q3{@>c{S2# zw8$zFyuyvoECL^xE8gB0VY~`z(!h0y4njrUoCX888A_bNxeRY8T^T|1$uY;9 zUzksgE22vXOl3l95Ll`dSWYPLYMWi|yFqRK-QC@%Bzwt6I5}s=5dlx3va6)eL_ZkF zgE3@HJJBZbog!;&?NB!%27a>a?=SMRb!o0FrZ!OM&F1dOJ$4; zA6*4jYY`;2FGj>b^{BOY>wOm0I{@{_k{-e+)X(CO0; zZYLwiLJp+@wo;fi%L5La!UWfkaM70F% z@>h=C?Ji)*101o|8qjCgO(?X{-TB;^OK}vbjPVhiGJyi5T6^E>DtRhtod7o(+Z%y9 zs_#6l7;-fA!Epr5{X!7DtMDd*6WmSIO)^eT*SJ z1x44u@%2Y&i+a7S1~JBz-b*>yZx87L!N*ZVVdmgmgT| zYzf%Xf%cdH%!$w}!O6)9`od!So%XsT%h-YX5nZ5*NO^gDU{t;#m$dMIo0v^JActwg zW}TncfXJ1!L1lj(X!+5^90BhZ7Z(==N$iz{{0{2^W(R|mU-CzBd0^Phb4}L>u;55c z`M_SJ$^k`PM#;)2xs;orF+C=^LxaQTv<8@^20@p)%5_Jn37rw1_?2zJA8p5}PA<#lew#$`$31z8ne+Y}bdP{OZs1F? 
zI5HuIIb>KwWaL7vRU8F>@v-VPri;aoXM7$#DZlOHP(ED`EYq1nOFl|;&GW|~6c?J)qtjqdczqJ_D7MRRGE3SKX9igkRH`}(>WH?ZC3 zvghF%*KL0~;#9)QnBu_~2ws7L0;5h%!OQ#Ib#0r`Z@oJ4oR9l+o;m=}3IHgbNYz@w z@G?!uF-e8_u=9ckvKVxjB?lMa2w_Rom1n7gtAm}Y@mwxCz4FK3c@PRLG4+c;Or_*P zN*5U!ZlCdH^n;tsYLS6FJ+M8Vm{766**Uc}e z!1Hkr!ox4cQ&Usxbn75h^rEyC0hOK*3&Pvo9QYR39dyId2J;_`_l5@ot?bL_ooOCZ zYs>a+RkUDN^=v%4>KTh zZA2MSMFXN&V~+eaSrtdsQsD!-Ys2%B<*zz-+q>BCNLWKc`a7k6Gw-(_w)xjSuv1U$ zZ6Hq-Zj70xii|DNFAKxWdZw;Xckw^rc>VG4R6I-Iwb|l~`m$8)3Q;*K?so^sfM>H} zA9YLAXfr1kq<_&YS7R~ED^lgKLtG*Z>AQI>l${Qj|JI}?`x0hE`|f$&slB^XKl`}g>KtJLe~_x z!I$It8B%QG6tuL6>;~Vh>RsWupm%-pNVjaAwicByTg&5Gj~1fVaoH`lelaDx#SLE- z-H2M}yiy^_j&4K8cs9$(U-!AN_iML$v4qqE3sJKCaegni@n1v!OJN>2&VRN%% z+oc?f)8jEe28FH<#goq;9v(a{N?p!dZ3t7M^@n9S6BPo*E1J*(0k|jrQ=q;&KnJ{m zsbt)PxE5wx6;1SNm={NMSyg{29du#{ocJ;%yU-!0{E`VzMocKzWJ!>pyui78Daq({ zCGJNmG^#?>{M!ky0F4gnq^wV}KRbzawxzp21p+Cq z?eguWAo;-@`6&{yfJO>cs#HldmRL8FKLjf!vQ`%L5w}Mm6}lh)m^?>~MUkJHH)LWk zidE*Tq7t9OODmszIus$mR}$SysO4zT>3PoNpUNr1z7lO_q1HJokIVAYd?y8cf;(}9 zo>qDHt2pr)nN{v{c|%d}wUUh2It9zvr7o>8P|O$gF5B`brSS%}Qc-x^WVGpWE!34D z^@^no`1u}k6iFqMkgt*}E=Xj;CrIu7;HEet+v!KYK9HI2YiLHh_p^Qxxt#bR5iL7r zWX8Mwo_ctiM6daS3w_dXA0hASyuf*et)DI%^}k!+i?Swdi}zR@?Z$r9w(NAlF8P5T z+CRJR{Zrh_)+8KfI;{MTi0|X44>FFw(I5<_^``wOO-eu@Qm@{2n=r{`j0J@K&&FMF zBDy-EYoWn|9H^2=CnVJps#{$yDo^hgb;<2&F)H}p-6Mq1B@Gh{y{5};_Ii1p;tv1r zoS&@U`^J0pdz6TP%Xab~RrwqIXH5Dv(d(6cBFwa9cp=1TJA~xUjgJRX_b5>xT;K~8(W~$63QaBi$xeTQAus!SXa7?5YCEednO^It8fOhU zK*VKQPnwmwBywGJ+B|SRXgp0@ptBSI^N5*;DQ)$*hyTZuDJeKXI+3UM zPQ8_kd!KR1h7ln>KDt`=lb_Yz{pKGCDru@vrSLX2%~QGV>+54!(G8DD!Yh#Bz1jT; zUlv|3fCj#!KOIC@?oBnPu^8#R9!5%2}dFo5dD*Qd=Gh6sT` zBcIGVKWLgN!jdUT46Cz_CwRx!ZtezG(X~JAX4(W(TOX1;@Ha;j7d~y2t5~3;rQu!H zG|M>Jl-_Q?lTPSYy{>pMnDaZ}la1J|@A&d*2Y$xHfV@qA`sX+HiMrKt^8Hk>T1rEz zG2=kp`4|Da2UYi{COv!YN3HW-AJaLc=Q1}H%I0Fm9 z=0liat9|w)BY!|7?JV@uMRGz?w5|NS&s1!kg%0_PDh}TyDXvuo+j6x+6~PHHJH8IV zo-0=5cjgkM(BG!%=h?iw&yx~%3`c#PbU9=(ri;XEukZFPT4jI9-!@DRbl!P%SML|5 
zq{mO*0+5qa_RIC_iNim9u*KV~v(cRj*q^#@4oMOVpF^rlZERm;%GaA)XgqZ;kl`Bf}Fh zH?gd#ari(QCm3IcN40l4L&6@}!S=TrhLB=1$CPfkReSYN|9$wyfm>zt6JdProj5~X z3T!nOjEq*V^ujkGmvsv0tj@FZ`d^W~5`|gXOf}$xSKS5+Y~L;)s!k7iyALwI68yUt z(?Q>`71xo-qY717a}@T(U_~XYW(x&q0Bc$r=R)RpZlU>cr_OX!4d-}j zgXT#xHX(2*n*it*uRn-*`TTPL?gQWmZ0r*WdRCzaOJpMHd?VfANEX^cfrNYz3(;I| za+dh`@)8k;PtawJf8iQ~sdvv|do(7>>}NZ<^IlUNtOF(>^kAI?KY)!b*Kv2H`Oq^V zwep8Etp!?YtJZNrznoP9dP2?$NIxQC{2gW*`-+k9ScI!56hJqPV-F$|@+37+Gjpz* z$yR{*(BNSbCYGJ4_349OXH4Pn4p?GQb!zAn3Al8j2@1u&lPpZJ!VFj@fw!mdK^qtHR)4BBS!UGF!jb@n9Ivj4Q2bfE* zy6|Ym9{s|k79}F_c!ODpVteDoj*-VikxAvgL&xzSaZz~n?H@CGJetxDVVCy&_4Ir~ zfBJu1c1~Y(AM%g;{cXOQD*`3xO>oSL@VBPFi=^bt@e{tEvgi7m=KAd1gT?=Do^NID zM9z-AsjRCZlzRQtH7YUN8t+52@PN{`PJ9z@&*N6PFXPF{FwD6=lJHe03^5aFUE zpS{J|nyo{#pelFE>ov%OMoix15U9}LL}VI^Sd$m#$00;zX+fjKD}zx!5-Wjg(H4igD6`j_9hU}-1d(}W zTFPM`(7zshksGJc8t*^U_6&or^cMYk8;JBJN4RanZk^LqGi|9ML5pQnqe`(Uv^o|E zMc8VbWU`A#=Kv0>pXjXk`9c%a$_Q1f$d!bcXr1}14$sBI>qnBtW258`ys*t(*p5Tt z*lW@XulHQQu?JKkGG4(%MrmbSHLYw{3%+QSr689YD?xa(S#*_U2zGHX`geD&r9)i}T zzSY$z7pd|XTzR#CbK%bY*5n7}s3T+YYTPM@VA56ZFgI8E;u?Qu9rJ5j&8IUwcw_pW zH>NY~F58ilH~J4&fN4suY>_Oa?P(%%mE5#uQhr17=R2k0V|j!7GZq11;2Rb4)Mn(! 
z^$=2W4RYVqTxKoqKBOw=qlJ1W(u|gzzMH^tFS!Da5ic|gj22OlFR}UcP$N|;f&c>=eDhNX|&~3F%*g!lTqkOr?#iUg9yY&{`*{qV3V!k z)&g@QS_xhIxo2b9lC`z=tY7^?^}|~M9;4n+a(ZmP*Hz>s%gOW5dIn95;y63CdnO5= z0hC`V)-Z7i2~bU)h@PyBpO5uhNy;k_FbbNMoHLqkIVppk&u0oo2|WBhu& zQ-IlUS*6ah7mLcq$`|q;ngiSZJlcbiHjBf7{Z=7l0YwitlH^%fs=*E>!Y{c9g;>Y; z9`-`dDbHRZUEv+7=d-RG&&X>|>oT-O{g4Xl_0c3-&H7|gOvU-HpXP1Jru#czd^;Gu zt8Xf7hqeuAInM5E&E}nM{W1BM^E#XFIf+l6qOEm=kXe9Ss0q7=6~J5%hV6MWb?d?}dja=1X9NpN&)R!#ob`>JsQ%Jp_3JUuMSb9Hfp2B$;5Wj#G9HnasZTeO>EZYq36O!fc*Fh~V*g8f<@~TKI1>xWyj*z00u@ zv&&JIZ71bayxh;fJ2}8ABs|^*)UI5riUeQlc%pod%((9g4gvzEA``+C&Zp40(nS?qMVjsOp~>=FKc|g(cw(GRm1(aMyf`B z5R^66D>Crgd?K&FQ8n-{k^stij|dvhAhH33GjyDv3JuE`F&;y@WT}1f=kBT#%K{IB_WH6claH9~Tq++A{Y>@Z+Uz)hx zUu+=Y0aZoGlkFxf6stHu1Uwm|WOXDFl7RjBrY>N7zf-h!5Oc0=$xc1~JMg;Xd4-AT z?014blrmut#<%<;)CkvU=i<2=Ab`_H^GPTYx{UB&Z-2Q#4LGthisPWQ=3a!CTs`v= zNZAgYJKM*K7^w~3!?RAddzxnoydODO=}-=W!n(Iotq(NLfcI(wiVk1oJdTnX2{iv3 zf$~#VpS%d=q2`^rdgh>1cTKtI(&W?Rn_j`~s=97MEwA1lEV(5JfsQr0HLc z(g&UV%j!H=CR(;0CwMQ37Myh|-=&_Wry`ysOBZ?+v@;{ z6qJteS44cZe`9C)%}60YEt?U&8~B0&hSET8J?Qj-%=Y-fTv}O4f4$oH2Ob$A%5V$= z$^IlG(9RQ&4YsxOgQFf-CaWHS9iJqW5WKaI?_TuL+}Z*8-U1{w%OKR*0oM9I2kbod zx|i1$o@YQNcy@Mnrd#1zQxnoRA-xnoCzr9-p$jLP z`RXai=RjFz1J72BKDa$7qs@kWKZp#KO-;IeH4_5D1UxSsG{IVF=QtbLzh_Q^amRzo zURM^jTZ`g*5CX#9FcU2D?X%z*oKX)^r}cfl98}2B^ zK9i)cCXA0L)3>L(L=2SzGiE>v zt{_yParGLLHbEL-`9jqP@;;!V2p7W5*+WU1b7~dl21$-0eM{Pv!;hp*N|4E%z%40W z73*;!nd~ffG0Q%;rQJu1tXx#h=DH=@k^79O!IqhV3cNfk5VI=?i;vpa!Q9tUu zF{Q_{Dbbjk+!jpWE=<*`(gTQQmpo2~{|>e6k8#(sQ;cm^A}qO;4rTH=vZGG6BPO%| zoXkXzl_kgYymXcmMtqvd;zq=;ej#5u(+XkRMSX8r0QsFX3y-9(Fj*VzR&3eVl~3&X zu7C3erBQpY!=9Wtw+1&@;a*LG3`00}9L-u5{%UGbJiu~Nxu2J@8thh!I=3I!N!fuE zg6$7m7}k&oa|?`cRvei0u1L5rni_vlgyLXCMMtxIvl8~*8~m=%=CC!W3ljMhF;ZN3 z=6$YRzqM;67kAS2t&Qh|Vf_l%lH+>$%tf!Uun{)}V*n{3G+~W(KWTN;1!d9y01v49 zjIOeTp3oLWXuw*}KaI3jz5JICJ}_(ESh^*w^C(9dyvd-~FqCI&L{8DFbkMq~_f3r% zPCkc*nVxB-#i75%O$Z009Yt0#kQkHFiHr$m+GJkXOK7M=5EX=j7Yk5?BCj;i8*sN` 
zh$IeF*$wSl+zD)Gxt*5Z_u!%|4&}<`G(ZE5!Y+gJ*+kghYi)muFC_3%4}HZi%Ks)& z=I8`7INi!=RZZgb04psx5xf+=wD1UuVrViB+wY${vbElL>Rk%Og=v|N#vdTLYst%# z78;+dRhLo}T599e64JNW+q5epOYG_&8~X=^Xr+t%z z1e85|D5OnVfAgA4Tjp`8VJR`&+bXn+qu*;3o|MDGxB65;>PzM$v`Wt^c|SFB9V>r; z3ywquc3tbk?1gDl4yBd#L)D7Jy=RwBovIk_@0gVD_h)I(4Cc#?sZ>pMcagu^3YsJg zzEgbJd=rz~UA!eU6tZfPAlwXu_NaQA&~~1J+xdqn3*$IFu2%T3(4W%V zxj$KdOCHBF{6aC;@V|OiSs#e6;RsiZ`5bBy6d$H$?_u)~uK)X7H#ZY8Wsz7-*L%UW zcCrzOgC{$hB?|c7%HiN7nN{_>xsbV^Ibte{mEKEcmg4_V12b*6VtPj6V)4ULzB1cY zvF;I=IEgmN5pF6&Z^{cK8c-@;E)X`-<`x*=&F@}MIUob%+&{D`-;RHIlst2Khkw*T z*JhmA`7?4=_UF=y_eNPV)Th8T;o&AdWEq<68w3#vfCvPBvVcS#iEo2527%k&PCvf`u$gxz zU&RadEi9$QJ5Bdf6`tv8hBUzLy$mKO0LAtS2aevm_8_D<99QsyJ8*yF=^oFxrYJQ zD}VA{ONU!(!8e*)m^z}!7fZdTXb22HU*w|^6D#vez^?A|50e&MM2x{Qfkec>GO$Lr_wun>I8alr4uut2s zON*?*?JR94A%;Xk;#T=TR$_WKiHwV8=W)L=m;^niUVU%`{9AXu)m2Z&>c^o=0AUaY z)e&aq`%^TR8zDr1$D0Vt@xTT!_4Y>p?841REj$wRyxzx_tAKJ`O9HJh+OC8LOAPR= zUY8+;@b5)$2DWn54e(R@&L*70!yN9M_)g`w(_&FJ(I?_ z+ln+M16@*d`_KR!{-?J#N|uM>m88Go%{%u6D+Uk^<$1ie_jK*S64gVrR|$4M3C6+} zPJSx3S&D&E;$kTQtJpTc&Y)D~z_VdSkKvNA{D&Nj<6mw)YY*W(>hy3&49_{_YYPRpN9(?N*AXcahP&ayQUr)ZM1$mJ^=N!>MT zH8LC1#-1k!48JiOS=U})Q!3TrAE!2RiSG=Sjv{uX0vbBNo`fO>0yRVq7wez_1H+2{ zM_BRcOie`;R1A!AHWu49sk3g}7O$(H1Q!FUVMY1tJYB{`+}H(`Ci%;%X0fEgL0T%8 z`#pecBp-1&9usRKJ9<@*1}BwGh3bdKGZorXGTn8@JavRgxjMhQ2lrVQ6VKNywlH)2 z?=ZC#`(+X~MOmtG{`IIQi~Xd|M$-2H&N>!dIV*S=(o#R28C$iGoconw^{hFoPA31B73c{Z z7+9)>$(oGr!usddCKVw<0~(m1`!?3^1xJye$nMV9b|@(7JU{0Cm3&=ZUTI)|LU+h- zumtGq-3h$MD87Ne<-a-S<&j{pBa|yst3R;HsE15r&RNw6Z}YXg+b7Q~5gclMSHd@B zz_NP{JUEs8x>HIzGvl>zXv!IjthFyS-YK1j$1bzL{6~HZ;3p!&!gh2f06GGscUZ8A z*Z+syo$~*|TLuI&o`4BJRm6iTcR6hNW@x@R=cgHgpw0dCw9gZ~wM}lJfxvY9qIi=< z0*6*t^3&Nmd<6c;snG71$z-PYn5tPkG*@ga=lO6RdT8RIlB|@_+WKy8!%n&4;K7Z6 zPPG2!Ob^+@Ht=q+qR}3z!Uem<+bWwMgexcZ)*rSA6y=lPf$ST}-5i>TMEnqbN1aA% zV;hF#J+aojnOt_Qd_}Iy=O|s^BcHJ{!&3j(Sp3C!tG*Qo zTJl9do~_D?4AJ)+${~hmLjCMzvP-s$Kxnt$FC^6jkFfw-%4j&*5>qvmVV&Om`p7Jg zQXU4sal2U21m$%d+U59XZPqYIUTVvm1B}A8bH0YRW9{L=McW5dc0qWVZMkyO+~t_t 
z+FN6Wr|*QZ>v@MuioKt@t%j}ynj|=fnkQVzmp#1~-~s=!1Pu)2v&AOW+4IGvDASvB zh5oY^m(!W^3GUg-s3UGouMXP-qs3l=S-kc8$M|9r%0eproD1PpnMO{-Nq4T2hUzNg zozPp&47e(;>EZtOG8PVpLqs#&Wx!TbDVHNWk3G`%A3xI!of^!c(k^OBgG_?gigNcB zspdl4vG4@rXgF_aO&mO67!<0$yNth-scx6vW_xthmvPSO91RcX1!^yYQR5FDRD0tr zEBSAY_R@CE1n#0RAB0fpe+^aCVhu(8o})J?VNv|klN46%=s zu=)~Jtk79x8wj)Efc?)&rQz|q&^f;|8qU(E1-|_wDCPF0ExDZ|)~5Co4`OhT^!s8% zuT!%=Cpk%Zcn9T)5m8xu+jwc_EWytSAc_Q-pTdl_!_a7480_)|TuTA?ONR)8cnXv8 zEkXWkSS`BVNTj5EofXu z#Xgb^os%;m!y?sWcM?dbv+?iD;(+~NTfbxR73xm%Q2Zzh$j^V?V-Jl7qI6Y^&_`mF zjZ!q03SrjNJREW`t}pi#I3_?A$^@gKU)YxdJ~E^INA~KRa+Gdyh~JV9U3-Ss5W6(Qwvmr<_rsVtu)&j_JgZQxVKCCk7a{}n0+tR&VPOn z8xW0u9jiY=U_O=FY@UzVHSrwpA8vgniR?5i09xXTTWd6>Ti?5@ z=~U7db*WWP#HFXwevm!1w%m(^W@0`JJ!dYbN!BO7iBIA=O1v*OP%}wf`2*+YKd#;% z%(1x9qEtb^qh?e9vnKc3G+Z2n`8ESW`Qxq^2_4?(FrTry#+Lh+Wbkx3Jw2v8x}!zQ z`D3}`_U=AvaT;>yQ_;5R>NGJ|<`r$kJb*DvLPJuxLNAC*Nu2?Oo{o3Fu_T6~Z7hX58fjkM?$t1`?I{3cyzUQ8hsAn%ffKIXORdC zTW$Ar2gE|j&ldvMeH?5_{<(t36Fi(3T#fJr`Szmrc|C4?SplABk*GmMt<`#ZFGKp9 z>^SerK@2~e!`B?&>DORyW-{656*&fPfO~3+Bv!x<8#pM;U`D?~ky`y*x!;J&Qg_~= zvB6tgJ(yw7PZp|YAj|@%0jqW|wZ*u&w*-!V>(35lR~6PlB5!r40Qh$4@ zJzMa$lfV&Pwr!)>M3b|)7|5><92zv>M*6%u(YZeJxCp3WqyN6%8)!uet@p6>389Y0 z!xrue){T@r+`{U2n;kEXf%hl*6ezg;2Mtd>ksxhccgwwo#N`uL4q6U$I<4M60hlA0 z<@RJuPZ3HR8!T^o$tfz0J{9h_WQ7Nhc^SW)Wt;y##abj~TzB_itAE_mxBBbrQeyW@ zzL&n#+%lWZWAAnZE?cGpofa7b;2)U+GAh9l~va0L&qX?-0Pxbuu&^*W4R;uUKMJ&4kP=|7i zR$a{KmWd8HoNW<4fBpCbb&9L2e(ko+p$(;u|o-Xk15 z4#>n492U@)RlB+2MY&>4%ZL?AJv=Ea*AfiB69=l`xO-Vp2t?*Dt`^=6Z#Vm;hheqM zDhef?(Qp|Go!k`7QKL;qh-)>zWbag#*6$wQ205BuO-bE4YO5Y_YJK}{BQLBkDT&z( z;Qz@Pu3Oj4ot~lPuO1!*7p1Z+14HTdyEdij0C!BQR6GkfDuCaB2cX^IAPK-z0;e1~ zba?v9d8R3y)AJontBDdZ*DmC|>iLKS=e+@wsj_0QxDrnhTi>!rOyFSOfis?tIVIy>U+d4J?XccDPGGdj1xGr|37 zG+q0iCpKZz@@ByEq9DJZJaXA*<_gd#sPCRMH&l|iZp-wzIqaTh-)c5`ADNovwEb7T z?myj$`NAgfIvNxOfm<{U$-e(~jr4>DVmzcyotC{HLMI&rKYl2m8ClYt2B~rG}%LC&t&pJ8uywZ z;e49_=I0mlGu*)Z$+-akk&5!)7~$N9jFHAm0HRQBl|&MDzgAMWtDx4t&TcuL#dKNF 
z5$M4yxPBXqiirOF8{b-&!ARv`M?YxH1$743jcA1k)nHd97ce)sfr03moUWwUM$1l%VG$vBgK&ssTq3qn% z>K8aKB2|(>(t^ICTs4osf`uL?6EVUUeSgGEC`%J#_^*1jUa>f^5qqKNj7R)_3|EgD z%Jxw$UvHjzd$1LJ2Rg!8iZ0Akt#I7X@E9_L-vCal02Ld&cm|;Y7t3>Y~qvmnj>_ zk-H8jK+n69WboWx885iH`$BHzoM2yI&3!Wje5yzdN19$wrh37{Sl5h<%k%;?zY8I)sdj1 z`S=I9asH^TA17~%tQ>)pwFmu;hxgv zceYV=3O@(8fMgOYTU}e_sAn|ZC0mrlN!PmM12woia8{eEGOHFq=;=xZ6nkSSsUzm| zr7?RmYgfeV)b?O74$8sk-(P@+EA5ikUjcU(_E>x3DxgQ^v6WCBuA-ypGSK7xXu#(l zKjdqL!{rulL{I600M~@PDc=vZU+;(;x2wJ1AJFugQZ{3Q+sPmY*X*|fdpF0hw}~7^ zGy*);$X{tt7TJQ?UXxKSSKF#|V`wBb#p=_Sq9nd~zE>Q;Wyih(3o?&@6V`hOnX1n? zR?weuvF7iF_8(wOhdL&1pbw@jMgpT9f^--jmFKqi(8Hg_P3frB9u_Y0&TqN$S>^yU z7(}ZQZ_tWCzAXy6&*b?hH29I7B-|cdH5CVAz|}k0|CLOd%sQ=9XLQ>hNC&d6c+#CI z^289CG7J4y?;Q)_BijkY$prh$asMqJZp@m3)HQ!IuNz=EjPjIlG+C?hjI0z;<6d)W zT*t3owvJ03i_1`bST!GqR{YW{T)RR6N)*5&2j3IXA(e~--zVd4nc ztCO3jup)@$BFOl5Puo1dt#{FV0zyM@2j~4#zR2MGe8A81Zpy}5Q*$tYG`TNc$5PQ& z79Hp%Yv=H&-fR~rwm1&7ixou1s^Ws;foTQ2^O9m8*`SXroq@nm&P#LdE8b(Zw!7D! z6IUg-th=}S;io8)k{s4a)s`N#Q?<#ozrd8c{=S9dO;zn|Mtb&Nie#iQ02C?84y0y1 zxAhyaJ~4?Pc1nI4A($h!fcz}GnROqy~L=fUi1iZ-a*u=;4s zpsR!pT*>=^OE#CwL;rJVGD8a`-wc9+!eH8PHIKfgHC|`v2p>C{7mdhc?-T-EU{VHw1*qqyt!U0K5F&A-gvtOGiFI8wX}Ssi*h8gSKUfTQJ- zp-CD^OiTTRH!kMa$s{~?Hl}5qYGq;z$;?EVEKPP_Uj5W1a=3x z1Es)LL{t59Y^v-BKo8*DTLPQ=lI|@l7QVE!-(@u^jL8&%P_=O00 zm6}-;NaIZqYS)hZ4$rf{AFB^_x0c1lSbB%3x}o(GFDrHRQ^bGZTFoMWqU;i=Xv!)! 
zs|6T%2IFEY_%}Z>sK5(W_|>bq6jETXISWx_cpXj>;Bp#Rsyu*3JY#GXhGu4FMng$9IU8VjnWMM6~rD!{KC0IE=7*$%(l=I=A+bjI$NRL;o-`pNzgL^>fZgb zK+ym4GTKOHINbLJi5B^M41OhPxg{ScPi?Y(rurgtCHZ&uf+>#oDjFFTLkP3`bofT1 zV^`>3D*K@$Ii$|?w|fCE(YT1~5(WrY6vMzP<%0#=c=P)nNb*%t#XS$}@}sQ2D1$;v~?!`^WV`l4l6|mWptq`hw(eLe6D!mh_Ja>~va_1mF#+;#)DViU|76J{yB3 zSOAa#802!tjUaDBLi%+5fk-(tjX!?czj6xzLuMsQ-KJfIm&*=+-+RwETlVv3glm_}=+`TO!1M-6;3@%E{5|@zJ0+&ZgzaL@5RVe55S)`$#oXVncL%xh@3jEW* zt_dTf$l>IC;Ie?23G&K){wa>xU86EPb#Blnsv0R|pgiC9yh?a|j=||Xt0n2-JbSK1 z_qH`O1(3p4ysa8HV&b^5w}(Qk6hS|rH*dmz1o!@_4LFn)$ASW3N6zT~Ty#np4X_A` z#?22@_*K)hJG>7>gXw*p$yrq&7R*FT4<;|`SmWMVSc6N%S(9|D1TJRWNB5G-+IR-1dB?OIA1lvEmkV8ihI)2(7-)o*-M;*rGLg2R{x{1>4tmao zl1ZHO>UDU<4#MVSU3EA15ZFTgJIRqPx0D(ZS7@9TC$e;EKBJRKb@t)JGdWx1rW*f5 z==ms`3A8GZiC-0@)F+3aWZhPgka*iFsuD=X5dq#$EeJ4QMX7lEdB9r#`STFaFs(R0 z;hch#t1d6Px-b(EY)>W8exP}cJ%Zy}mLT^{7EpAUbB*|xJUEyzEkEN{le>sA8>x`h z|Ms#vN>V%{)oPRV^8Rjo%w53diOp=m8x9bAh2!9oq+&tM;-qEHOtb|)X;`YQ>Z9Cj zQO)m`Y$55h!c=F{KBE|-B#3J?_#<&e>v9$I>m6~-@PZ%CB|+sO*GI%aZqE~AUftva zqUdRs{hhyQm$j_RrbAyCQPc+b|6F_l052W&#~Rn%-pg-G!h2@LjG^1r0)KEL zZ;EeCS7EI3**`_o3 zh_=7E$YFp?*3VG<5BjT&ETwTRU+MIzrg+Ey@E)MNr86G30l^Va`ZBkv&%ej(#T(0e zxa`a9hUuHK%ZZQzd)^LTc}qdQQ$F%skfYec)Qa9HZS`Q$&@JdbfXM`Y|`L0mTq#mkP&Vg!OH8k!B_+S;cEQB zxz~=6UsU=~asr4L#b5i=Ov^zC7xGK~FOCFPP=4Hb0dxJ36LyDIlVh6OY1@a3(~PM= zcpF%ekC&?1&!#&)A>NH{1~4U?Ef4YZQ{TB`+fV-U4~nNmK$^{SJ*=SEh%NA zIK(O4OQ$dRZv{e@Jm0CTDv%orYqQ?ZGac zV?_Y8H4G`Fx}ZVCs}kIYMx#w!ia70iWQCG``horuM<%n~FDTvjUp7N%RF;1{?D*|z z{s4U03aUK6dUr&@JuvKFZQV7?!J=s6krfC$-j;4<=w{kB{vn~|07Aa^ru)`HZ}xII zap3b@bnZ0{3Qvq1q|G+*Cu#QQ>9dZZQQgE`(#16CI;tO-VIFBm3cbb=}($; zk#U(BDfdar3wWDoRP0}tGq8@UU$F*RU7dk4+U$VNlkA?WdzK-;qCQ^s)1N1Q$TP-A zegijpmU^|I#bpv?GiJN$n1IBb!GJZu^Y#~e=nHSTQoNSIQD4qq&EzM*tnL#PELO96 znUKAqZS@rAqBJbT_qCr1Pn^7tGPP6p7Dy-?3RBS0!4I@M&>T?_vh>#564@mrgg$xb z`We5i03QY=i}BeQPhBei-TI}Kf3&Gh6V&7px}0(N7b~em$O|;&c=~nH&Hcwwt@3hGjM{7dw@YZD`p)`loC6DoH&&Z{_p&ql68lIaAN80;h(9WgQvP>5+P`clgb zVcYqNqBx2xvMs}|#+IC6cz<8ko8ww^{9TL{w0hQWs_FL0@0un{bgy%uXo{CssA7Za 
z7AP+wgSvzOMAd*ct*&E5?yz8AW)|0C&c$m@a z1?@ZU#0s$%Xt<5(g}}*6!PP-DO|XcPuP8273RPV|B9IK2W1-%T2uFNY=#w@0)&G69 zfx2^{!|U&{5mNuk_depCY+R@7c^{SQo00biK&QG5j5!BPB9w+dLFwpV?q-hyG zyY7n=sGy}4C5P3fb?->B6~`zu8nKqsfUwE!wp2~B)Z<8)Le;maxp!S=K95Qt zIV9l594Ry(GCO@^=rzxU`)7;hNkOM&CsP4}K+UzPY6;@wbD67HuY?gH$2n z753iNZgh`5R8EvxK=C|I+F}YY3zGZuuOJ}5$JORQkXyh_(9q>I$@b88J$<{&hK;^1 zZ=k-l{(5DxmvwtC!-~PeqXa3`Cq&#Mo=2cgn3?Wi=($k{?;?!tUg3kK+gI^dQGs#p z&jth)s`Cq%*mFQYI4a%gOwfNJh&^A7Ot2`zf-qnU+q41^CAx~hL!_<930X(8@}jKB*4(74cjv^mn`q%#y=!^Q(`zG7%gZ#E!I z)?Ho-*G;FUfaPz97BV6_a+{Y{3{ae(#PwLdU$S0pV(@+A*GfwBHnmS`Y7v(9umIYk zzP0YU_o_i9ffm9~x^iqnAc($zoG%(1Sz9M{&cOXm_3i8j|6p@E&eR$&2fuZXV8@qj zgKNv1XT@+l626CtZ zt12dOC8xJu9E>YK9+g^ZDbz55n>(BNLhh7uP2427R;W`9`Dxh^I)HSrs|V7L49Y8* zJ{|m?ZGrAKkG-?Un@HqBd`Gv%-c$*j@92@b6!y=jy8+w`u2sbDV+@;z!iwj3a`UU6 zgM>p{0Y=!Wl7f%L2LDW}@9a;%sG!-t+cgpT&4~qUb-B zWSNfq!CUcbo0#F-KK}kdwHKM&NUoICqj*|Z5Z2w=d+!Be{B8L6b!(0HN5p>?WC!rN z|INfZBX>g?yF%^O!}IL(TiZk_#~#0udEEx!S=>>?Y`?v~5k?MmoTppY^VMTpuQrL< zr2&>N!Y8}>2t!Czq{--WXpvtml%eICA+m1!2=756DWPN&kj;dI>YTBzD*FTxe>)*b z6Z{kq-9X$7)zFlG{kmQ5CucW0U$76*GLr#ci16MM~a zxbgBP2oQmmYW@+OiyK>mkM9P4aZF&kVJLNo%oFUQogo6B7-jGvY?a?Id+m$jsagwi z7W7|^5u+tLC;B>!B$%Wnn8h^v}QJ<5#{ETXWiFEw&_b{&%e4A+1eUbp346cLEdhD_dfGZlzdW+{Od7o9oj; z($5MCjnVRGoQ{DDnU8j7B=zj5W^SP-+M2P`wZ`2;QZpy{Nm#$N;wc_QS}SL6UqB^u zm-d8hAok!)CE_X>9$?J{Q5e0>SrF_fL$@c zRa-w(AtR_2S*}=mS`9@8crrV#v%+yV+3$B#>r~sxOyO#Rl^Xib2UNTVZ3~ zo6(n61p{RFu4-b?2As`zj=u4D$#3;K65a?;g_YKPCue(FG>DUxgw596Jui9KQy|Ma zj`eSawyN{n)NaBVR+`*tza4ulJZfLwPd{qsA%bdORNG*a{)ft2ryiXb%o zrpq&(vJ@F7Ej9&QKy~9HA^}{U_e#GWUQwFB)D$d*gv)gRI+dl0v@XL|9E$}7X}|qr zGIuflLkjVagbhGoe6}|~+wTrjm?hyhy0kj>eG_zed3q!*$df*OvnzvA!FD?}lLSOGq za#dohU;UZ}#N-_!3DrkOBti`|C@WzWeC;Y zXIiFKLyB2a6KdR=S6T6q$)4sL7N0-SqauxjBduJr9` zbe|G5V9&Rw(qe-?nc&ws5F_nY4#u%%*Fiv}Qhk&OE!n(|^{<*%I5L-J1y+9%AQPSGX$JLdCQE@?h)j?H&8w z&~pD0%|TQVZ^y>h5lGdzu;H~z}R^K8ji#q5#A5|XDq6Uj_h$; zu|&{VIEr2VjJjnCG81)v>{58>)og?(LxZ<^y-|2jM8Bh=_KtSq6JImc3U>87@abx4 
z8COE@j|wdf#TliYE#Hm_ABKV#JoQxW_|npu#UL0Os~X)6v@~AwTU=~eEv&P5FNVwB z9F7eJC?hZ-xQSyRsC;F|LwNky42+W5gQ)Nj*IGyw)O#Bq}3G*!@99wtOW(X zbU3F@SP%q+3^jCYu$@SoA&(ELA}F@jAsc%g?av{z(dj^s5!j@e-B=U6|mFVOS#Dc$^cRv9B<@-9{@kV`8!dG4qV zikrs(sC$v@KHcZTQAs!bu<~$lccoA-4m?+BifBnmMLAzG;yKctJIW1J>&+`c^*u2C z6zea2t8Y9dSKYefwCFDZcHV&jP9jU^ufy6eNfF6cSabC0R%~E{w7LcvEu;5_yNaNL zc9jALB^nRjvjmZP7GYjxK^9?w@)P*aiX30^n^o%f81(z0ai$J&c)W1*(P&4FVgg2y z40?T^hbvORF7a^dNhWI(0|H^{Y|L;K!#gPf*S1M<)&~4L^m78 z5D5~qJ7$i^lhH}-GvC~`5Q$#Qe;Z<;)Mm6^ks0|wf}%g8`#`U=7#rnTzhhxcxYe4y z?x6+rE8UE{)N(IyxN2Ud@c!;PwBi6qU7}{--**o{x0qi^S(6|`(IO5H3$FuG$jx33 zjojm~l|%r%dyXIhR2%a?jo-T z?2hX4^+QkB8MuGw6R91HE^&juRxB9t1!DQq&ko#;b0O>27z?rl*`_txFMtV`w$VUeg-8dPX8U;S3=bh7 zLhR1ymDZMXTW!SmbC`>xq#P-z2B~9ba8PubFekcFej%>4&0uLOYduQd6 zEUom#W=5J+CjC@6w#QL+P~$-~peS>nZ^>^Ms?L9DNp}VLHW)qvdoq1{<*{u!`-z2n za=1Aky2I5lKOu{UoN0DgtER<*FB#I&aCaI-w}zapX37vcF2~0g8$4~W)Kw)m_@5KV zILTwaz-Xmv>|6o*huBx|b*89qt2`8c63Xg@9UM31XXD^_gpK`>5A&E2xYzVdv@Tlv z2QMW&yq1y-NDYYc2)K{lzb}J!zA=}7G`oLB&fge21lKJ^y=f<5###1`-HidyIgD6H z*2jr9H@?k;!VH*)0ZJIB;V=$JOMd0r?HMN+!(m;0CfG^xV0h~crSMxnQEN-MOSG$C zfas5c{YeyK`^yr*&=nZi_V zNTC54S^5!8t(bf1y!@gG?3nke<)5mqkJqrn`8=l3-cQydwUxa4+b_hYnp8a%^|s0( zC3&0K#N@N@=?uo_?;7N$s=?FU7sQm5lqDj}J}1<*K;*F*5XZmC-up|xUq0*vV`Rtm z#71y6OZ5{P6cVp6gAt=ty{rcZCnruC&Sj*x)nQqx~o0TV6aT&;nEK)2}d|oE{+dLi?@K=U;HZW)a?V zn-(4UY-z_0Pg(Vo%uyBqu*%nGdzZm4{Z44H$NK!^XuK`-uxN;lOGIM;y_a*xDzFS{ zaYi~3x(=#rbuN(~Qwlh!RvQ_RX35=cwh2-q8NM9)h)Q;M_XFv>J?`NT!*>=)opiZY z`+7rz0h*A;#%Qta4yTJ%7HW4Rg9cQ&8Z4D?x{@lDGUOab3^mBk*M}BZHzO$}WMiC- zuzfH-(z5~d^TUg9C_`S%kxv1Sm!U!*3dz3M+(VIPXJffsjsym#&_5*|UX<*c1>_{5 z{WS_7uXh9U;VD6Yh=O~enKK4A(!T6^F8d#wR&nza;rF5C^x}Pmfos~NYw6jU zp+uA*uRt@TR}uH zwP88`C(ln-?1eakcqU>byt{1A-Wt`XEAVMmL}h0pO{NkO{nUo+IudA~j^#R8ry5|^ zDU`!`XwH~gK$>aO9Rn2xVf6HRZ9;YW-@oDxPtd(=9Gx(%`e&H1KgRUG^=j2J#yz(& z1NiIA0^K&DzDMxc$v{OqX3OP{KouEu6UiT8SB#hgo8fdoZdy_02*!mrrZ`tIcrT>0 z#M7*wMSxhI1|cy7JBjf8tS1b~@25suO(%>{Q#)^cqG?D=i;}K;9~r{2q3@ zYeNFeUf 
z-JIe2ZL3khKv{MxR{Ve}%9@N|i>_tgd-1NwLNiOO_`1LNB6db$MY)bd=I z-zye8fslixAmunu)@9lJjTFR<0IhW%&ueb;=;yLxt%4#GfT^2Z1#^FRMT%$djQW-5U^B3 zG5sC6Q;(#rdI>TO$kz4;VT_~seWX1H0ekSm@ZvBdkG(wLB1PDlN+amNAE_Xg2^_~Zd>=xUtK4|ZMvHf~wP z^maHjBrM8m8Qng<@irgz7^5M#y*K&kXCc@JW_0TrxgRuqha2U-I zqqDOKtVbBL)?ZnIT;tbJrR>vlS5ec*mfHL38!g&rms%IG8jJe*jR#lKVBeHa$RRFb z`vj2f1ExYzYkoivm&xbB9^wxzdpyfC66l`J<`S0Jf zI~c#4neP7wDqaPJgkSCe2%A%7XS&435!qy~kB6?C4bfkz6JTX0aXLxUM?iUEF)r6f zHsTs4?x>F=P53|d-toJ#=8G2Yq+{E*ZQFJ_>KGlnV;dbO9jlX$t&VNmw(Y#TpP%j@ z@ZN9t(;4H8bN1dE9gz59TF-)R z^+px!g};!-Pt@I)uD2<9&za{EmFeUsT_x5FEjIbm&({la@sq9V4VV+|1yAQ zEp&Hsgb36sTXdb{jA}yp;xch71JCQ7`bIw2(p9h89OI+7B**rzgdbj!H3u8_-?2me zt+4eE(1WCBBfIIy$bxDhzQAS5KC8iQ$#PbzAb?7Kwg0A(ix4Gg z;-n5M;guY#$IVYxHj=w|h<|{9kbbp%PwC_KYa1*;x*~smX-Y>h{`~MPO6vP6fq$89 z=&nnU_7&FnPxg&7^K@=S&)_lrNGGBb zyr>S;%;PH1f{;ncnqutGK)f9nJF-P0s+)Q}V90%eRxNB8*#PHpH z-{D@HoDkwb>t!_^ggCy3@sbFEYSUQvI7%4~wD2l)VWdHGlO?7kqP8~O@To?ex>nA2 zC--Dp_Cz+h9)V;cj~d6gz!?U#Ox@r|@q-}nm(-KR8Z`R}p)+@VJ9xWG7z&rmt_P)6 z6=lnIYcKi&=kgWmR`jCc&GLA?4{rs~c?7H)C^F@inP1vm1o;6q$H9mmI zVcC;7EqP^NQ*o~;oo>6z#It5<@n}#zt%1YH*8W(bC!Pe(WB~F16|@r)+XcZTm%jcs z>a5rvD0R9Gyz5w%YfrK)anAwZWLuHI$m!3-O89a+_3HMPCvhj-?urhDSrM#4AALK~ zqf*S})k@n}4w+21`By3$g`gyar(X{v^GSQJ1VG&nh`~^aPWL>A!%Nfzr@VYex^PRk zw6{UyEIQ`>b!fY{$vT*onajHk9X9gIy!F9zEvck40f^0!0tjKCpqWZZ`s}EqxVZBT z$k2GU)RO2piJ!D7-~`MKn5;>}M)5yQUsqlxeC-7e+zTGvJ5PZI|55lJA7m}s(<0)x zxZd_f)@5%<9jX*IGrH|}@Fn2i0?fQ zU^by54bGoHqK^~=KCf+29Qv752#W3cj8!42?iBFVm?bBC8t%FC`xJwaZTGxWbjjU{ z3W-j0rMyt&F=r9@^)K)h|HKGG@EN|Hby8qX9?K)OJ+Vsyj4clo;IrC2Uit&!pHmBz z8_3W-G6mR|yOjxmgY1iDfg7hx5FySa=XkSt;>u~=?np6H_;_uZyD!61rlZVTaV*)R&w1*b4;cFEFBr2dq zr)1_xsWs-r?>4O{e9P{g^8-at6*o?gd|6b%*DoI&mwM}|xnh9s;P>R+s4z!0OcJJt z$z_K~fp+(sl5hRJ$6!4TaVJD5F*o*TR9tiip_xv9U2^=dknnA<3l!FY)AqQ{F)V*V zaUXQ9J2}xb1?{OLL{5fXo3}BbEK1j!HIdik>v&0s!3bQxWN_!MNlPov&?vhjQCWID8kLghwJXvx@%miBm@L{GthUFULhxii ze{-8={nMvAS7i+!rKDS$>Ej*1lQYH0;K)UmMN8N-AG_tWtl`L!oqJ|6l3YTRQ|~(P 
zbyP;d2{9>TkfE^LRdO0%ok3?w8iBYI_OH|-weG=jIWC{Leb0c3->=Y(@!?|pu>Am} zTRc!91i(p~MuyY*{S3NE0XUY~)aP8Xtfr^ilt*Qtevtn1OR`L<(0uh^LEst{I;A=o zKJ6VwKxo_5>R{j zx$nnRofm#9$tVA+bzs9a@2maUXlAPWeOYepuJX65oxqNBeZMV0#*8}Zch2wgvg%OR zr!(kjSKVe8J+L~SnU8tamu9*ne>hUs5bsx`bV#ia>lw9Z{coAb>I9|ipvyS5fN4$_ z7|elNto3={8@wsbXD?Vw5D(-Hk){i@c_RN@jOk$ka?m9bA#lrp+Ay{(zD;>#(pnZP zUP%{to2;|y8vhkyA+s&z+Vw?TC$u3Qu!Xm#lNmrmv6MG5sGt{-^Hri>kPqfE_NnaU z@m?K|6H9sLW-bXeg~+`$V!Vq?g;*V$hRrBCwP!EmxrY=xy+9tpC2OeM zQy+m4Da?W7B}h%x(ny|qGG7l`)w|-`K*F{fKsELoZO5A1)#B@*3+hX&HHThqgz-=X zo+v;Py1@aJGh##!Btit-*X4R03722~gbp;~5c_S;HC#QAhEMQZuj)o`|8Qd5=1JwA zC$zHfMxPxc7#cl%ss<+%a?=_Z+K(idK=$9ej50>4Xl;5et)JSuovfBgVDu13VIRop z$<*Pt2c!WCjEUl`leYJK32(E`wv08N6aUPL3+Zg&yO<`m{dFecjuDdaplNhzLO>VF zwy~6|CQVKnfG^36QBRkuxIrW%xFbA4W#^ZiYmCPS%JM1fQn;m|M(W#JpQr7 z3wv1{!vF^v0KOn(@8;IHjnG#wMeR~hSSVpBs{_MMgGiNX^QOhtl5s{zK1Swb<6j+fPz$?H-V~eQA0h_f%b|jnMH$s zqxF0%>v&FisZGvW%wY6(t%$u`>U$aJrsCoAzSuaEnWar-PVv+j4TW4t3KSr+4Z^B- zpaE5lwioN+3{d^WN%a~^>x0*5j4&mly6kP$SW~zk5#|wQV>#cI)JhKWp5NlFu9Yq< z9W+`91?Ka5HNcQ5lnM(P;5R|UbTDakKGf~Qn!l6MJYzan8F}$WR(v3rF1|s1_82Vm z*aK-$@evJ+(V)ASP2zxr0sAmG5jyb9jv&#?<@9p>#G7lt=Jl5OlX1}`IPdW~`Q{~d zjf?kYGeQL0>0shV&!DrJWpQP{4LWwf^yW(5=)}I$3zR;FXOVW<>IU^Z{(~_CD-7rr z3P{PR+G91Xc!G4=k#d7}t8O5a4G73E%xkmn(!n=M{SIG9l<5&_i744Y(B{lF-BIqF z&EZ~XRDOe3ZeHysGp*en$H4^lqn>Uy%408b(`|Rlmp$sQ_{zWs4g#Dd z0eR;^eZKa68VRv$@?hAyPylPt;}X(9!?a?d1}X|qI!e^zo%66pug`0kLn|{Wi3yPq5xz{Z0G6c($53N&049q?py;oDrN(v`GLK z2I1n-h4LW9P@5|5$Kzv1_bQ{!Uh`{MtW5gE$YoT8XIk1~ylQSoO-y#2O=vvTowD&h{o(s#KIE+8Q|(R)wrYWk$rj6eDU!X-8vd{&-}mR+ zt{ExoRDbBtZB~2L!ChI>2Z54t)r8!IkVugyC0u=j$Ynl#o7yIw!KmSQOkRZmkTf5@cz zbsvJP-k;_zp+ZuGIL)FqOLmKYg4GcgLaKuIH%hx1yLh%acP9Q)F16CHQz&4+*ZU1t z60gk%gT{*|o83L>l(LkxjK2Ww8d1mGtK-NGWdWMTqr)}3p#u67389VlMD>WGwT*67 z3A?|wZ$CE|fFzEahnu=JE&g!%aCYwEbtD85Iwnf`JE2!62a#s3C6*Dzd4ui zRm6E9Ov28Yfm}GlOxV$|Xk-rGv{8)NZHpwM7iR%CBaG7#MNpN)XpLE1Y~7O+^wj{X z!}c?}a$pT92}HHIwhBtH_yY*bo< zXbxdVzg$zrz7*x5L}8KJt#i8)QIDU$OkeQ#@XmO&V_*xa_l5NCPUyLAlDAXeV@hoLhQ}Dv2zJ 
zj}68{$LGti--mg^>@9bti}R}53ga!JpG7fEeIOX$2nr7l?^t(qe5ulCKi2Fq+nsn&8uANS!LfOqdl${vAbaqg*)q^%J|#9&4y6F7TGK3<$Wu}JZFZwT!5fI zIEhxhkO-+w%9Le)-t2fV<$*0iO)IJYYYtJC{X<^p~iB`{jw8%aL@62HaB!8(B)UcpA>R}WdD?@J)DLvIGc%e zgYcy_a#N$8z1eY4{580oXp;`C$^2^XsfSwV+I8$4VIPiHmTb$I78Ia?iUlebBx!v` z7JjGwIW1Z5@LIVoha0a5=u%g`pu^X5^2)|EgD?CQQ!av&dw_bVtp`awVoIyR#x)V1 zet@g!0UwxvTwOk?v>FE6AQOtl1peU}bQw*okKHXK&=JD$ufxwm2` zfq{yBRFzV8D~~!rRcvD`p2m{TuUi}nYQ^3E6NNwKfsY66wti+SI)ugv4xR`4wnXgwe!P$LhoBtmty} z>FM>}Eb~ml->x3i=d8H7yHsv;tQj4DN`k%^Y|V(e`QyFP;Lfk|-JYGNTc#gJth3$} z0yzM2U??4PTihPomvpkGnS!G58yiZ_rZ0Y{iPF1Xu8Wm z3@$F}e(s2$*JC#J(FAM5Y%bzstixCEdQUgMQ`LSie;;<*2?`{)ZxYEUE0lex=(3S0 zV#F8g6z`e3d}buzKG`hk1cUCdWkG68+(D)1ttR(89#znkz{ebY;A+xc3x~&s23;jR zjQH@cK8UCdjjc>tYhR*0@LDf{ydi`ivX-tRww6?{4pPmW~+)8jP|UC_3}9Vw)b^?-2g z)TRU^Epu6xr`f{jHdzp9{|twJy<}=kwg82BTdZ?voG$)E|K|fXhJwv@9PTf+$Gar; zqJ~le660k#-_pm2Gltc+k}3BB|M|K5heqD_rwWdj*Pd@DAJ4$#WoE(eHP)y0=jv3y ze8Am!VlO}Q9WSp@C1#lk{T-OXrc&!Ncyd-384fOVr#uRXp_(8;1kM1n4j51O4U?9K z4SwmzU3UJnwfNef0V)U)#ON13NX0BH+Cifn7Mg-PIszVO0OA_@I{ zJbIhYt)m_!*m~uRr&vBGwXVqec7Y!3_5x&2cA<--nTEyU9gz8)H+z-Y1#=8rI9#32 zd27bpRBKROv^qaJF1noVE~!F#Vz`y4owd4iZC{D5t^j$6c&ZMv+k*kOGRHS$a;%tL zaLEP3O=#()E(GNsLh&+zwm=#kJB_mfIc*`34_w4NM!bDU?=adrsKcSG{vIW&@4IuA z!2C4BCXnIlg5&?7(P2-*MAbM`+B!SFkiT7M)6ReW;QAWFt;@9j$BXtsN}9tx%UL^{ z@jfFkX57GS{KNVT?7h=S=2WK|KII20;Z7Dgm%s?DX?nhrPlK74z06I~XP?ijPQ<@i zmg4Sx509xqJ)q1vXerYf5I|}NdzX$(P88O>(3?PFR5KFv$I4;kEM4683AJT?BT3l# z+YMhAtTdV^x&^@o}wx_RSu>kk8=GBewG zT!K;%CT*RA(gLxj+u$|xtG|s}&T)T=&z{w^d%+1+1};W%P3}ddJd8Rk?90VVvZH^0 z%84)Q%d7__^ZwYc)^KAojwK*t8`LxK@NznpoJOStRnTzbYW7UD887R2`?kGv&AnbC z<~*xnTeD{~LQ?k_!aVD`^ z^`1@x?^Zo2&EY-H<+hPQV?p4ZMFyz~*WYv?pTA^cuQUg|LJz*fl9Lnr&S?@!C&$D=h#wb?Vg+G`;40dv$$q7exZ<5LT3 zUBaH^uN3S*i}Cj0n{MWkUvvk-wS3JuuMbOUZWessme%MBl0N^`4vmMeo-tZ8gn2v} zfwL88%%)D&BOzd4zWY4%9ulZozOeh1l{Os)wSqPG zVpw%Wg+lt>wmeJ6AZxYI**Kj=FpJOID7m@)W_Apg-yFWLUucPd1nD+5W3D2z(W?)y zFSBoK`?v8A^<%4VW zpq|XIDgh^fu6LPS{1>cYssZFjhZ8yH$ZuO8f{R8syOv`PZBNGNsZ7LiJSzriGOdy2 
zSA$gbYi2%D7E;#lB?tX)ycZ0(dd9jJX48EG99H>W zO;9nWvG9 z>fkY0Gd$1Q`*QjG(%!KYYx2QWOW=6S$kXEgt|5Mcl`=7xh%dt-H1hPaNb!DHiiKlJ z_BxQDp4T1->dZI;hcG+7jhnPTN)jP2Jk?TM%!>d@*JC5`$!RCST#-6BxElZCx)ip( z7fFYr013N~<0c+tT*wct*|5}?IM#5WP*mgb$~(OGVqW~j+T*Bk0=Lyn(t8)ck0Du4x~KKkZi-NHA4Lvm>0P9 zL;8%#F?H`~Ec34xvGd$Sc@ew{j$*PJcDVYc5O$++1HKU^^!UvKtT8mfUWa-9hJGPm ztkJdCRZF5%tNX=rf!-7Z_!D!CpX@CDxi>vV#>IZ%pV`aF3~ikW!Y0=bjPvw(1Ops% zukQ{j@uV88ic6u2c~E%s8QQZ@lIi=E;m^;wl&G{|>P|`99%#DmBZ{_jWxQ&;t*F=% z(vFa@PTFaj1c0q9l9EHraKhc%{)R)OlA@PLdh9GYWmGpRzr_$|a)@k)6LM#Q0t*LU zDE~sGW$v*?sC41m#H=qGX+8ud%?M|YE!KCTSQ9D)Ombe%QDH59lQWi?x;1ajM2oW9mqw)d$z zh?PNWhy3PRDIM$C4Q|k;PWTzJAp_x`Oy`?vSdNc&QfTYXG^w90W_@Dx1P^*kAsY3# zE~i%~s@UKvmOj@;b41Q-Ia;G`49l2Oa}|wHfl8M7qV{<9!D~NiL-p=@z_evTdlL7T zSP4FO`SxkGU&YJI^Ia#pX75X9%cqZM;)<=jPHn*^%T<}BTLxYlD$_P=WYti4seZ_7 zRlA%m!v3mzIZcu|YaYu|tGwWNLkNWcFHsxS$8&${GBcw!(m2#BJV#0rF(UpTY!VJQ zx{ws7<7vr~7|B~lY0CHSrf=7}t0qgh5Tf+;*e8h4kX+_H9a2%+@7f9}uhm}VS=ez;Y~ZTFZ=FGO-k zi{naoq#!(AXy&^YG^|h#tF($xfp;37(3>Rzh=eZm%u+y@G$MdT=mkXGOif1 z5!9Z2^CXOsG~2QuCaio;gWW=hvf%S|eFXi082`mF1GyFI(X7Htpo|-Chl8WU!ER51 z;9bFZF58~wReiqD`0<`h|5&9!nEG1hjZv+^;ot<$?8~p)HGjnSS@8YQxQI2FIOM8y ztI<_e`oa}h-lwcMNzeD?v-PyMbeZ$l_)y8=#IPy3!9!`SxCl7w%-$;z!kMv_$24%U z61k3sbTFledRw7CMjk5GJl(TD5G&PU-tCA41N?ZAu>gALj$XOzNpwgPx!uEO#uvrajbHGo5Vqv0P{kxuQG@-u0w<_yX4mU!25YFWaw2~ zhA{DYtVHkhM&~GQ_q}X>22y#gPP)s=_)BX(r~{P=r~MG81*PtCPT0^HNzs~0DzF2$ zGlm-}p&}4n8CLrzislD%kw=hh%u082<7=c#2k}$UodNcoiQl~%?zty?9#Kd+!NW<# z6J%w^Xfls#16!9g3c0Oz0mcUHB+HZ8i_G4AaftavkmKu;Gc!iNrp%fOf~LO&g5A`n z*RI-mipxs4pJ?nMsfeh&|0yGCJP`#>B=vPt{xAvav*hgCLA`5pf>2H2{fP%|OVO&h8w zuW#0){&?mnTBA7R_KWtsTAF~}BTl~pXV}((s(lo{;2Zt$XOtbTjulRBNnEgirzPm(Ko$`uX9JVe58j>r1Od-CGqr~g9G$P9B1I)wX{I0 z0a+gE6#AA=Z_6{h)M25-*~Yq@K`x%B{2bZgr#TqZ-DPHJ(-_ z1Z`h35H;V6e;JoH$qqKFex&$nw_9?jbl9VUZHxKAetR;j98_|H;>{Ub=v9t>NxA&x zHN=I9QVBiiL5hMm^0C{{Lp9uupMV8H71(>lR4l=1-uzjq#AA%0;kMT-%C$p4@OI)2 zIIl1MmE>i;P;Ccf?$7eGOHA=%tma!l2I?|g;pM~v?k|<~l<2`!K6;-g%;5$ky)@etQ$Mpqph}s+>pWx=g 
zJ*w~fuBht!xp`Bj#nz@$jT?Fyf;+L=>J$(;bW6`KAL>pKGu6GH8%x|g|Ck{nGR!^C z_MRl-toaK<{)o1@vR&D}E1Ow)Mk{sv&5W;B(&r|>k#}ELju)hlWRC=5yE64I)*VsP z+B+R5b|hE<2?^rt#pPl#I2NKfwrc9|>54py1M|S}3u!}-!#hrBW3m^o%Y_kwNK8M# z;asMKa`?^{=Nn7%A^o2HoZx8AZMRzI zLxn1>%;7B(wcNZdZL+lSQ1NlZ?9|{F8a2=FfzQ5s!aMZIWYZg?F)pG->r9P}R#^PIDT`GF%LDRVYvx zkoZyA=%eCX;Wj(a!Q!A_cTzobP(Pwy=QlQv&2ziQethANG#!^5gxwm#Gs8UBZ+3q; ze&U^i=ZiYVF@8fLwK=P-kQjB%S4JOAg-sc*ztoX)zB*?IEi}f_+^CpZ8}1VTd%n$l z!4AG=w*_PG&}bHIWRV~0QOeA?+RfjrAx_T>1D^2pk-nb=S6H z=^zM1Zf&B&o%$-Y4#H`FMv$f^cG%rFxX8R@p_oc!KK6r`DpN< z^q=uRb4RpWKT5Q{CY^pI5j~agD`^TkJydlqJ)p@XSBEHE(uMx>dx3J6?q0jM=v4dm zeq#!^+=ppxZbUN(a$-p|pb+tLZxm^KmBYKei z1tGllI%q9#p5TMz$dW#Cz9Bsj4h`&7)DNeLY)Wb5yc}mu&#<*lkwy?Zcq5`UdU0d0 zelI@}NiLA$C#ih$;X>V1G_Yr~;=!Oo|3f6sqfjPYUWk6bhPMTxEn8t0Dz~Y}s?MWEo#gZ<5VSFahKX*lIzCzBV-MVR)|23RCtx5*L=vGoYT@#(P9!bGyw_K#|G5zu$O1_YcX?B& z7Om3JGWExZ-fZT_Hu1&3p9A>l{0>$8%%f|1Oi+|W-X*?P9Tau@e>64|s)?t1Rij#L zts0#<7xRDDy@9e&pw&!)XA^AO(^qz*GbUC1KQ4*|YTJ(1ovQRvgyM4Wp(Q2zzo)ep zA##~UTa)2*(PKAB52_Y<&1MPIINXimJkQ|GwZ4Vxr@ikF(}7t^YjrB`cWN+Ep!l%HDrp)_W+h$@AWLP-FC; z2kV?bLUI|H;LuH`{a2h2LH)mn0(#>APEV}{cLJ3}{jr1w&5nC&%z|w0vPFOPQ@;Nb zgpZK^VAn;G0zn=evQVbUF~=zpW2)8%ha@K^`e z5^Z)sUGkRz()HCY_`y&(6M%p|Dw)(~$ETrTAgVgspu$*3+T2I11N?l6m<+qYWLpqE zT|$9=2#IJ=11;6}2Vz3M8lEJGXww)$x!>>01oh5`5UkQe_l>% zw;|2TIuFok%La-E#rFo|NFBC&*Be>@m0dA$aZ^B0S~{L=ogy9m`*;So=utCJDoQDL zNF%-qhE4OK|Hqa7`}MANjY&00Dj&+f258sr~z~oSQQB^iU1KMolQx-oc`mYJV-#r$1`?ky7fDnQ8T8j(NSU`1?O7)lA z*iUw#kloJp9MClpZFRfQr^-D7Dkx5=maD3&Hhe!aCo=%4IwUjLrJD&sgE|Z4TDWqz zkdP4D)h36V{V_xWzG9$g46u~Q`8cWUs4*RxZ~gAH3>?D17WDBNPNa?t;*iy9w8g(- z02~)oG8*kR+P`*ziujR|kg%AIY85GD0`fuN;NX@wMgq;gA3nerKppk%!-nt0ay{%5 zAfu<$=5dEYE@NTx3_(mWQ>3`j=E-TeSyl@4A-B{1EUK2(0Cm(*CtxL2u2mhdc_! z9ydBE3(h0QcmYb;sAalPR=E13Wy2FRX`*RFtgITFLpg! 
zrn2<-35`m@lu;eTUk+GK!h;*4uI`U#dIF|lz_YU21E1DzEzUh4gJ&b_T$q^*$<2aTRf=KMYcUC^ zBH+AbLZvvZ&kA>1p>Rs`K3N6)!TuQr8c0x8*K!2f!ktYmfyAL5cX)h2H1b1yYX1(z z92XPQ4+WfMw2WtlE1uBz6}bmOTJ%u*5bPE3(bG}BKFbJ%XNeC7!t1Y4s5JzZ z!eMUqu9LW#WAV4zL)=t*we|;GyVh!}Ihs=>!PiUCv-?XbjH#(?Rw(Cu$c;U=ZS+0XCRsmUvsDA#ln>t#9`$ zIE5;1L_H=fPWz*P%qsAO9Sp{}nSAKv;T?QncPf!_-ub7mOEsq2YFz=~l!{qVJ+k?!7{4rNqpmMA*6i-e{ zO3I!FOdmsZuRw*n*h$nH6IB#63&k;l6$~n%Rr=5$jk}CSr|tBdTFi^oBb;C0KN72O zs9vPvcBtL%)hrPL0*y(Q3HW5s?S#L}P-5VShkF#&p`x~lJw4hpZda=$_B*KVi?VUc zV_~vuNo5E!{QR=`uc;LmL|QEN#;!&b><}`__oTd0j>Lrt>#{yeD;copErGUAnRLIK z)0F4K>!NXb|5p4-2-#4AAA8?_ z5rG|w$#XLH4pcQEZ$LWK1?F4y&@`R&^|UObAQ#QRrDo@_m8 z772Wbo41^`*N z%WqW}_9(h5wIF!)D}kJYc}pUq91AUe4u2EbD0GdPo~zq(-TW1u6!Om)YmfE|qWRv=IBG&n$M?I*LY~c(OeM%Q2v;#7Egm<=)*MdQuZ`a!iv_%`6uOgLl@64P!>N3>FsJQ-_|9jaz&o-}LjTbM=spaU()lNC7yO&? zVA`@xp+5>=vLPEIzfh6*1vab4pl?;kAb3g4gOdZ00JPC&Lv zXbI#F3LoX^n4BIR&OxcFm&C%V`eOK1h`(D|z1m0|jcb<}1Ggth;urg+Oj?;%w3Cexq@{kMRnUjd93q^fu9 zaOiNhBf7TSTGotljs#`?)ILe7ke>twR}V5B{_gATK?S+A7US>_4qf&^sXRlJH)9dw z-SYD8)V=VQd_Ij%BO|kXo^fDSAB%C+D++-9qxaXzkTcT$A5hdui1b6rbm5#ud@I$@ zT|dDRsyvg+fh04+FJ#!apVLGH8haSt<#p19ibOID4va=mHosTN1m!)Czh?HUJ|8ui zIBmHsY0qk2O+p=kEPNdvf#K%V3C}H3yRqxwgy4tubgU7Z%`zJgvwRv~%^mPuTH6Mt z8uSEx>)@dAYQ;^3{uPIi7;16>!&fdpFM(B&w}CJT%=}tv{olE3uY}NS=3**WEo2W| znv8pnR8_Y~zShqC(NZntJZPR_ozhIMXy|n!oS}CMyt->~tw={nS)iL-AssSpzVwi` zR8|ZOk1`0`GAS)Q9b^RMk@6QQm@82=bqxdxTH`rV_0W)Bm+eIM?O<5Uc&`4H#dv$` zlbiHOm*?9f(I9qLwBb$*ywh)sG*N}xi{NZ<5un9l+~N|O_eI%0R{Z1rD58?jL!k#w z-M@0yj_txBrH^rQcT~2QG~P?-ek8w|p4k5jNE5Y_bYjy9x)SBgt!>G2oI}(KQjy#5 z(>B435nHBVVIJ2g>4wI_QnHN`z0LM+PbxMcYGT+s)epD-3a!F2Qbj8)IUC6ht`TWx zNlB*y3d@ls+0~sftB3?#*dZAT0_IR!o zvii$Ui84DZ%ziNJgcjb*UbLcB*CxzrJ?brgJ@PT&WJa!ppFZ3&V46VFS>mJaM#+&@ z;xKB8@Vs>jWlXJ9;F5<*n}R{XW|Cv&m%LlZm}-K@S?EwRb&8V&UKgIR{*~C_oy%Lr zkm{dYn;fiQH0SS9xiE`Q>pRozEbY+M5h#!gGc6Pw7gC!b`^#v;Uk@Z9R$~Q12Z0BE zi^bT_T zrIvWvsl+X?(((13=1w${C_$Hgej64YpczcwE}zDdDy3KttEcT1`V1baz#i$r?qhu- 
z`Mcq7>K0yp-f06Q_a-m2k;p6l@m(S%>ZKvO1%1Dd=#H4j)!kBIJ1DDppOxH1GVMWW zm}g5JR;y0zFCgbKv?Fy%atD-_|KigXD6s?h-#e1}deNUSa7e5tX}Buby!%cI9;u;z zZ#9v3YRfK9t8RCagh9?gg*LK?Unb5H2V^osXqi_W`(aM_5Hbb|iC`~g=U>XrN)FpV`?&Gt4yXU@wL4P}g~qf(b> z))}D%-hkof$QL_Gfz4)w=lkkM)6XYQJfG#E(-RhSJutB1D!+>i=7j}dCogxWa(ei5 zc+<~Qg~b-udCFVnTlX^EE}plChS9-W<{mBsmqZgtjuU*YEV@C%mc`J*x9H097TFSI z9?Mj|wgB9o_`~=O6RZ^pNCzIes}- zHMtJpzO4W268i}sv^;SIHusZ^dgA^URmLC6V*e)9s<45Rks4$lS z6c&yLr(m)L=_~+V@5f<44i#z>@2|L*^0k?iVE;H zIoX!cETW_2uvIXml>;~pA*4&xzb#si7H~b)0+6`XeA!VONrcIN`asA_NGL=5$}q+2 zOFtkE<9NlTK$xvxm>;>591AFLZ(lnl#zplvSVj%JF0g)|+@mBMDD zo&Ui;jVVf1#FkU1+6bKRlZ#8KJQXobb`Ys#%%$O`aZi7>{BeRANyxwXF>~b2(?r4| zqMY*%-3DvymdN*=ZsftjbX~sR*?V1qkc@4&@mWzEn#w4taM(EEL!|3@7?;Jm&5m_| z2z}{6zpyMNBzNP_Z*Lhja`Kc;zsCU%E{@Br`>C|DeL4h-3>Uz-a`qzLPkSj8t4wo0 zvdp@bK9W~Bu|d=3wTqRAJth&+r8ly zb^x%+hj(6{S%ha$mrr5Hgq{EhhR~1@t6pJYVWj_JZ-xv>hp9methh-qPC#EgjfLE` zgoyAn3iKTdmZeiV9id>L;4`_76|M<5wkmF%lEptkS`{SZR|q+6M++*mCY?lst`RYn zFL`4_aAyQ4N%;Hw17#U6XN`AofMAA7VZ{#0hQ34lZS5tMKN!2HfsfBt=- zr4nB(+Mo7uZ4!}LT;`2xWD=>SJTKY?3Q|)5{91_yzCYL0XPb(*f|RQ67RA&Ullv1$ zR%QSK>TC3F0I)`#^oFCBwE+aq?WE#)I*ID>xOujBKgBP(G?a0QmqLR3Q$aDp3f)LVPX*$f7*V&$8*)=av-Yh3A_aAmoJ_$47TIKgfFUvIu=1bK40s(f4BC z0R_wwpThG*I&Bsf7L|Ol55Ew?du11#L?C(sb(m~dx%Rs=OQ3~z00flCB?3<(t-oPc zP(E9qir#b)EFC>a!S>J81ML2wOl7`m0%mRiA8{uaUQ3*o!FrKm;G_JerE2k2mg$Nm z3Wq(yCxb3yzysl#BLoBv+JZ9GP)(474CkWbL!by?xk}lM_$V^0fY+HH`V}q46*N3i zqHrq5F&74DI#)q8(8P+%kP4t5XSJq;rncs&g9ElrY>`A?15@??V&{~E55$rJVi37! 
zAGX1v1j>W4bj=ORY5itLa#x&D%fF^;@5nN^5hLF!+xjI=UaAX^@=ffWY1&jphbW-$ zGAswe<6%&`as|ckmy{Qf1CztyY~4@dq}Qq#{@IpV4F@nUFSbqXcZtWW zNiqaNfienJbHt4cb*379P&j(`8YU)tnB$i_n{7DjSO+)?p9p3 z-y^RG_k-$b1XF&-aS@PY-j!K6_1{FuZv%joP3jldWNoc>>>3_^>9JKC$<(!dZ|2M(#1t14L<%Q2_0WO8o zlHzcj@H)mi7|({s3B9Zn!1ai)5#vez=4~}9CUOqZ#g&5sCMn^&u47T-r|UhYfvSRE zopU0AMX#xuiP%S+Kod*`e{|8&7CC=0w?z%XGH!pn*C}HGJD^!wL=dq8h+jjJ66vVs zA=Nt!`NWW`XU=)dJa?RwNwEU1G-H9@A2<8ycH>$6>5oi4n|>M*aj`kHyjJ+guF>ZT zXo=xu6`mu`6xHh$>9SmZ_wo?`rV$C8m;*}IF^^e?YWJUNf}rrp!NA<2U@pSs33+{G z6nh$-5NNt>xvH4Aa`Z z5<@&u$5pi&tPpqw?=l?sOd&*Y=7ttj6cm($2sJ+h=5YrV9I`5DkwCsXlVY)6JLKNL zd{k7@$|m-3uWVkob6|ax!>fGuPhM_HUj2O@MmaJqA=Lq-D~mxuPQ%fK&dCfBznB#O zSii8wm(l7o_aJ#k6+$YiS`Rl-hUIo<2RDd0$MDorV~KK_+D#U7O+0RocZIBo&^G2% zbQerj8VJ&=Vv*H$ck#zY89zRsROV}=ES3huv0oj|E`Jf-o3ykW<4By7kK~%4q0_4R z1Rl`zwH861zp&jVA?VqkqyFhDQePoxbFo@b(Be&h@9SbOq>(_Q?u86Jp$_D-@Pt^& z_z!$z(|_&!dVrCsX~%}3E|-wyWFb@18dM&2<>fqcBU=_9e&r_z;q=0k#EzJ8&nbCA zZVz;dNPrWE6cFQ9EB}(zV+|PAAR{A3NL-m)jXUz`4#)n=t^-(cEEhGOAHpne_KNk< zU#0fruK9u0Q=ahvFh;B6qVevC;}B;0VJI@AYiu2A9+B8I}DOH>u zz~)}Y1>Ci>i5kyuV$lT}_o_Ysy@2S`lj{ZKBJO0^(AAECnzF4tQ2m zA`nAt4Ov!LT}b6|9y5oDCWHB})h1E=RAGSmDqzl2fbnUWxT}hz9F&uU`~?dkz6p7X z{r6gt<1#gw)E4>9sxDbBg%CiYJ^dh}w^!yx&P<=cI#-YpMJ_fmx}BEjTO#s}{=!vI zGa!BR|FL&gL3IV)f=0u^Ex5Y}C%8*+cL)|NKyY_=3+^rn9w4|w(BSUw?#}F!|K22lhF?2KyzY^Ir~7!>FZ+TxP;!Zs@i=;?luk zX4PohAD-{%*3Mu;54&pr8ox;d+)10cDt`!^!-H-UJBc`9Wuj&WH!2XQ9OJCAbhch! 
zp1Ad{5BM!Kf1>0`icu)Asc)g^Ci=X$;eZe)|Grpg@HShhM;xn!k6WbvTpRA5iEBJW zjtGCEWE9+m#E0!%b^-k}*JLv@PDJ8X$wQ0zXA;*BBR%h;6Sk2BneQFfVrqoEUFwet zi@?4Xn9Uacb1gTBx)DzbULOA`UyzpACN|6&K`sMiHl@4sqE_0xpG0SkAq}K@0B4En z0O9LC;SY~iJ7YkkaGXBU@4iT!IevLF$Sxr5p`Ybz^s5}3>~Xn5rqy#C`Vec}8Z6Sy|ie8ZE1WPVD#4$ZI$DZZC6LA(t+X#r22pN?VGI zbAT~ln^}4#_2oJuUD8Aq+W+Cau@VCH%}QyS5_Y9n9v$rcuMPl|l}2a06tIX4(IMr; zD~CVzQ}+kcVDBZv{oDTM!+;%e36vTKXk%d{s1MN;E_99y!rTR%pftS!|e0 zKZyAIUcr`p2vq+EZWYdk0?aR1*x;3U2flEf2|+;t72rUXQhcEAq}XveUPQwKyLe~O z(2im_owm~e$=4F`qxB59s6d;)3ws285BNsn1Vk92!2pM^St8kif6BGj-;cf%6D~eF zwrDvf+7G@7_&Z!tV1pB5fd@xnq)K!iu=XjKTWDAxya#W7V!#dT_4n4V&mx^^Anpc; z*u*9K-Bl1X(Ef37xkR{=;O_{~b1`x&8tk;vAT?BU#t{TBKLIr#gSXC>yaa!sBWTfn zm@A43U#L7O{uy|PA??i$CgOermCi@`)gqjKY9h+WOMwJFHN*w(^SB{0Ah6?|5Nh;a zk4N$tyusr^2p%*-!r(~K^)V245x7AYd$Dol`e=5XP{u3voZa09Xy@KAgv{V^@_JJx zXLt|855CV62OCi1z<2>s=LJzjgr^L&L%d+mx;ogiu0DWZAktZdW>@&Hz_Qo~Mh!H; zy>Gm70Xk;;LAq`Q;#TpK8alaOED@Qd0J(ufPTbbA!^8uGpT#@Bg(j{*Z_y}_*8|;g zg8;hb4&juhf_5$qbIkCs9hW_%kr*(o$Ux8BmSV#A1BAUKI@_W6Rld=*pEe9IfqEH$ zj$u~)mJPy!X(fQ<`!^tGt19sdXy@qQK2s3T4Wq%dLPF{yg2&2so-a9N7|2-QIe~vY+m{^N20|FXQ}?!Ls}1CA zeuas`S&w(%t6W9g0Qk3NExO`!$Ph}b*exT z(=}{8c`^es(TeIXzDo1`6L=X30wG&vfFe(0l;qc@_H)}lnKD-dQVQHIcY}ctav;x~ zS1O$`$>F^sPU0d(cd0bS|LL%v#l!H!xEBmX4<^b#2ms z{^`1rJ9)=9zq=h7=8;O~j(@LfuKv|2lwspz!*xdDM{k#-#LH<_klM>buxi7H%KaPF zK}Og($Qc?SQ!_uvpH?{ilOt-y!AD=Wz~`ir-*0k9*C8R?wXH@t;_pN{Kca`}`;`F1 zYL##OwHX5LclmSJF~W0QnO8vel>z7Bn@04hxwQNy`Vp z-(5}C!5JS({hZ_e+VD90eFR~aAQ`hqVUm;mn|Sn2XU?zFZ;DD6Ulve{*d;fk+*}td zxT7tucY%ySD@GtB)BAO!p7}3<;Mw`2#{+!>gR-J0C9;UHnBl%7^GalE4u!l~1&vFA z*KNKKhwn$L!>Ae(_uo?GeA#^lX29|#ggiV-WH$KB^gRSv`NuxCK+07q(Wc3Da}350 z(}~3_8P3AC+Yyv81iD&JCMaY?H4&`e#9P5^F!J?!MQt`%z>@w=gboA?8Hhndj*e#W zX(y$+@VD3ZSLTR?L=8@*ClyNP9UhGyQ{(q+9{_1w$mVy1jUE9On*n%^=ncvgu6W+n zhoiEgKpwd|llc!QJV%W3+M8$p3LF%*zv>aUO?-KFOLi`2fu*6A2}RcGbusW)^R~9j z9kq|wx88X)-6R6Em@1{lF`E*BSMgMNZ4`+{?NL6M@6D0opwu(Tw(f=(|Z2kGbQElvHj7-4h?0dgrPH(p9LpW1P 
z&o>r&^F>B{Mf?_K64RN`%B-QR4-DLi!y4k4v3Mn?`riR~g$%?Aw5B4Er$-XKv5_E! z6eR$gW&a^)zwAjl-X>z!{d&8Gs{DGhuPHF%Ugy37h?pequz&SY6EA*$Fl5fKSf}J{ zLpF|<;&^A|cGb;EK_aAQ@31seo#FILKEZ{#P?=AWC)xM*cBkIlWk)Wd>05h5M1=<@ zIdnA*=+iim-WN8Pvm(zSFoI}0GL!$Q;#fmo<}sYHo=R;2gKZM5hDJq%vDW=HEi7mk z4n~dW^^Qd$V%68&b6A~38f+PMXpMq^g$JaWwc-Td?fU=D5T%uq*S>$GLsB^rG$Crl@7>-=gszF?AFJ$!{iG$GEbW%FfE; z+Hmdbd*&35mGZmxcLQe%^b9yf9C;^wT^_eBuX|5?NJd!sn@(t=5j50aV2pk1F8hJ^ z;*SO|3x~NHj^@aI0G5IgYV=KXFA`RP+?iEr0EmAy4`7a);>Nk+%e9Po2N4ZWRF*mt!W^8dG6_E=P&b1 zu_9MHzK>OJvorw4INJFTi5{l3Pk?$Vx7ud+mV}ul!$n?5AwQXD|Dkj)(?}W`6SeA% zzUN>1a{X3l1-qn8>gBrf_W0hTvC5&+uMNc|vr*o3b(>RMD3J`ltIjLp&4s^#( z`R=SH-U7N~fdg5;#IAUmHZrxJoLH#NmJDC)NQ;gwkS|Pn_a@jNPYLDcYQ1G%I^B0l zF>k^qBLvSm+t)ot@eDOi1oNRQ=LCOTsD<=vksoVSQ=zYuWWjO+)4G9dw8p>MfUNXL z{&x5Zp-t5~n(O^4zP&4VPumAnf=2~$JT|`Icm-wGs9Z|0MQ9xjYV(f+NN^9Cn#@Zf zMR~>CTik3G$T+?vNF_C@8F?5O=@#iJ`>aViWWGO?GvVBvV#)jKLTKS*V;TPY(n+&N zQpS%4l}{%b#ep0L?F-z|wO&3Awwz_CvE!QDY-PLj3+>|2(UUUJ?%aA;rUQ8vXg{20 zMcd4L`2k=@JJymR^MOV)yiWH}!qTZ^NQv~V?~5f6=#A8cY8?JR@|2u^)CuV*k&N3S zJF)J!=st`!mKmF{I!SxOeWG*j@|QRQntrzw zSykxAox>V}%{cx3NG4mGH{yhN1SZ24U)W7>bY)z{{CYYw?W?tM-*I20@7lM$z8-2N zG2vbJxk-6DQ2uG5-1p4i)Jf7kjYwY&r>FV>jrdOSMwu~@PIHHlivDFfi?LeuBlG12 z`-bgv#Y@EZ3!Weq;H2_Q=nn*AaebX07!x`FzT;>JWR}jYWs}S{TMz}fW%GMR@CF1I z^yHjr+}AAK6FG9{B$ghH9Sb_V$Q>5atyU4etmr?%%VZ{+WHeaF5(eoAWf!ylT?3rg zixgYYr>5za%`x~A_aHc8lwWi$I1@cnKY3(X=ei4yIDf-cW(xcF^mbPP;Ur2IRBc1` zYRxgX?*&GNXro80xAsnI_eoUBN~A4&V{?wAgzha2jdg*H_dl{&IhxHViYK4LsHHQt z@YcG3=yoH|^F!%!SA1XN42`=7Q;8}Pd5scuTua;(ku9!V9nH4IzceyQ4^4zF`UN!w zWV1wk!U<&HBQpV1IL2TO{t}ua9Vf9rklbfb9U;?k`(!4In|*7O>#u;$g>F!}e?gmN${B`FzGp(?f{ zSvEgBw*%NLOG((vy@NBop>!GV!(CX5!=0+$vO&cjs3e{PdEV&OzE()Qnzr|!IP&@> z1}>r!XenoesP2u;dlnkdZ1TBl*xCbU9Fw|EgB^da_WIOcC08Xh@E2!cXkTtAOcvKx z8R@-?_O0=yn5rih{bhj(TXZu&5!WH&;zup^ACD0%Of|>4*G7ZMU8xeTw{Sf!S=(xM-Key#AT2Bp9!EYMVxQ`}xP_oU zJ;=B}v(KI>tl*E_+vgo4NN*X8czcJ&6_0I>OBPgg87MI`KHs3-d(p5(-B!mS+~rwAo^_C@-gJ zw)*m$>;H(MC;d!*etuElFWWA8t$5UC{M}BRrYjFXXRX?~d<=;=!9fLaH*f|Vk?#e7~_| 
zOo|4j-*Ku~z5GN+$FiI>IXFo$e^b;8pmAYaR`1C^N@28DsTI3!VECc{49cIF;C-In z@FaP@0jbUYWHCujyy}bNY^FU_YyJi}=j4%ds(!A+f*;pVxRy(lf#@MFQ(?^}ADw`V zci_P1e;))9$`C9!j+Bm5S+t{kgyUBex}9D`!^JR%q(PFM-Rk~Xv0yQ1G&7d2G(pj| zL)}o61|~4O(Vgr!G;A!v)nNriV~vul8Ho-MpW^U~H>nP{a71e}y838czq`R0qoqfc zTK#D9^N-j3`kUxT*D6cfF3eiE)1S=`^u5X26jORE;@E)EqGvYP0@@PjM+F~9(Md*h z;-N^M!=s@?khJYv_ozDn=F%6c2hM$LxPZ~M2=CekpSc%UHsE{31uSF|y_`cMwrE*O zrV{bt*@Tz{OBKw%-J#>|Nwbr+h~@}wy0W*{HteQ6LQ!C%9;|Gq{+K+jalZeG>1>IpM^YM zN3pyWi;nx;WsC55!=5WJ_FpXWams%P-Cxm)#%a-i57n|i_ha8^5R`OSXIJe4{{ zs%g+ikk#a^y=JSr)QH4YQy-``{om^!uzShEe6F01!A+SdagFnkhu43>Fa0T%#FL-& z;G?LHOMKC$2m99EWKkG#rG9OtcveV`ctTb$&;I=yNu5ty?qo|B^|a6D%kNNpCW<-5 zS>n7SB&}yovO1Vamq%MlHv9HCdj$$d0Q0a}KEPJ$@zKKW?{nh~bM#wa=|H=qP|MwU z`)mO|y)l^63d;2ed&R5UCiqoj1SO+9t;NQ-#){d% zJLDUk4=tK;hh>tsoBWfpHX4n)Ps9Bfjbi5g-)9|uP1BmaRV(BGcq@|M9?MbC!#`Mi zyg_>8E=OJZ$&wp*0#|}Nx>L~NddL%vhBLs)=-1a}BY6Z3T?elO$#DCl7v8qe3VbP3 zQfo4*U+){SeH>pF1hk^uu0Nlw;uj@_R3J`h;dKqIqX-(5fA~$#m0}D7kikToD9s{C ziI>CX&lj<(O5}gqqNVkQ!Xf_vG7SS&3!idFBd1v>F-w1o;2EPjeTyyS8|_nV{C6E+ zp8+YE6>HhGpK-mlpAUG9@TZqgv+Yg{CoO)(QM(8mE_wAa8o@d8$#S-vxlGW-vahTo ztI{|fFR*5}DNmBjxvaSTEcp9mQ$*%oNn{kE$b;kO1;no&yW~9LiNF)eM+1(;ymO7y#cym6Y>ux*tssZcyO2sEV?TOXY-4I-R2G)vJbk9A`^o#t#PaWK-4=w-cMiZ7TlSf7n{+X&9Pl&jefC(VW|=< zh(MLG-o%?IrW4%9RhdhhJv~Mb7?SL9gOOtjVL|`NCnFJusOa;+Gfv=i6i9vzVN56) z^^W}JZvr47JmWW(-DGTV6NYQcV^tJ+`Yr_eq@~Masd^-jYj7=Pg5O82keJ6(Ao@3< zmnR}gx}Zr!TM+ncy-cET`@$Pi{m;VUd*u4)n;3C@@5N*sp*63Zy z?H#mEpBM`Vu83no5Wlq|9S%d z*OMnBVEOrXk^r9>fdHK6XK2#l|H*ChA_0du`Noq9dAi#_fI^{pQmB{mZTNbdN56Q(EYFTYxC_>Mu-V1N~x5n#(>MM^>ghxagn za>MjGe4l28JVx4<;w{+krfuuYuEv)373#2caa$s=E5gV>96ZQpQ+q3=BMn-Xgj<$n z@6RELSgKIVpVGqzRRLXZM1uU$0vtnvYe?5gK)Ee?&8{z#ZDq@hZWpKZ!$x!NMMU^6 zfouaOyBi8kwk=fX_EU zk+9ir&4YC`wYY5LqUXgVMRU z{0Cb|;gGocTKSIx0&P{P26k%23{nv&3b`OjU@DiR02}SMcY4J@`ZoiVW{)j!cn~?@ zXr;%3`}?k~>-akc4CO#7xlPEnWxje^?zCQY)Jc#*Lc_J&V3rha8Vm3oh~3?6pl4!v z-v{SNu4uT4EdG4?o=Fc#f8Hi-6;t7-!DL>{IpaO)945u)@@3XE9@BTVXwoI%ar)nB 
zQ@sQ*)iIeJjyw0!jx^u0g?zT2!wL3c*Z4_p&)XMZ(a;hEob9-7GN`rcYh1a>Zs*dTL|q7TZjUlP~5bl6mTrC z9WOW}i3l9<*vk$>Ea>BOI4e;!1-wzX+XP&3{@flLh79JANddGR}T zP6S79*@4BKuwRKAIPxGt^0PgO#sa%yJM;DZ1c5p0b}z~=GK?a_Kbf#>+W?rtJTIzA z$%(D#6VBM|0q5pwB$CvPGJ?{`5!LD@AqP4dEGTor6Tx(Zr~=w_C|HmZn-nu~yMojZ zY!{yo&4|K)*~El$rRbTkzSZYx|9FiXjp$l+SVZH?r`cQ%%f2mME7C`o%Bty)#AB~~ zU|D$L1QQ0y>wUBtZ)FZ(Q$3F#7YgRavO7Hcz9bj=hhLcwXp_sns%Rrz>M%MAJ{wbW zTxtM96GaNRx6u;;0SYq*#W^~y4-E?BNhyD;-dtCiVD_U(sI8Uzw4p)<8~F|hc@QKB zSMY$c^n`(Qi8iED%b}Pnz{)lGIJ)tEN;aUVL^yz8(Rkp z$pEMh6IeobN>DEi%;<7+-NAFa4GzjFvVh0tpI~#yF^&UP3_Fhe<-|dqQnh4JShig> zobY!sA3uU3fg2`LIzw{Pi>5*@bi8#Hyxb|lDDyS3k8J3BW8Wf`{C6=NFp%IlI95n; zAjR)J}x@>hm0%sOPcH3$ol=*pD>-@(}kVa*gdkRqhnWWql8yH`&i zbCwz&o&|@nttM5sDjh~K)E$%QGwNAHh&OX4<1S|21qT3>vB1^Zx1o!bl-NA@1F_NC z;vuzHl9>|PuW^dlu;(*bNzP4BfJo{2|2%&WlNk=q$pmf9JNS@L6C&|^Bwn6qOkfK3 zQROrrr00>1pD4JI?LkE+y*9twDd77+yy-f(L(yAeMXEG_Zi_>m1AL<~sw_(G!||nX zLkO&_g*=%UqRl}{wcl=j5S1y|7(=Im z#mU|z*^rS{?74f=U!6k9(@9Q;6sDAlFfB+^$;yzm%E<{OPZO>9y@D?jzI(kYtzF9KR&ld zJoHAGz)0n>`az`lqdnCFw=BT29Cuq3{V9Rbv_7+l?C4_XH0NhrBC)%>G z6i^&*B@&PLHk!-NS-&cTm*UT_L-m)5Nw|@2O;s0C7NRRPce!`xo~H_>)Z8SZJ10N0 z%badG(&~LIft8K}%+Ti9YWOz56%L@cV*n^bMh}A(?`PDla{UmL-{U$Q?p29n$hYdH zf{4b@b7yZuYFm;H3&!rZ$Rl-tt}-j(^~9zu>ywPdV_yga0y;XoOW*893?{5k+wKcd z$8vT>at@dFPhR)of}QEVx@6v$96n*;j0=3|+%7MfD&Dj%c4r&jRFwXLoL7l9O1UzC zPa;sY_)cL^f?lK(1JD;*qLPoWkb9)ndzTXrQnO6_zfLzhaG>ns-2gyf#{2RZ2^ytu zoh~RN-F;LbfWaHoELf&^rsd*=n3Pq|Q>q`nI#n%M#@PHn-y1R=1c8h{Y zQU5FS%Vi_}X0a{yi)KBoO5~b@b|mGeI8x#&dGR}07}}+_=ib>Jn%`&|BoFuT-G0U) ztR9LxUz=oTu*Mt7O&s= z`8&NS-tT!TNYT)5&@?sc4jmoJT%6(WMrTJ=D~-Cq2)|D^vJ5>t@uchMV2>TxsaQqk z=WqqU0Bcby`8}V0>FC-;T(jyZR75O9Mka4rJuw}Hi}*Eq^6SleCTGoi0EVI9h=>+j zrCf)tfEfg{M47cq?F(#hX~|h=Jew3y)5F?sPA@r6De=;k+k2ii0IRNgM{P)1FXDG! 
zzW`R+iP-vO+Yzp(l6UF;)%n^_gkbiaZyJqek=^y%A(&4}Ny`6|fcy*#**@*yjiocX zX+i)zaQVynyGA;8OJI2xHTR0NkL{HBxJ?R6?ZcJ4YU zRBVnQS#K!!?~Yj8U2Md@3gM7UrpW29`sI~+ce|vmY|9!zZ^2H)J1Lm^p0?|IFnv5e zA%7Nd8tvXv9X2pe_>o(`hJ}@Dqp?Imym_D7_3aB}e0wRoOB_+jm9J^8h?pfanP8zI z19%itz`A{^Nr->->jYeP#V6WqfFA`gf!BP_)WF@g*RfYGfgyeBffJtH7l$#v&ThLx zsp>*3|A~V!r^l%|$kmr^^|_Hc#f8WIgAo-d6}%I{lLEU^n4}*=!CdAlkL6qd;C(AQ zDu5Tbn9e0Pa2&-MuWse7j-wYFeo@(D=s9mDk zqOf5`z$M}-6%Oa1H$JSN*0n{JoNAZnJKi(M70)1=>Z*-ucjp+z*CRqz3i}-`;E*hF z?~VTibjr^&tb+iRt?h0<9s0uDc)SJ!?#9FETe5mjJTh$jfAMU-tKQmAzG&PYFUO51 zwLh{7+Q03}5KOkov3JL@7k_#p;dKYQ=|?5k0q+k_sKyv~HF&d$_{gJ2D_|2jSL+~z zqiwj;??Ogg>iVFO^U>|^f+)OD4mgw`XHh{riU<^^-oASs1AufsyPDzjz*Bc-u9kTX z1E5-2rx+r)AvBRrbQoGqZj-l5;MFT5^I%j{qDJ8c$Adr1V@U(cu#OB0-%dC^=L=|* zSB5!Wek%Ju2pxg$+XXbHiPlyPP|HuZlQSZPgl>OzQ7X`Ufbf2v-$@4?3_s$)XL?e_B)@AO8@ywK zI%P%Tt^D*FU%VnjcssL;_vgLSi}g+k^G$Jpz2unnh@$c$V121Lu<$&|QHUX4>f7qe z5y${BPzDB09ZC8-MZM!gOLPWshz(pcpnh!cOK!=VNV=O=v}=D~lx|Kp5tdQVy8qZo zaCasLK&CHGgXx))tx#A)Tp6a+9K>E%K0eIBf*9R|NU1x{NMLG|4()7=N|n^z0(w6>^-ohTyb-;ZAc?gy1IHW9*edVr6xxI9|`y->r84u5W}#-eH3Ainii%f6Fd~= z`_lAwwc5AB5I8-c{;H~S@n?PMu_pMgSCQG_gX0-b) zQ}$uGI2O`Et-95OdU0Vxa(0+WtiaBg$k0!uxo)wKIkMu8ZC+Swl?#Eme8J%pb(w|Rb9hTK)1m@>(&2{oQ2e!-hCz9`o#Vv*OM4%~xgUyU(uNtz zY=$UWTPa<(|9ba1b z6EoJj`l;6DE9Mj!k???_FAASot-dc?zMfo7`bw-^Oh$)y7$3|KMe&Yh^Ru^LyYe;- z+}!(IodNbgnyI9YSf+?+uiCGB`CaaR5va2Q{L0YA%lL4evjsSX;<=kfwk~r%SC*t! zn-E_J94^U*k36tW{btlT8mMPKd21{j1<}HC^9*sBkenNb*Lk(&KYpJLc6t*)#P-o9nh! 
zTolMz(*MTa*b}(^{=hTYO8J9sJV1=aOx9#jYytp)@G^vBqH?dbqRegunyzfVI_$=p z`++BUvCN?vILI`b{?{TfcZmauq|&+W_^3CJxy_EoQ-39LNDy1FiPFe~vL)6Fh@2B4 z@}zohKOhM-9)Im!v5-g1&;a;TjRQ`j#FMBc8;{yrO zyr2Ln`J-VR2`a_-L5VL+qqE@pNU~oo9~5!gn+zJMLS^Ic2>id4m9cBAdjJY^oElk9 zkKgfCKP7g2_gBh~6G7Q{$}pGh4FXp)LU#|%n2<(@`e*(l*PH2{QduL^-v%-B&O4TF ziQ1Pi($Il2=8|X9>2tb=$q%{BS2k0B^N&(ro|`&URb8j274x%+*X$hE^2aeVY@0?0 zouB_S0l;ds$$>&zUVzr`RqxP*bIbf3H zcff3;xg3v%K(t5n#|!wbi3Cqi@|{+!Zt|dughb8tdh|UiIfH5mf&#Chy?QNo(3;ma z0u7F~CVks_D+5yk>G{INXBAIk@^~r4uA&zlN3&29*d6K4oD1#uO!DY39N7RAjdnfcY zZbo`L4-W>TSUTNQ3z;3Y@yq#Z0bEO!q;&_OGua3uWR~=0iP)L620gct%Gejuh-S<- zP7__!hB~q94&FO&21S9J0cXH%%HFI{dxwBkn%u=JIO^cA5{*RC_JCTxu>2+rMQ_UU zsCh*Aoxb2^;pOy4&!NIDJ59Nt3#TF^gJLwl1A=b}3oCg=7RNp>RtwA*CgOfOdk1X@ zi;D^5vnpJI+jB@W;`Vv1T(*-BX+@p;(DIY~j_Lk+a_1?)v9t0VcC(s^s|+~1pKjP1?K?)w@m zG|AXCb2p++#3Fn6M`3R8n0GXJ-M)8F;$|kCZ_m5MlK0l;zJXEYsco^%cuv2bqu%%C zXod*WJc5-}?`6U6U+Y6ckACzHn&O!pKIdNq33v$a@+_iuY{b_6@m&q+yM{9W5&BfQ zUz$(cD9C5BCDYX0_o|YxEB>`Y2RWi<3wYn2@BfA46Qw}aLv^zjgl1*Y|CNhCHpmb} zIh43eh`M*@{-=2R*%{P-9|aK%7did6w+f2}r%9ioov+?u#L~N|8#oyw>E8?==;CM# z)N2U+T$)>F8oIz^ieJe=%RM)YVVmgC$kf|*IY}d`cCGA4V(8`ayNfUgbkXw;i5Kz? 
z;jf;BfyXq2+q^pnEDaOTobb#am0a^Hesg!=WAl2z^GVp2##Ccv=<~#&cB~NH)H!{u zm)1oK!-V{hUOGud(~+F12g8kdJYpMxByGk??EC0bm!BXc7GpGaQ}*^X$%srJ?pzVZ!luekgk8sL|K_UU~lxpVFFkMwO%r%h}R#qC$C zvHaJg?FbJK*WC$HsI&wcmw360t)GZEOrPifSiu|`Scg69emM5Ua5Qpg{TvDU4{^qT zG>%k67;21B4TygNu3hypepKekTnOf~!YjqJsAaoJ=jbUbcPcBS0F(e0 z%&~%aReG7)QNA-k>1O}3kox09sFp2QNQl?->2ml=XwCb$84cyeF~JnfK~b#K|SG)f%1eI0lUK!2O2YKIuy{zSSc>|ENwh zgWI90BX|K$$eLQ|4k0Z@q$2KT#U&D3%QMoPH}tNGUEwpckoNqQ{!W0>=;99KEVi=ZozJ?wRYM~#GCElCbLnZ&w(883Rbdwtmx=acMYY}pkVvr zK~~?OH@CNbxOr#SB;Jkrv?q$yi#crnmZ`xU9~LoBv{lz^H}>67`m(4mIib9+|3mz_ zh&gsixgPr01L(cc*ZCI1hN+?Ed4VCW9B+%8+eo-I?b{dT3c+fHV;V)k%5!%Zz|0(~ zKUdYDlD_U|zRS>ZG+21z0Ap+*!>quN5=h)>$)p*?3*~5i{n|#(!Erh6Y^52b0m}3H z)n5g=5pfPJOUL&10jPsV16;#eaOh#|Q}nd*`J2e>Dc^`in}~Gx@*59RcLR^)_W6K_ zK1tZ0^&d3)HBj2c71b^^k7%M-pK0dcO{LQ|WbWnbn>hwmfLkPuVgH!}U9+o=Yi62; zv_OrsFQ#2g={_dIKfNydlvh5p6{Y6QA2F!3fg3&2^ zzBM1OwYp*8pEOTePd5GK_im|V6Dcn*3V=6=WH}lZ6l0!vZ;Zq$BnSTPor--nbAW(s z!(_QK=cI7?>|VWfxh1=?t4$61q(bU_{dbQl5R)37LvbtbGfCa48%R2B771#NYh|P! 
z)r=Sh24+uI`KNm_Ml_4?b5+}wU*x9F5_edbOj2bS0(of|k=1b|{D3v+ez{pwmjzxvzN_O{YqPOTo*mymS>|h3 z*>`HE^X6|>GEW;FM9bBi=f!Z?ZJbu$oBl?eRLN#h$th|-PG81uBoPwfUJN=b9uEtk zRAA~c=ju95eh)lqk)-)iyTEHR6F|dXeG+)gP#rI2?HXIG&tGj1V?dcp4Ye>0iGvDeOA~b2fXk!&)^JMlE0K77x zZsPOz3oWVxn7{o^>?s@@Y3O0#6cgq&~vc>JMG zXo7|gu4ctuUi$1sxh6r2XE$7vLalI;Yqo>b4s>DsE?zL4lS)Gp+a6DCXl05iBSq)B)WtB6v5C`#L0lrjl&^_e8J5t23@Imk6!j`K0P!ou(7y1IW8} z%4|BUKgq&hA$!;icxOzxQWTjKXrxP#`HGd&&wQLO;CRcywx2vC!RA+ChOAm|-g$WQ zDw>PA4j%e3*OaH38|y*9G{+V#gtXl}!m)>edm;Y3sfnW3ejGneGyOYB!{zr`QrF$O zQ7mcqSot%|EDE2j>ch^9NdlSu9~xwj#=S39I!PThF+1k1b&s-yt*qC1>yQ2!ip86_ zr+oBhVIR^{ybAi04~U6V1n?WSWEk|L8mW2lxZBk?#-0q>E9a=oKhH@z=da8ZMKRQJ z*bDu_)Wr0g#k=Vd!x>_3|F}6JeG>B7Tg57;`VhfUJzonK}z zbKLfHA>Q3^ws-F7kISUa*a8U#DI`X9sxVWeBO3*LBeH)?v`#zEu?WZA#oFpxcFma0 zqK&E;RYhRg+Wi%u1rbcDkNbGrJ?JiL`nX3cF?+eOUf%SI99+sc2y%!h8i}&SNZinP ziLAky7#Vu3WABx@q_@+kn>Hsz@Z0eG)5f2)Y~pNNRNqh5#gIQjO9Q3R?c1G^T!tr= zQWmiOwI&W?Cnh*I_3sPF@NWE6ev!QVykqwjwYmR=*HAuE5!-By?MXW; zUb8g!DM#K`3@2?`3d|?rNElkzI_{sxs#yMu^{wrMSJrC6+#|$_OmClh)FOsuN}iw9 zRu=-=S+fj?RCuDPK6fjoLB4z2?RraqfZtC6>h|gXj4FV7gZ${Se0Bcq!KZ)o%$0HS zOUf|A5FoFXzNZ&K7|*%GMYK7 zLe;8ju09=Eea^0L$Gdxu!(Lgb!zoUG>HUS;vD!{r=QBR3D}-V1T9}!-#2e>H&PuK! 
zvjGVo2ec#sqJr;{(AE0}-#>)pl&9#~p>Nuq*;;O?sA$x;#+0~Dfr!}_wwu-t~lw|!1IgJZ9YFy zkjQ?^wBV%}18R%&TZLCT-G4zDSA5&(;cIqY`;KT%GwQWz;nUpW`n)&26Z^p(r_IfB zd}SVcdZnf378tBJKULt5xUrZKHPIo(m!9?)-+C$$wYoTb9=9xEwducRCdsFDnBPE0 zu6Ao^kKN|*a=Mo%X|nVE>t75o(x)MMV?=+r(hT(92zR|FBONMVUj4<~El)q0T)fub zF?wM=Njw|m_^wRZ1n{}|+fYLS0ul^VN{f^^)08Ugeb&rQ(@Nis8uNIOx3Unbzf8(d zW>EJw#^>V>|835$-KTW#3qzHe%Rl*~14bqbr8T?dWKY%iPmm@Y$C<;U^+qZQ$wGR2 zzrKYb(d^09=ibXp&lY*VtNIp>j1x+5g(;YZ%S0)tkOb@aP&mWPFXmkg9j9-Yx7k&H z1eZG8apvlJe(A6}-z*P}wa@dSF68WAoxypoS}Zt^NI+2vLs2S8`#FT(`TM{Ni}APK zXW_N@1maw=zkKUxSwcSiV*ffB+=A2EeBY^arc=L0@UY*=1wFH%@EZ^%KNvMxKL@FL zv7Qh*5ePn})~4H*hPjupp-M%<^V{>#Bljm-<(fX{?d0qx=TXP`TZYbloEjKUE*sm6 zqLEMkYPSo9*g>&{#W>gUGF)jxG~n~R@LC$cj(b0^u12M#L9*hpR#ml^jLm@p$`Ra{ z392rpx5Uj50srl(gr7ry+n>k^XKYC88u#H$^Vl37EES>i=seUY>xCt4PHm0T;LAwn zs@KC>CdaroU?Zzz9SeGb)dfGN z+;1LQrp_Y(YTRUBtOwJBx7cD>#$VZdan{|o;sp}f0)P5^N?Fk*;;ZRG(SOZ=;demV zq&fKWbfHowJuEJk2qsm84Yj;tX&#r0qo$BVLG9IF7sCVOrui!W`pt#%`=N<66T!M< zf~SD+6EF(?EZT`Kpw5x2n_F1sidPH`Y(>;Dg75E)ROonFx}^o=2KmF<{(j6^j=t0l z;&@o|A#j@_%;KINPCJ{(+GuH6N=v>A& zz8YC+L*R=G6rS7XN?1Z}`K?Y8WbzyDlY;c?b>f=Od<`fgDId+^$B#i1;$wRgVSS;D z;q73sU=5{vR8q_D4Qkl*9h~LBjt3s#Y+sbpEAvU7-T_(GYh43(Ca2JlO@>%S%`zX661&M3lKyPzK&NNe`#fsa?-I{IXzS(Li7SlC0jCng-NH4rM3*fsT$f zVPnEB+&iz|RPDp!Wa&$^YB9lvZfTmk(6HC#IwT%hq!^nF%1vM#RL#goiinrdzeP zwdv3N2nYyxet!1z^BW&mqN(qyQBO=v+}_@v22%6c*d~9zTk^_NuV-NgOSMG?Z~$2( zHh{aXY;1J9+#W>3K3Qt$*oxtMz3X^eaapo?dAhS*ZGC7tYxmsfhJ~9p@;0I)_XmblG!t)18T z@Opm&8XEx5re|{7Z#Jy?$jZxm-*1F|1i&5uSyZRKG~=()HV;^KNFnFIa>nGVK9`-9 zwF1yZMh<{Xb_aklipHzrxaPJM4df24cwPMkLLj~^)*WQnw$o*8`ZM|ipr5z%9@J=p zkI4d7)Dydie-j5)!4jDJ5m2fg1ojM%PeTkR2p_|bws)ANuiGSpxvtvFi~`&;0bmVH^w2I%_9xIN`T*e6#!G;d zQC{gR9u6#D_v4y=0JjYN_8lP5P}olu%LC*EfR@aRBzivb0`QFHD)paUpUxtm?V!^I zy?OPrYk&P(uOq<0aecX&12lwAW*?u~-`D%CSOBbygv$akXIzY=3P4{uk0y%{TZ4i5 zEzAso0~p57H**Hc)DGQ*oB+(Jr`9Y+xeoxNOc)v%7;wB>Vcn6kmc}~Y`rK$o)|kcT zih?rz@_0kQ<4{-#6-&tT%eW7r&TfA^FJIcg=b@;igoZE=0&S;Cw*kdVcq@e98w*Dv 
zo_^RY6q}sj$zr|O)2-HKu>qN`Utmp-P3fg`~)4)5J#vb50*+RR8yN^7JG!AT|aH z3g{0bA#?BEy%WWiUWoeeo~ZwL18BRMTN3SZ=@x*GPPtAVVBDp$M|z-D--v=P_&w<3 zB-zH80N(q)z`G3;v&F+09(Gb7cy!ah7d>sgo|I)F?!<*ghqywO;r6(uB|f1=lT84? zM9{%7n0vxo(DyxTw1tODKJ zAmWW`a$GDpODL0&3E4>o(bN0;`_5{AZQKJLehX!M>^fOGZogv?Dyr1u?a7iva~$8N zrl->uWXEo4(A39I9e1mq63uRh6tG`ASxS^FSsrky@ril{@o|83B3dS!d zpyDE7Rm;=BfVu|1_60 zl#}}Gb~SIVYAQ{D6v}KqK@zO&yJK<{az@OsZ;amW;Q&5~f}9+O9HHI~2A!R`{{fWX z0BD}#N$Ct&qjq-H8#~+~d(3A2hP(bKR#U~%+>xOT%D({sHYx14Cdq>k5@kNY?$~)2 z^nN8FFpx>vI9gpeLI9A0&7S}YInO;cxm%4WF-=+d7YUG3{JTs=Ma3@9dfO@v#(lv! z?cYU22RIWJ%*59>7>qO6Axox;6a-Yi8)AW-Qk8C2{^*<|u{Fm*#+vLDS{OFvc|QEXjH1-AGWOzYuB>JlM%L=EcE?MEU znl&`*mmB}WU9ML3!>A`jG92g?7GiM-65eGeG-}<~8H8*qtt8}F!uhx;ww@ppU|Ni` zxKOGC^l!_9!_UvLcs6XmY*6ji8ttN34MQTZZI1Gt^`^UzEawGky$V>*vs}&bo|W4n zeN3l%KeMyD%Ukt1893nyLmg5A!!l}b?_E}nPgoYgauyhlN;u2~QuGi{|KWr18@Zqt zSQ@CZ(d%XmUTHau zZ>{58kj?Z~o?bR*`d(iA*eqKf_mjr33DzREZ8zA@My85*)Z|gwc3#-BIfbec5)uFv zCrMBS4tUZ*)KBD~fV)!-{g~B`pgS>ng3d}P!t^B72Y3c?BY<=Oc#L|Emll#9?u_Le z_bX9hr+0s;7nKlsKB={X7M5)xkr5fRG?i|5Gx&}pj0_xSr&$o_ZF*|Q1QFp<0B$@L z4Go1yl@WsxsEeGMHt#Tmu*Z56_ro9K3v@l*!rJq^u4bnVRmNxiLRIe0Z+an9UmDT6 zoa0`nRTZ^W3XGIzVskuCc_@ufQnj|2*htyNmux$tP0*1`AJvOhf;2g?-Fg(YhoDhu zYGSo+Dy7{sJ^U6pCgK4SHK+{tXW$gZ?07hjNXhJIH~0`EYy)l~7dib`)Ln1GW?XJN z(qbQg^A;E^JHG}orJ}V6O)?LtZ(P|@mRMDjNc4S)ahnuMD{y5}t)B1iOh_zZ>m>3zK1ElLTqop5>*X(>0Vr;|^uo`fMmi(2X zkz~1SLkxK=gC^T8{|^QdJ82^Mvww9UOqAq2IdRMq?RN7~WndM?uPznmIek|=da@p? 
zERQG~y)Lv_A|%3mTBB-~$j~&#@-)}4ZH)gex&Mo{w|>j=dBTP%rMtU9y1NAgq@|@( zq`SMNJEXh2yStT?7D4ImdY8}leV+f|ISzkNult^Ruic&9nRCvWQI+=V5}+r-!sdcs zXIere4nYWl4dE7TgO?54-zdTnwjax;vRpi$ovhjg)Q5I3J=6 z;{YkZZ}{sCh8Px9Yv55ECz>YVdOP*bZPG zFb~zH1ur7Q`o1J?Oh^TdimiURr84{~q&EWZ4ygm@7WU>2?k3WvO~TONw=FPEYI?}u zqM9(IigV?S+bO)KwA5KKEgBd64`i{C%5e~(Y_{YiJj8$sD~4Tx$lE1DBgIi$o_VWj zzBv#mDXy7JJtgAu|r7t`VWY_z>+{=^ft)O9fe=Z~_#k zKtAMBbpDXh-Hr++h1si*g9Z87l}Pf_!;WZCkD!5~NG|;;J3S;x*9V2JPjBVYwy3?G zh9GGUS#L~yN0n@A9PshmPDiRqBxj!680jDtl$S;kp-^1dJ0hI-e!q7!*@m!BW1=)*VgI+75x-&pILjE7MsppBe6Q(+|F3SWSoZpRToH*S6rRnEK%`=4?X z0iSCUnNC0gN)4t)itaJ51|5&6crz}FOy+cRbZo3|=ser!zI5w@F3gA5)9E{HPmA&1!cVV zs~S`29eXb$>IozKaL92Ruf4<~fnUN_y!6mHWHsUk0i!u>E2FSW{%SO90pq()GZkDE z267M&^&ivg39=o}zZ{163t49Bz(>%6gNYqd!R)KM(=rc+9T_AX%7C$xfj-v!hf5YQf)54puNLjSg9S6 zt<@vrr*Oe4C8?6BeOvPiWLu-(>O( z1anWXWdRI3R>A%`)*gIYl?0(kRmKXynrJR7M8R}4>SCI0v~D$v8Ufoz^vv1 z3;fd>#(b?dim;jjgy`(_AV@-1W*<5SpA-Br;bz*lVM-X|VJ^jbWd0j7X)~&)3{*IH z87bw}ib$9Si5~p_9>}`F0T6qi99)T6wFa`%Jx_4FtHn!OZ`w#zno>+gz~S7HBqAbax`2rHui!pgP`jQBJ<9(HgXKYNmxf}d1T!-}Q*VSZ^A=L-&8A@svGpn{P_&X)RDPcb7htoX zuA(^9>zoN7^R0Y@I{x6dKC)M-(-9bm1)e1(9nVO|HI5kqo7 z3wZ?4)Y)ge`cSGIB*=8y0;?{iG@n`=APgA|fHQC3#_5tls-e{|L7?^@l^C5a!C}ns zgEOc>xbKh@{+(n`(|^A*cUwb(F~bEOI0-!-3c~Xy`ys!+|8T8%&yW9nn3*=ph~RJk z?Zq1IS7A%<(-wgeX)1!f;bGPh02YH2vivc+a?GktA?sC%{RzPSFMlg4C=aw48*=== z9i?WF_6iq~Ks6jMiN`F)zXHNFg0BMc>bJjuaTf#7$DZIbjvSN+wZRrvQtVrE8V6Ie%Z-CFcYBlN|4~4 zR9WGypy17;MWg;(Q%B<|lsLb#*Kh4_8E#AW$itICC^|UQl@4uVv8VEnI{~2>cD&$d z45c*n2k)E&GvP*&&L`;t{!X=t1duC-w`$2U;O~a?uR}k5)h4AZuouQSzC=;wAVL-W zw-uN}vcVvdUSX_dzZyQO9CE0-1ogUa1pA!rAU2cp;S~ehE~$~H%0Y&j|8IV{KO}dO zK{mf`xD{l4LOCcK@f;6`EIO4Uu(ehqNeMqf5m11KA#W3O$sw=MSrJ}!Zl92%DGbI* z4D2~wBn|-?s45D-TYlih<1}vjZMo=hp;e^7I4J%)Un4m(JJ6|NTLDoyGqrANio>^1s1sIzfSE zG&%rO0qqykxh!!=Nl6I^_AlwRKnTu{9v;+irwa!7d$pr16M zNtr+#s|KJoV9tMrO;BH5y{UMAP=;BY zQIH0RV12%+9#yD&1+4C=40zMHK#ML=XOdLV?Q|lIqmGkzl=@X`!R|^F_=P8R{eBy! zrq3MUGlhkP1SiED?L5x=s>)t^dL$C41*jDQb=VH^vN(vCGCh^xR%^LBtv&M~XZ5~! 
zvLF_K)s7d)%Q!!G;5`++5&cLY?;wM1A20j97aGIP705pp4zTKGhQm`B`+1{i-nmj?DF|&+rKp_M*Ai=`^VGp?Zyf9V6JUX-?}G(fRc z)(>H`t0l^mkp!+KcF(CbZ!v1VL-Zqkj*7&M^?QN=)xTNr9qbqp22?meRbuIk*jTg& zOG9`Ra=$3av;AL<<)ukF*x1;35{&14h+IZOxDu#N^dEk%tgMh*8<2yQbOCpFEhZTY zEu<`bj{~%TzsX1(^TowQe?B@r?6kBnF*W0{GlZoM2oaDOZdnGihPWNAcM@X8feNMX z??qX~rrI=$mp)_6U;rIrilTYQ`v7&?=ReG*2e42bfzep#Bxqwlf|W_`C;6PA(F%{N z{i~xDTn;VJGApkO6wMekATq^-BM z!3>sJBJ{;UZXVG&B{2~$VH&`6Xk-tf^N4$PKLY-tM+>X=B%@$l#Q9ypw6Kr=cO4=;BPInj`jiAoch6F-m|hcMI^(7PFsL#hSiV_BY&dF1%8PQP)c_8{No8MMMB z^4`2LHatm<0w)*-*sIA(AZEl$L*`Sl{VjMA)Uh1=Izj*@u+`Wria49j_0UhloenDo zfhW#y!T&dUy>3WC7Aqor0_t3hu`QJV+qnaVo&M`}u~BP0o-cUmMjQ$MA;Ccy;%y1qOd)B zIr`SFBrE?x7^EE(PX7j}P-RG^+M1o7y!_oKQUk3-ihDf%!G0X-qP83#w7s4Z%SYf6 zfFhWWH;STEIhasx;U;Egec{l#Dw>+_u0G2ViSBHE;Y8-cz`5aPO%#~dK0cQz-}fiwmovJ#iQ0>s{lq__+J-h+WL z2Thk{N<(<^hV?Xl{rWZJpt--V&tRk%5%w|dU;xr!wv60Q-Xi$8w|%So%|Hak57;Ks z65siT6l8M9Vi3r%M^MMvr-!;FXUK>)9EnTb>T!eS5T-&w;=9~%nxCoB<3bZveRIYE z@giDay_WNk`&>@bp2wOkD@Do`iM6t62gZORH3pSLLw22s<1@TeprXps)mwhZJG<&H zy+>~7T}JDtwYIvdV|X<>IvPjg9-NQrgD3jbGye@oX+DOE(EOFQAuGd{T?W_V5I95G zM+vK!7Eca4UV19eq}MQAZZWciH;>_mDAII{Av|U0l<KBM8Ob&OMVyk zF!O9E;Kb3Bkr8>HvY_w7MhpK?M6pt#FKP?Qk3sl6Mic8N17;FV453&r46l!K1MI zQs;%xxQu)86!)h$@U~bWi!g?7j96_o91yE+$E`1B6E{5|jWJ>6tOR)J>VoM%?>2ns z5kx8H?PADv*!)g8A1Gb@{`ckqMv((u<}2uKLE{&z5ucB0#2--Aq|OY#pxOH$u?H(ia2M7F%IG%n@RmYk_5-rv z4m9t*;gY~F*!;le20<9n5D1@9GVY*Q;SftigWcXTX0|O!&q^fTO4|P<=Z9-E{!a3* zJ32kD;9{*h2x=04JM8SZA;WxjMJwol)O_M!hpsSui%~!>b^X?W0=|U{vNk0mRya5~ z_>=zTDy5t->u(D66v=Ac`D(UuCxyX8 zyXah|3fNykU$7O1L$EBg!{%^fZD}OB}M9XO%@vyD;$RV zQHnHb4T;%zLixDlcQktkYF)NYno7LLmzy!I((#+WBq`R}zi3*)H8ZG#?b9ryuXmgK zR<==zFH)36AX<(JG83~0m8#YRavxR}ryxuijwsv~6-=M6u=Oex5UU#}&`r90J`(TB z9`T|@mXF+Zyo5MHT)Hg4fjY6;X!V>m|K@K*p+rA4u7bmsCqk`y@EBW0lGLThRvffA zif=YP3Qi0_A12U5(GckJ6qS|Z5>*BiLofXy-aC=ft^I-)-mReC?RaE%fBmf^kqqg| z?T*~WGL!xXQ60D3$KQ2 z-apWgRBzOznt(wTM&dQUxtQ2^KzN&4KDDey_dnqw>NFZx*;fIg;x99qHvAYcbCTx!ClcNP}X$D$%Qrj-Dl 
zE}>WLRUb@;7V|#aVRi^TZb44A&!uXo$uLWV;mVfhI2YLB19RVNFsx8bN9^;qG#fM! znh&?B4OcbUH@wWsP1ltO){)~th|ZOW3+*~_yKguna<$aPw655;W5FFJgKd+0K>?!sf9?_rtsg$~=*vX~fu(o2|$MIAg-X;l(1v}I< z2TBL+ywzTT6ORt&_^q%9kN6)uh5^237B)fW{k;Jt;#UNc+O8ko;W%big6?|_@8TDT z8D`7gwS%ebcTqXU`!O@ZR^cmhq;n3abz@^J1kUh?i;HW5@)K&kv6PanerIphiAsnb z@!0Di2$OnZULD6AA1F-&a=!ZtfyRU4Jm;<~;lh-0iE%pDIgx-}ZS!Zg=966ad6vJ? zKj+h0?8gSrWyeu%zc*SgkM*aUZnMKCE)w6*uO&cv+Bm3@wxe9R(SElUd zzc-JLFF}9V4?igBz5v2l<$C*`($^`TLSG;Df3;Oi0%Us8h9o5a@hOwxbjsd!8$Zhi z|CN0^=&Wt;mXO?5)Ze9foBGf#{1P85-bxuUXpP}0J3X=X^iVNp>Is^UaubM{-w)C# zS_zm)d)@Hst$(RXWBt^v06Zu?S5Wu)*^8F9|B$&ArEK%rN8M##nN&uCi$E%LRHsI?{GKUmSi} z$Xw#P9}X>6bSqR*W!PhUb%LcuIbc|eU-ovFZ%1ziQ;W~$i_?7e3MjcncID3E?A5rF zlJ=%(3}hauWh>l`F(3LfepVTFl)g>TZL|B!!onoflH29Fb~4mCBT$rInjr@!8vtZf zc4hCHUo>2^_%tr9UP^DA(*2n6m?d0?NxtS!h70YMZRf3)P7Nf{wP2g@#MR7v=P6x7 zlUV32P>b?@SN+b{U{<@{wOVv{T1rGnF!xJT3MD3+ed=|&F*Zwd}(;vdL zV5oFlVseP(Ie~w5GiHKQ_7^AjgSpQ6tT`5u&13@qj@me4!m_{fQ`ai@OOyL_H_z`l zciThwLr?nRuONF~sl~F(X>r{^Y6YSQ&%I_pv=h482tUJ$- zhhfZU_&(!e_YeZ##D%m0#e&*JTT^wia^Tb9JDVVd#U4h~d#jLnnz z&x+4#%mU<)XZ9mcY2!M(o!J4Lc`UQ}j*;%OY=`J1K(Pq|NBkQK97kc+~* zGxTG`D&qO(ZCl<3Wk$P76@>B5eSb)h)jy7sL`7U!wBuZX0)oKodoB=OtjwH1Ma2R`*VK$w3ZSIW^6bmkG7VH-a77aG- zU7{E_-a`F%Z7;D}{ObO5o2If|ERv<3`bOpFbb{WnPRsSf1PQ=TTWI|kUv956uS7!b zxHY{WaNS&|6>8=cXhpFPod%9I#p>Ti`E=fO0+RZJV~qa8Qy?t@`&uoo6`*zb?ZQ_G zb#1R*iMV^X`Ec*kSrAX6HVY9Ktk#BRC09pXqCT092@n&|h#rWZiQbsEHXjm#ZxLdQ z(D7Ylzp2)_Zi!4aT;#Mo2@x_#NTLMz^^Rt{u{(tN z;bCE^E-5K#Xu$de`+exw?_6gx@D`vPm_G@8b%M7EJK1qj9Dwi2M2Fi_pJ>L4Lv;CVtKzz zWw7x_#DE?d7gtx6`rX3omTH|jW~Y6*ye*(1`CYkwto;0V@6L=6D0~|k8Ht35cyVs@ zZ+t)2>3iMtz?A(@($JcrPZh#!nNA=5*P33LrCbe88Myikij}vwAU}mnb^+Jn^4v8gEqSy=)5-!OlogCNeSd3o_ZQ*m-){eXdb!!klZ7Z?n?v$MmBf{leG zD)P;BE!8{ZAo&QKmz+D~os}Y|<%_zS;aPv+4$X|t-SsgvWJVd5k< zKR7s8Lv9KnV^dT7?{PV`WFAIFM&G%u5u6M2&kQt#1nrFFQoprS_+FX+u$hXowQDC_ z$HIAqxzMocb;(cfl3S;%9h{Q+!c+mmAmjN@pm zcfEHnxZj?P+b<)Ih;%lrpVyR+TmIPCpM3mM@OXV1?^?t7JNi#83(_O1k43wy?~(^J 
zR9^-FRb&{*=^me+w41EpRl+?kVGiZY*8xq>&^?}}gEa|!J(5!>%vSO4Ou zsL0};=zD@2Prsstll<#x`>L?6ytuKWHkl0>S(EJsb5T@|k>SW!Ks{7dO|-y10x%Ej zc!*hlDctdN-bt#tthBV~b|&Yz8Hb^sIj#TCFNhDm?o#Tbd$69Xw-Ot^D_R6B&1pT= z56>Om&xjvVU>6bpZ3wpNTkYtF(aQ@b3VaYBib=-k$tr4|az8#Cyy)Ndba-%*EiGlw zM&tK=y3+9R5hj4MO$A)Rez|}`wKA(}x%^F;%(yuFe>3FLq=qN{fy$t#WRB;iE|nu> z!fF)6@ei5$EFEtrOTa+CpzW3iM?T7C2qvAekH>&2w88n;4OSn~3+9@8C-cM{k_6P9 zSEJfv5{A{=bGw4J)|=mD_W>l^tO|FUne31IYmsX@Z6XI2iT!J^-5|l}^**9V{TpD+ z)e$WbGegIFLq$VllqKu#OL-O5MA-cA3HhhK)&-XwT)E9lUaoHm;myEPx)0fn#L3rY ztskg|tjI4}&uTQ;N*iRd%=UUDW5j=J=Ossh=|oXOw-D@NF^8~H3_E|ZDA~+4D^qa; zsEg2%6#tNs_|W)IXYKWKjX7yFf4rH6-lL?r7agUhrp^sqUtj;gcc=UW)0D9)1Z<&k z7LAG2Qv|EG`?fVr+Nm0!V{e~%EgzPcKc#xbZyj%AY!mH7H;NBE4#CuT-?!|=WHL_J zVB-WH{ls6N!;8Ha?%y#5O@`>`=u)Bk3KT)B#;yE({9NM^LZRT_GV=1lP2&80ocYDp zzZ&fG-TC{E|6TBsG7IRhrDfd}*ylL0XMpK=)YzgP_aE5E5udh^FiZ8LXqMdNL zijqOOQL%yb93x&CiqO!N1ed?A$Ng-S(P9;zDC}1`zoGxUuHM&h40Ho;bY0VbSB`fq@w8TQy{wrk=G%=grm>eju0$3aqpTp;0!1QEL`tXl@>cCpG@g>e^$qN(R@2-e z;#kcTJt)a5N}uscI)_g!wE&X{G@qYQ;yu-Koa742$-n*}(BRF-$A>0l_hm>N>y7wj zPhDZwn`@M#@(&0P=Gzsx8j694BV{YXwpeIIaeA#^c-X@oP(LYR+ZQ|p3%)2?VgovsDZU8AfDuYGd(_($BY z3$!9NMpWdjcG{nVUvF=7UK_N*MiV+c?A0MzSn3~`&lQLLaG?B~xuGt)a?caw+M(&V z_;hR)irKGKbbMbvqVUtNd2on18t0=~zxhkd(rGtUg;Cvw_kbQHa~K0ZmTWj#CA?js zm?$tsQ9kXEdvlWd^1Du)u=eA@gFTDh&x%fAzLK&HT(UNX6T}ks)1k3Nw+}M z_wRxW)Db^~R=imuNlrdkvtQIkr|>u_w%9_qnD-R(uk!GA>_hmYorbFHf1@)g!WG<6 zg}QBh{Bs`DwcZ7>J9Z-_%q{#iR;Ao{s|c<~#1)*nn%|;KmjKzQzv+9C!@eN#88(Oy zBIs;F3|qI@QjX-Y7zRWpZylg{_gya>%nfvb7AW; zkH`{$9~h*+IE5Qz-kWo@e4W~8|8JFQ-N2T`}K>@3z3j}#|=}APXEzQYyuVD zg9?CcSGAO&>W!xMfSxS!s)4*hXhKV9B%QOP@4eNcw0m2BL+Y==uk`Ec4he~zGOIPp z??m}`$&3o7#yg{7z`_C-N}UVOf`+Zb-AICQJ=ftoEDyJFZfg6(O*?LvMVD8iqi@W& z#im2+BBBdw-s?kbjUB`t+$|?$e#yVG2-7JzSu8{G_fQ?#O#I$W5QOz&c)uZPv8HwD z9jy?RKBwBEIVsMQ!YD~NDVbn;A%zEtC-ff}`6FPaX4PHBQJ+vS&S&aY`TF$eW{}x& zzW^q3wPr084MGs|E76}@(at667X#@}-_yeocceaaA3+ef|6uq2UXYGs23*Q6d%eiO z1b_6ncv#nKOe&p|x2%=g=pjbnP_d@?jnQw6N%?)7*rzSoQPa`sAt>#IE2$eK-a!k| 
zlrlXx&NI~Kgx460D#!?Tq}QM6ImfDb@t0Gv)ZcP;WXoM`ggAvwnjsVPVujq8iA8DxAp7GbqU*MZTiL`)(t`3ez!=!iKp{^>(Z~D$$Ku1J4^01O#@em|< z!#t9VL@f4poiO0LybWJ$~CZY}3bgsr>hn>aIh~F5! zRj+Ov!Ybd1zQY)2*6r!Qn?qec3*`zb4tD@2yuHEEKyGPx4QGW`5=6k|H~8p)UM6l{ z1fQQp=N!7oFsCY;`i|vIii$0}7qg1X7t?acYsy~;Yv^birT`&62gI~m6a&OV;I&KM zUxty*!O6nSinF{6^{dnyiB@^2etH&rWWL|M{~JnsS&=0cN(biyt))U|LuSv8YLcRA zvtzl1Scc9Pk#LHO4(pAHi*eKwEaBGG{9>0k7hguk|8#sHEK^Iy|7kv%lvndOwN&#R z`M<2ozQcw9Myp!Ys}$$5ceu(06FTOjbpIBX=+`Lc6y*Xx;!6mCR5h&ez%auEUmswa z+${(S0_NA~l)3DNr}H+z?E zKh9DNW|Z zVT-);U-FH!=~J*ir3+zD3;Mm8?UeGk@5{q{seYjq2G$JEd}zhC6i>7z2ZWdtnqSeq z7IsmK-f?OG)X7(A(iS6Pkp%aSO)QvLx8`AuUqAaXt7dBU)SkG$D!b1}j1p_fJFAQA z4WDz=q`hlablnUSKVZ~uLJENm3isdgCuu}6u~fx^`uG}oNB0mVbPU#9CnYAfP7Ecr zCOh3(xC0&>UNk~j9W=`u!jE>+R3@a@IWokYWv|8GX{o$J15ngIPkI>7Yh}N0`j06;RBQj6$vuiP-(1BPn|4 zvDJsfff{g`V^N`cvQSAlDR>b*Bb8!bvj*_Nk~U2l@O-H^|?gYj7)yZti_Oe^j%5)!xH?+*;- zIS=`3IF$~T5Ps|*fFKR~6|gJC%qfhVvxU7k*-f44I(qu^TPm;sHlWnI4poiPs>mU2 z7rN=nJ22T4>FsNJSZ@)I&CV0_mI~TKxm%!+**9{HQJl` ztu1WlKBsF*Xq)%~Jgd|0<@hexarP$D!@)YFe54{_BaV+=kSPJwU|>agV5pHU^(cLPTaquZMI06a-=H;MWK&x*8Zzf)3hqDF$0OAu*?@6`t96P zM!ZE+o{;SjMo9~(+xmygbJk6Y`rb|q+zXwDTWpM7f1{2PG^$;{?N@4S%7vFLY59sk_Ej{q4;H_{9*OPG`pq~4zmv2&-W%K ze@?ghf$p+WL^65OG!T1v{}y#-X6&h;Xfl$tBC)l#ouF@%q_RxG{E*RRxUn8eO{X8} zAV}|-p`gJRiuWgH=4}Q*GbqTK`XHu#MO9Qj)^tK8gIVbP`4y0b-fs0c zEPC`K)Cke{x%EG)G-7IB;4wo!q)(MwcKO)$J9&9HpL)L>3+|toBt7e2E0Z^Jt@7N4 z6o}P)=oyy%=rq@x?fd+#Qw06cX0Al}`T04TUfp^pl@`gDW$A}1?`Y$fX_CYDs4@iT z=xlbIqFk^|E=*ejzV3S|otCFuEtdT%(K?< z`54>c=`;BXYWj?c0^BID6x4^)Lebsctx^mdj?v3CvTt|0H=iSk&2#-vSx8SjxL>^L zzFuFH3lF<_IUV^9(<0Z|8(_6goGL?xGQl7uRN~BHok^3-mnnj*_hqC78j$|!3XJ>I z&H!Dhl?Ow7j1cAFMA;w&+Sbk1NT$!Sva+?43m|XO_6Ut3c_TQe`@wQ{ySo!MrZef~ z8S7rOmRb7~dcYf7{xpQ)A`n~ronjr8Vt@lK&miU~%vxEj=S?S_ZR&~5S5^oLWz5lU z>MIpzy9v-t?Gq)zU8q``Z;hmNAw=Iji*Bn3r~&QdCF+XABj}r*`q0nMXthqIonnT83}3u1;(45v zRPVyHk>V-W6ljtDr?1KMV4<>Kkn+*Zgs>t+LIeGS`4!Lt6?LDpdSyl6eHOpPGKXHZ 
z&#^TaU$&zt9D<`+s)rjZl1gw9N^i0Rh?Vt48NFCppBaetSt^}seqZO$B5J_V8v57#(k9Tp^acW&R_Ub);sAX=yJi2TmeQxN<`5ARnqsKpnbgK%*9ze_3oc&`LI-?ZciBIQsVEv>#tl4ZV{=HD)skv#fD@X~S<%?kiNbL1OrM+HUu$ot*Vf zvqh2KOALoaFA`!bihv6=u1^&bt|1_cY!qZbu?{%)uTk2f!i7(7JBstU3DBrT!kmhb zwS8Jro4qj5E6XV7Z!;Yq6@uHbDr&4D0HrF{y4@X4%!myKa|hV^$CwlVVCj(R@Nr7? zQWZYu<5i&P>F&#&nk@rOca?NDJz{M8uq`WRjmAj@f(pTX=y8W_kyZZ>ggUdM&mr#2 zR_BcAke^{~gfvIr41ZWg`b5#!_L>ZArx?Nlw_fN(h|(Z|_u>V#RkE+hW}MWO+20i1 zEFK%|@N}WHvE4|gpxgWHB^N8PaQt$DMYSziY>Z-GD|WMN;vMI|kqWhPmVhm)DLe7) z&hS|bhV1bc4GK=Iyu#Uc=!fegLT^6&CDI;Q%e+uqvbH}8m^=@*Kkasw!x2H&Ib{`85^gy3QOW*Tes_j$fnxK^YlUphN< zG6A|h$H%PkIY}*}S>Qq1;@U9iPamXML+3D*%k@}ipxSby%NsBr z4};aIEmCVff<4;eAu2r*-Mg_GHNzD)baxykRoP~2JrdbMK8|Aj*=tkrc1P48jSljl z&tMRgEx#E5FcsHjXp9{qL8p>*Sce~l$= zt27=@`t0XcPn<69FE<|tzP}tT#AFuJD`Ye2c-RR|l-P-KUkrxH{Lzya=!yMV?OxYD z{Ve-Z)t} zDx!jrPh`+Qfwvf*C!~~2)0g`te19H8D18Tmr#xt18#!4nY)C};@JmGeEIZ@{Sf?nP zeq?YHZpUf2a96f2asi-g)DZ3YVyud;N-u zLc$;pm-F58GhdgEzcFJ3$H&q!b!DGccG|4!q%RC*Rh9hD(*YU<$+4_T2FcfT!#k+e z-bCbhRP5KJ26R2Yp)I#kj4fO=RcEPK9VEs-CIx1@TuXG;b@wv%?!Z{Icf7HBC4i7} zxR?%S!>zx$3hZ$OB$@&fl2uTnOJi3o=$c&(*9dCsq@6e-77`UAD8=t`kN9VA@Q-0t z6|UsyP%$RO$N94L>fhmxQni>rm!IYp*#?zi3<$%#)aJ=Mi04%_lAtR&$V=H*6U7Pq z=nR6I`dDDR>eJ}Z7`i{uToNqTX?!w4;N5+Cm9WbUg@b6l52Qcm zmy&ROUELxjv(YCNrAe*O0#6uK9M>tRF8^xp8nxRF$cG;95w5_Ocbelg-F5kwpI6N7 zEMfGjo|?DN$Jnf)%Kl{2P~0W(AVz_z}XMeG*CX0NqA=sY_ z4cZXi4~Mwzd?u#6F6W0F%iNc9iVHwvS!~*m61l`d>}o1FS&wb9H2%e$8nzXoL=36U zZ=h}P*m*y^XxJqN{oR!qYq8N*?u$tF{X7RpAwQ(3Udu}iKHa!q+r3tSfqJ>CoRkDH zQS!`BHOMcfguBe&cDSX5gIOWHRj1+Xb2zqxtgl1u!}gSM1s>h9^r^^U@4IHWYRz=U_>&{-@)g{me;^K)-P5iFs}4B8e29@J$~k?D#qAWD0y42^>%s9}W{!Ow zfuA*l>1&QmvJSxcD#->stqX_wUsc)bx-SVr3?jKCtCkTn`;|hjwMYlXwf@u-#zh}M z?SqzGot3&~sP&Bx)-w=>gGH*5Gg`NCK};~gR;fJ0Hkr3-1~m6r6d(h24VON&$|^a~ zR6z^mF^w1*=E*>5ZZ+Qhu{jhA6k&AYNdpOj5ld|K!d;U+C^+q_MuCJ@6q>r?silKE z#Rc;`w}$*2ruc$tvdkGt)@q<2o%$b&fi6U1`nB&hWi*v0p?Ke@timG=?|VBiQ?~{S z)NJ?c$}*Scy?S1zU%gvh zgB_s`GQ=Ayzz(pm5ow&p5?vC`b*Qpj4jm_xBR6V$O>wr`9x4avMWr{Uq1=q?-6%z0 
zIeL-WIc+$Ezmj#01nl4GJ9oBwvwEqD_NF@g3%T<)TFEv3d^6aP)9ySwc*I2&8O96e z&bUHW)et{uB$F2(H2AT6kFGn-bt!ZVf5S{EAxB#F*R&v&0TZ}zEWfdJ^Fb{&K@EBF zR`={1TOcA@$%hZ>-0BhHElLKjBz`|w-}s9}itStcbm8kpnB%&%{S#jR=1C>0C}sjv z-Jvi^osB^p5A)}#aUkDc39%wZ8DG+k*~)vP5M0Af6Asom7^2s*X3A7rQ))0{XrurF z;VWgx@svAKAW~iQRV^3ZPI%k8#*=xN4>f)c+xU;W^e7gv`DRu&%i4SIQR{-Du9us- zRQ1kKA&W6W1nvMIvACE~*V`$*lx$>?(^{l-O^NZZ(>lrC%Vn|@mEV#XD@QyuJa3FB zK(60BJo_n9^xdVFy%83=b?0eaV|9fun7!K`4Y3zPYa{V@+|0oGBr>B)nr4FyN!gdt zP#2y>ot7!#ra#42k^$%myV(r!)*rV0>=meQwkJ$PERFR~+(( zyGMEVU!tjzNdLHds*C?i{nh$&c)Wd|bexs{P{0C4E4mSEhzjbV$7>AHn%(762A}2j zUvnoShrf6WpWp^I5N{Bhn%VUfL8crhR3GeJabn@A*wC|j-4z+!Ilv@9)zOn~tV(P>wKR_f+FJyPjcON8Edj^>WO) zA5zdeHcXamjfyEF+6+?z1Bo76yM!P-Qg?=1=F)Zq;W$-un^2Cr%3DDB($lS$&I|L)L7_>7gJ=?Db5uAR%s=OodoY zBlkf2BfikHUjMaSpgppGk~!+-K)*rinYVA&bJ@Vye-M#l4>43{z=h>0{g;N`2c)>DS;A~lQ-8DpeQ2B3A8-= z2;Jz5*+bun%cIA=mEl!1R|zg_>spPrfNm&U&HOZbFbtcY9zimTZrtIsgo?%2|S$6(^lWl@sf3X+M&8Oo(m~&ISoVsw5)#yvCgZ5 zBGJ|_7Aq{bwVun{4$ero``N2LycGQ(?nBi@#<4Jz`20J51k6h{R|(d+PtG0{HPvD0 zf*O!^CgXaP+SkWRC52-GgS&Bl2|yw*JY$3!x<3fOOxVdQYSBMP`w3j+wAk=CL?Yo@dDA9Bgmd6b$NCJSP86_rZvPS~L4jWQptaso3(BO@u!um24$7Bz8 z8!-VjlFh8QL?72#uKeVmKXe!~uL-ca$UFktG7!0GzDu=kTU{b@Ok;zJh7uZt=}Xy$ zCho&6u@K4X*8FL0TM`HXkVad%?&2t`r-oTXGaa%oOMgH$VP-4tavDhI*J=QNTWZu4 zXqBA0VA5g!&!L#q8-X7uU1d4{MdE!mny`ooKHKapjKN?pO(v*_i6%wQbx`B}EpTk* zLlfRohvm!eXlCB$*^~9z{N8g;aBE7b^p8$0i5hqgb!EFSYt@{@fI6|JEW;`V8uizU zjr#(;Rh})R&B*b=0}?Cn2A)%u_@0JNK{I@*DeA^zVR-AbMVQ=&homdPJSI^sss5nx z=NR!iJgATgktHOHTJ^%`{07G{?Uy9nGFlZ{cfnce-f9b%_~FGuZ4`tYZ+jg|?=~LB z7ww_-M@PxzMCcTDKm||k_ngMinGlEi04}FUztjA6c(RX#M%M;e z)Uveb>q*XtLlf^eJp1T0VaK%EJe24dGw_bQflflPd+CqOtPHq4r1u18nEWSxcRaNY zU+)mfjs|q`QME@@G7?pkTVRs}67B(NV>{PZ*JG+jNOeOpZzn^$pS&DZTdg%()`O@2=E0Rqr^)qAJT_ zz)}cDW9(v!d!}<#EAG%~IXdPf0m}(c?pV%>IrUB;|7_u!W7}69*}CS~*4$Eau}=i4 zm&`S#@;Fw)DN_mn&){ zf=PkpK^i6)o$-*o%`)-SJTdx^ICxKtXkM3-o~B?iodldifO5w$fjvEI?cze~hyaeWS2mwqyr5EO86lFu)U1g@;-?PDr4(@B5?6d*vkqX1-9p%(&K#d%w*qNi+4-feb-edko*2oRTw`a>^;m$9rz+8SxS0m0pxn 
zdf}Y41sdgt&sJPWG*(eT-@ws0A+%X^U<8NEh!Q`NfNKd*?zomeYu7A+vlq`}$ua&? zvkzfy4*Z>MHWhf+svYhZ9N`xn=@%027ZT|oB90^cLZF0#>iOewt67ewJBBI6;=FGX`g7Za{2Uk88A(%~-tf>??7b`BAF2?FJ;l9C`m#q409NQoY_BkALb6Kl7 z@&Xc2mH_3BvbUdv@FKU|tMcCI!{?Z%_|^0eBCp zTlwzq^)vz3V= zz4D-_thC@v1DDvWlGBQH6(OPPdDr#v@`kq%q>p(2`=GXWE$=#BwY|N(Yhg;J za#p75zsmP3#~wJY`iKTsTrMxKsKCq}e0VFQgQ`TNARV&iLiwc&Mw3wcyP1U@zj2vdV7w6GVJc|JT#0?HA92!N|B zLhhy&Wa9lqLDq>gnOF)0V}^2#&|*!n$G}d5?WL;TgAI=x`G+w)k`~9DeIfy40+c(( zMEIEm)J6dNd0}e{{9&z7c1eC#Ngm!sO8(g*_~BaX6Roxm+T0$@(*BV#KgMmW~T&M9~`B#~8&W7MI4o8fYXqZUCgb{Y$5C?b{Zbk?+>{q8A;tRvy$j=8Z zM^_Gzjtu^F>IYUiDskPu!v}>0l7LnTQ0{1TWZ5qgFh`)$JNRN*S^33_m0mSh-v2(h zbg=^NH5V^cT)JGjoD55#D*sd+qUlWe1E%pw|4LDzc*D$r@GUr zbds)4-u)tsf&%(?0R_7HvPe4TnrDOYyZ)j%@8QqpIH}k&7=PFrg!br9U-Qo}63u29 zNgr56OA3hnzNxc@6J=a1w5-8tvvi}cmz1oadYDK~en0u?%5`_6F=Hcy`0o3LEH3}) zaPsCph4*>U|1n->_QU)>`T2Sw{Q3N$GVR!|Eq7D4J$Q& zbD{HHmtnhfQyKQn{i(UZT14;gQ<~5x&-LY7w)l-n&k1{XdT8W*e9QLGgXf$&Bc0pk zG#iH6BkR@mg_&zsOhAkUDOwu$wM9$_5b+08ly3kM>3 zExqqT3E6V4yKnN$1dgp9jhu_9>7>g_jRkyX6>E7e9Ngw;lPeAT^3r_eMk0hRp}J0$ zr$gW1egUWo|FskQ`OmzBsdUkEA&1E_#g|IdLV&x>Q6&Hw82zAHL?O$B6et z3uWE}&`(o()PYe5C_qw0iOplZPNTn{{V%txK3PV3&dbGuMqSx<*z0kaJh2o@7V-~j zTLmmvxzU4^TWcE0E38vhX7V_b;dNpxeg$op*0(%?m#dbl2j3S$BQ2Rzv&a1@zVQY_ z#`m3kH4KzV$f@p?YE15*cN$m{N)7%Bf8S;2URGP*Rt8+L?c(l{d`jZxGROz{9B$Z^r+QDJ;4F6xl*xbn)-WbsO* zGOJy|XeKzzFeQH$n6+AwBJ1$s+G=8-Ska12fexUz0v@q9#>@`KO~i78B6KBE73(6| za;$O_5-e&u>a1|s28F8TIiGT=v3jy$g^O~o)I#Sqg(n3YI&|F>K^L(~oD34nQ>BHm z_HqkpU3O3fn&eFdn*3NgSsfdsG_#nJj)I0UwUU^Gn!;@jqv%VtCzrGBCG^8kM~E|C z7VF6n1+%{zDMijL3}z&8iQI;^F`ohi`QF&Fx}5e8PApcF1&WmUAymxe48k<1(ziy$ zbaq>6&PDA4+Uksdl)wW7&e}`h3$aa4Q?tSsQZlrU;|*w3G#ngE4?LXhuy6=qD`TTW zil>%vD%nVD%Gbr7^yY!nRlUx&Q=B+D_C%V1@|NS_42}{gyv(V;#3)FDa-IJ(D7~CAF*8v<*uORCs{5BAa%Q$Jt6~W@=J0 zcPpOe(Ps2Shpt>BaNKuB;hvd^Pf8Wxoy74G6LaFl?ZnlzxsY|MPj=CL+1v}#F>s#1 ze{(d=jhe48zZ`!eQb+7**u*&z+cTkc3)*CR}trFXv{6)PZumH)a@PPVQA3ZiNOVQ1lHSg4LDs_Ji?t4im(<72U8TTN9h%i-!utIs?tl`hpy4>g4?m8O96^_ESd 
zG(`$A8=@oe^KK2}<>B_7CD{hs>IM_T$fpXG@$#2aDtI{lqF7lg1saq4{CIG>)+ zO;}w`JAd_QpgRdTajoCRXhLsF!}qst4e?B(t})5^I}W$n-hD}G&lAx#OE>D{@_W;4 zul+{yNy#GVxusb`t%5E=GANav6ztHWhVJ+`N`sm_%ab3pqjlff25a^KNdeDTq>QLB64TrAOBQ;a8g+WRzT#u3|9;W7b-%uD4Cg{15&dJdbK>t?2K<973T3gZ-!h)K5KHr_r{)ZBO zv$CZSyLu~O1pVWkoPQ!Fa^Veva#$(B((Yq}Nm-^!#aH`DiZe7YQZw$PMlxWal`QQY zjn;-cYaSG|qm!3TP~=0ZPc;>8L?v|Can7`)lOjzU1h+(+icg!Mr6c#go{Ap4S3!Sj zLZ{zJ>QtRYIh}e#v7|KWiVIH#&$*7K8n>j1C*Mm983Mh$G)U3Ye2i>S@hC#^TGE60 z!9c?*g7BhX@8mE}DX_Ayye@ldVWPIYK4$4YX>}hlb&$!)}HeZy-q<_ zd1vX3`*8khuer*%0}SO4OMFRQ^F4+r=2eugwI;hf2umU`F|UL+^!`47%j~M8f-jwd zum9ul^lomzl>u3*OYrIe&%nlSaN{CLC=(`1r*1;-zz_GK(_1uqKUkx48<@f{KQSyT zP$QmWNHtxbG?!hK9Oz{hO*<}PyZmv5DM zBxsjtkI_*-)UTys5w9R((v)acr(ejZZO!0@wD3kc2)b;mE~~J0q3Ad$uO3f4KzCus z+`}3ouzR;@G_o6L+ofQzfwnc-mXK~TH}nbI#T*Htm(%H2g@Z>FoTFGY*tuW7-#9~}B(LE%ULHYIF|w{o+6?G*Dk z)mQcrn7rM*z^boPyDH?_AnW#VhUN6h>|?(&)SLab*qHOnOm3eT1%I-E64^hO=$T`^ zKvdl1<8J_v{VPS5Te+<|*bHi&vgZvs$v4S9{r~@)&H;UAoJ;fLp@CF1D+fvoeS~8F zSGUL??%^%L*FJ$yeWJ}R<<}by*LmV31J_Kxi`Q#iLwBhhY)?5KW?q^ZExJHuEgOI= zq?7?YMQk4jJQV9AX6(h(H+;Ka>C4W`S?XXeGdRC>zGId@GiZXlXgW(FdaaGN- z+}Tsa5tn2Akm<}!2d8lKUlyU)<-UaxHak z9RK#BiXYRCAJgGU{GQ3Nj+U~hfr$t%I;%I{*^Ume5`(=>y`OvQQ-rUqahmoEQXak0 z2VC>JL7|tAfq=(d=a{!@$pIr|naE{0C;RlWxPm_jnlC;5T6prKgYsSzxa%cOD zWbal9ZF5c<^G5sh$G-US{Qb7?NIq}mr6NWImm>C;Io;d$kiU+&3b~vi?bcDB4Bj!; z_|I!7S1OI4CVJc6-WGdE5$UT_iM$wfIKizBrkwjus^gNc^um=#@4fTjs;C!nf1gCb zLIZh`ffIzK4<@O3_7aUZjw-qf_hDN7De&oTZmN?M!cm+i^z*|z`g!Z>kM!IA%0WN< z`Sy4r90T5yr#;nt?vo$!NsPPHe2`uvZ9umImR#G{W1RjarH?4ys{gOG7kg6Xfn9$mIRjX6_2=>@x0S9 zE!s)Ie6=0cf@85`#wXe#;Q3R1kZ5BTCYsy)Z5%MF?>1<8?Hp*}ctE-7(#kZ{$pd2W zkTbyxS^5L)o(@dJgRds8sUs`#xEpN{%3V-0NGp~pP$I$J!fR#3)k8n6)&-YaC1JK?M!3jWfVb|UKx}f`Rk%}csI~Ca?^MD9aG{Emg%^RAA0)^Oz$85&Tj*R-b!rz-|+!nil z|K?I%>7?{LH-FTO^DNSTuR zIs=kY82_!v@{MxWY;h?es`9PT`5JQt@JD)PZ~A-Sy*Epsne9uN5t&rRFdIhZbRdmX zOQ_2}$Jwchxl%ty zn+b1bn@F2a32}h{$%Z_s01)JB7G%+{?53SE<~*CYjr3oc3Fcyj*nUL-9_?mst(Ktp 
zm>daOj^V0*I#U>=J9*yk-7k@#5i7Tz6X#QVhem1_Q`{+Im>UybI^qg0v#RS^E5hf( zXX5!I_Eg7Oo*Ax>=rT**qHO!-ok9$J5O)B*un<$cqiNEFLb~<(Gpo2ZsxC znOY4XqA_!yiP)mek#=H9Wf70&Ab85AjTTKtcLyWo{~!yrAPH8T&iixMHOe;kjF8Ik&=;?FwMG zU@I9QslHYVAd#^oCFKYS&?Im^-Rxh6XkY907!7}|poWeRlypI}L0fB}UGufwHAsvg zwt|kZBJx1Wi@kVs>{P`)tS)5%Tr7yxA-?Sk2d(Su+Yceh@ZV1 zx1I(Ort%Yx{H!Wt3SfNKgL2mw=4l{Y5*$y``5sfA$NhYs8fW*Odh)+7#`W?Kd?v2XF>PLx ziQh=ParFH>3~I{%?mKxz;QToV<8{N|$$+=1cJ^Ccy!I=r6OG+-FeMsP-bBPzs3zLQ z^Lxzx{&RjG|9&vKnEABt_gv@8vpV-cvd(h&>NBYa#(W;fzaKeE?DR5tJ=pb4o4!l` zxRt}2x2daf*6xBj*wlqZCs9MLW{~DQQ}OE6a}4fX>_IH=Bdt1#tSYT?WufD^VO5hLD(tQjeJM)%8;hComzTJP^K&$X3PsDE=n z!Q8zcUqf7mfMAQpE6P`AEprca|!q?_Riwv0Q&G$xaQvZNv zImbIH)|)1a-66?|H+#O0EmHXq>IzXP2P;t$VuCxdNOL4DW+>|@hb)u)paHFh)k~lu zx&XoZ_~kiz+m+^zaj?SDjUK1#nE#2Fr3>uhm)#8sB788V8CoLp38pFbNmbnyd)5xM zpy=j^#QGA)zX?@+X+RL}Cow94*tBl8e{+eC@AtD~2l0_lG*pWlx!YYA`v4aB05#*q zoSo)@aFPNlEs9DX28T6BAZq&3p~7m17M77^!X_8pz@*F)!%H~(p1jTtloD@3*{Wy9 zTJFlse#4S0^e329D0FU4A3e?Id`L=y1S{OC2uB4v1IuL?3jV~p*)c1K+cvs;6AK;$ zy`WzqYh$WdwaQ|&e(`E|`Kr3p_frc$|Ep>5Qqu9@4v4jL*v4#;X@Awh^--Q{`rIo$ zz~hVrS}c#3R5n3P1Zy&-=)?yS3#B%J*hMG2zybMy+a~eEi?6#3Nu_a7q0r;3VVZ|j8(m$s98)Cn zYrnQTPZ_ucrPA;K$SmCM1$5m1<`N|8I;4x{Hu^y7{3qo#Z{KH#;xUaQXox~+8dG)+ zZlmM;_rQvu(;>s>#U1xKLky1zltin4N|%!3NeL*({tU?DtBv%EVQ%U)qP?vI-kj!e z4e<0K-d_Sw(;r}8QN0b=?8$1Mkq#}U*whK^|dK17b{<*8`l1F z-x;hpZJ!rqYoS*z-R`rFTGYK^mhu|-+%2*Z6{t)0;t#oE>)zQxO9?}=CD*m6c1e@H z|MGqnS%A-Fz*Qkz`u6=QI09aoh>3w1a*fYzt(T+K$+ARaWd3XJh+Gr8TlL+9-q@vg{PQHcQnh0-m!Z)* z)cEz4`upSeZ|$-ES@Sia0AIVwXC>*)evAg|2-6Q~*r_`kJ^zxi+B-X=^+crfL%hw6 zjMkhenu+TL^rM3Z`#5m)x{S8CsPJj4<16Kl%R&z?D9BqDnBxWyuil2wOP$_J!DOe@~ti5zUL+BE*Sk%FTf}&4CH|zES!J}e&;mq73+zg)+hV9vnfM= zl5ra9)((yIH@;{MQ};)l8OVOqs_q9Byq|E3M}m>i?Te6&q-@rFn$iP)lI|oCP^6Uo z`WVMuF@|fUS18>VKo=9b`cP9$qu#ogP3YdEsT{R4h2d3$(mhrZBi>JY+iypCB^i2a z848zWR39?LOo9Zam;ZVS&|xG~Vi|_Tn^g4gY94q=6-${uuhLElsW^<)iyh$k>tD*L zw>z-WOdaJA48%F61OkK<{+Q>c07j*SBocZ8l#d;0s<$=7lNSgAsv%aaA&90yhp>}_ zVxP-Hmu$uSP@*#eSTD{_y_AVj*ke>_`UH<{e=@xyT%X 
zz=HjjWEiKUv|@#NnUd9jPBBpCQ#A*6bKnSHgL(Z;u@o9E!Lr>2A?%ImC+& z`@P6{@9OH&u{45?v@`xxs1UYq?+>4F&m!xOn?0u}Svn%-V1Hjfe~OU>&TTXh5&b+5 zJjY$iM9!P-C|3xYiH07S2CxGjg-EPif~+Dfng)cuz@a9LcjK=-9zfh6?>JQS7_|Qg zPV&AIOR)!p0?98Wmc#8kj?*fwTg)n9AYfoHKTd($s;d6xTe`~7J;uN?4!vN!Z>09B z%upaa5PRQ^@*?nDlmaL)B%6u@&08t|hpj)njdxdv&6K{D@_dlWaED-Uctn$PSO_9S z0y5Af@{I8)?r;dfZq64wo92BQpdiLj(ZmeL(}r+sQ_s_Bc-y7okKNJ zg-?qUMUwmykV4~2D>Y-BOeO*fFckIA0Fb1fp^X}LsqNm;vpm#xUF$6c_G#dQKtT}^ zwZ5O|IQB2#@5i11Pd9+b3IivKhS^zP`!$Ufk$iR5c->qUn(?^@?M+WVI_5^$Qc$dn zjkE64y~3W)-(cV-zoXPX1W(2(m$tPnOlFX;Cu>X$xyU*1u{-B1T-t4U-Bw$c_GxeO z1-w7R#ypFN3T|_R9#JL~y|qCzxLIkoi6#f3WuZ`31BnXe2lR^+Bx^=WGm`v*(lJ?4 z@b`T=bwsjHhwtwb4-y(HvA+`Cgz-lO!WCTv6r_VxLxIPlYY2i(RUNq8SpL~u*R|U$ zoRSVZwTqr{+Nk%rLvpu(hYt2a41*eNjQ8RV21N_zk0SSh|6_PcDPVu7Ow^k&i{2O) zYO=83_T2q}PvF&1Ov;2PpH6fm-wE%vo8j!JQZRB;h26p*N%2{2tIhHsKM zlt>m6e}MQYfR8Uq{B)W4b{+>MT+$ZmS9@YLI0}(lR@k)%k+@$QO~+{t!X8#Ass;Hr zrVy;1Gx17{>6RxheICf21xkV;7*16`)X1(I+AMHpnPzB##q%%QgP28*4?2n1VRK-6 z0RyVkm_P2KqM+o%On8%#{wMBYpCJ&w*tfpIq%YLQ2khHJE*tIt?fxYGl87bhaJxrx z91@oBKjQX;!Umz`hpj(sM{{wUrI|ERA)7}l=72F8g67|qQX;s1b&tb1dP{b9oVLE)^&Oqo+``Pg7;hUGwCnU1^(cY^rLLw~u}Z&bxrj2e(fo3z{d}sPmg{t?Lsn=ybGS zFf3}KB&{vPm?_b?SQ{JhVJw3REJ*r&DgM%eT*tYifNlX;#cMcQLkI<)VE+IXif%s+ z9830i%6DEZEFtn?C%9RIC0U0{x@tcE_J})y18hnqlF7c#2z~$TolaH#s6D>}a0q&_ z(Yp4;+vmB5`%+@8Y=hgKh=DxdIgqEvxHq~DHjze~AdbmgLS#~m2kvT4MnVKgJ>;ki z;>b+Ob2qicA!X6#^$i_~*Y~8wDL2@}&vh$TJ&(hq<9XXBl?W<`Tb9{y0VMZ;{i3R; zbhK!Ubx}STuGza=P#skr9r^xZk<>j(h=*T}Ad@{yf`w=_0tk)CKtLEX&L$yE33ZVr z;BrVsMf82URUlN94F0Afk?11CkwbWEuRsiF;t#L*ledJs@MuH2sLKD2fO-FnfDE)` z`y_&B@mat^)G)$B3_M8nO2ua@>~{#djJHKTs`R^u({h-M^u4YZ$7|1#|7kW-dbS${ z&EU@6YE>rcim|7h8267vmKY>9{fD8amf0oCy@1++XJ{0aVwvi{yV zDc05iIk_Hj(FM3sM+TVmh#JyZgN3+(Qg%de5SDQ^zsBi21GzxXzD*(OhpYYKKBa}; zn+nkD!MpzH|BNY)6cag=ZjJsQn2V)5v>Gi~+WzsSd>PMR?2S(!a$Wj1;kfN2@A0R? 
zg@_S+q%=5bYAkw$18Hxr$lk`Fuz_tb>t4JcxcKL^Df?8B2cpPAKm}uamy~qF^9o3m zk>Dgql8~9FKydJ zj{qBLM>8AG4~b6Got1}QU+^37n;c<&2lF#a9zK(39VL@?8z2U(>VGw+n+YkqPyDWRQQP}H6RY(*ghSr3$dHv8?$9yK4*YafHx39_G zx5{es+88(sAtG={Das=d-WBkN{MyXD^OPaUtmkg9p)Mitol|}n25k#MI-pGpyuPmz z%q6_6)Q*V%Cw}`KnMsv_vTU?Bk+ z#2&Hd%=G2*e45&0;@_j!qdo0(#lBRM6h-2YdBq;F=8SiQ;~LP`?(YbE#rD3wqS>wA z+GjB4%>JwLTH1oy4l~E<$wt>ecNbsRf1360DgP~gK6+epgS7&Cm9LmZY28-N)3uAh zT_t&sK4t#TWjVfIYC~XvG~Z%_!fy31+ab_8R}B60NY|smFi6a%eMJcgX7k(8Y}hV& zmsG@!j|3D4W8$9mRrFvvLZL(C#qJB!utZo7)*YchDTYUPeQTIv&T zZ1gj=Yfw~G&`_~ENr;sv?Ut`}S`XHIE!IPF%IaKFx%Rb;O2+JZ8Z4CPD2f%- z617n>U#wHUN9U!9f?{(^Es8Zpbw0rr;$#GD#Y)v>kdk~<+$qF5#@NXYGV*GX#3HpXT7#(nT*>$yyt|CsfHGiEIb*p8c zM^2<()M_u_IICMJ@M2@R<4vvCY-*@WSK*tsk->NJ*`m~(uhqMs5-r7vhvCK~a(5Vuf8FRsMITP0&}Mh$)iHL}8QcHf1clV7b( zxu^OvmTkd#WSXT7Hcvo_WW?kh{)VR*$#on^e;pq4Ol;GMy03)0<&Ww}mk9ljQLIlI zwAs^Iqc)`(s1$fP-44wmPoCaygx^BN$SYk{yk5|ixr4p(NVn9=Yl(1yXDjVtjc`|e-w zY?4Ir*CDZRsR6U@^FhMzLS$C*MkmFVV2)2Zq{37rkjE@OEk`5aE3Cm$BB%gS16G1G zO)q|Qr39BKiocR#nF!TboWz+w(Qu!HPlk;roEVQ5{#PyDL!etX3t{P?ZJ47{Lr`Zx zg=&0D%CqO)9A zTA2n~gdt;JgefnQ=AMYCf{$lSJq{fq(N19#YmM|Yj$mw5UR){hQkQ>+>L|z-^hAW; za3o&DW>GuRN@q?GAJ1WE6Z0jyp@gK=7gJV8*!;wsUUxJ{gE2RpfTN!JJ2A9gq#<#y z(}uo9N%wbIHM(9!kVt{c)*{GU47>A)eE+#*c9SU#OuOJuWE8GI2Rj^m9G0s1h>+5; zCA=ywx0EDJWvgOO+M_~Fdy4>d^Ac<2N=aj+SA{P-%6P>kQ*SCUn44#Q)-1kQl zJY|Iq6e~eGypJP}R@t~ANe!FujvIgFUz0x`x-G$ac?$30S$yI_S@{JKr9EtqEm$@t z_INBj{x5PdXIGxg3Pb#QL_M+`D&|U!v0P~eT*3mZgdos#7$?m&`G2vC7N%VE^g6wz zv>B1sP|$fWC2Cr=V^r2%Oo;!)b@*5gsWZzh=giehyja=gZ%-O4n-?QBRaaj5mCIeL z+P!M>J1YR7OD$F%f^=uHE=G$GO1{N2QLF%6mBs(i#Z9CCMi&80J0$^oW{V9L@RlJm zxlCn$1BWIW&6Ft_yFkIT$?A$!#VZeNz^2s5 zNxuGM@l|UpfkCugYuIZ3Un#(cFM})V!llNAq>Ffk%N2qyyowE$2EawiN_j*}-OS0x z#(DB$Si(5)I@Xa-LsPSO<87cbb$aH?pyxM$20+gZv}O(aM6Sw}fv-v7%mj*IN)d1% zx#{D|d0l0-#|Jy=Bi|`+b`vBrWA?xKMLe(;Yt5V4unt+e4DtV)U-Tdu*!?^8zxc&I zxc}}KQA|>bKz|Gu+8)ATsIvyR@>W2J;#$17a34(?BTTR$i1-}{$+I-c2+y15 z?HQ+=7NaXeG6I=$-NLdA3!|i;g%Sc(%v&%KaXl5Vn$uh|l3YaZIcZ`&%8mCjVdHs+P-Wus7h6-`)WK(@Pq}2 
ztJ7p)NE>Z85~Y1$Qd@Im&jJE;wg}RQ3Vlj;sHGu{sRzMUQljW+mUufhQC3s-n~oH- zE~pT*?v-|7pN6Zaz`aRMQvTMNmolif7D>rsZn$uibDZj_YqZ__Ut^0({1pYSJTpip zz;T3jQSmTJ_EOrO;=x+h+8^~af9((lV5L3;|@=^9U z-9?igc!g2zOe(cA=o^5|m^Pp?1D+}aA2mKFy-21ekIH1)rD7Z82$Fe=5x!2%A{isb zyELKMdvJyo{ci&K#d6kkt}QrEZ`TcHS_NrP&jb_#{SICYMaF3QQfO>4IoYr_M=3c+LP8Xl9SBpzT9G+5+{vQ3luv<26HK);;fv)yq3VQ9GzaS*OXvFL80p!Bc0+?>4)B zqU|hYGmSZLV;$igAO@!uy_k1+jmmigTg0a<+hjv^aU7P2X9B2&KPt?v>!`|{H<-|2 z;;uh;`aeoWs-7_!u0g)Eo71LGdm7Ac1G4C5M+a1e3q>;xDdtNPan4Mq6eatc=mb;e zGASpWzB<0c$MaRcaz^V4mg5@U+R3GRy^>|~h8jGAHrc$E2ZAkY#tuOTB`jCR4#`4O z@~;@vBR~vv%FWE`-{+@@-;D;BFZZzt2dUpU&u+>8GiDsz6z;_iUM_!8$dF-PcAw6G zhRD)Nig-3aFQqwkBYeLWCMT?ZFn%A%#b8gb zsac921$eh&J2uIX^qlZoDIdVzdvkBj{O_7^TahV5&`fiFb$u!=t%$7cbS6!D2Kn*2 zw5yVB7t9)t@pl=kc!>?1F3}`T_W!7ZHmVOX3w25mz@2;w@ zuy!SD-!HA7PB}q!WyU$e{KM10&G!Ex8k6^NCqt+ebonyy(Hn>qReE~$RGjkLEf+Cs zsBg7oHO|*|^(@?^iiqZzrftOkPogoWS;>MbDq54!da{G7Nh4#Q=<+e*t21_t*f#Hy2_PC z(19h~0gk(w-y5>=yCtPM_%1C|%b_tglO`8WYC)FlP7krzZ!66SbQclFmeFgTs*?&= zBl*@!5$uh{+-otMYhG}uv*X(|=4ZVK%jr1UNtx|y9#2}x>=7k-FP>rgL3{3tT18S@ zH0WHvXD`aR>CKoD8)~GM%1wV|X}g?hnWUUaC?y;hA}3O*I4d&oC6I`uCIv8|{b^MS zcV;HBDC~|5RCrd{7V|ocL&d;CBN2_w4!{+zpjNF+hOcN)LX|QFL%FO;7aV8tFJTE* z!VE~6hmo!Yc9PVHyve6d!v^qvLracla$sUE|MI=2qXSU^P)KL|=uD-`JK4bH+~N#7 zRS~r>8g#h(;)A^nMznAM-q+W+6jNQSj=7?wb$vR0n_wS#v)2S2PH6_;_PrZKIkase z&a7q@Gc}_Ock{>!lomB{=m9cMTLpd>zXQ!hD|cV7lZIb}C2GFb^C%VO zWLu{+2OJ3KOi|3+JDhdM@M3U_@2h#sNAM2~D(G zG*H_HZI5QY`bJtV2U4DV*1SH z@RgY(sm^>Blt)KLF&}98b;pUijzfnFpsb=A!O@vb_RmRCYqb-ZRPoZ2k!IfF_{mIe zp&+UM)q-^GyR#%;DblTSwwcd-HpLAwfj=>>(qk)~*`}>JyZt+1$Of$}YQ18w^NO5% znK_0d28CcErGZUf3wQa(A_n{z{h{ySKE;(z!e$4!q0M(RzWzaKzt8$}!Fz?7TQOQFV!kh=9III`tTJ?wzk?a{1 zlG_yp!PNuH+RHmNOv~yxmgO9o9^1}5`$#b-_oSgJ&vopC5wXLSD$e4MxKf|E9?Z0~ z$KNm>w1dBXC^NM<@D0((Pb(bfF?x^H;1Y8+mhoF8!0$2gZnoamf>(Y}S?A+>Z$cu_Lm=zXnkCH*(EpfqZh=eY7ml#@Jb&tp3yxsR_8+b<6Uja z7DiRT0qyzVFj)+x_V+|)-fjy=${8YI>df79uNk&wH`qGczrNWCX8qFwrz_ zOS69H+^lc%5r-~?r1A}%^d;}-GY4~(|K@u}aIepATlC{R__Px>_3OruUZ8uSr}o2| 
z0rac)WD#O{F{8{C-#4JA=`;88vRiF!GV65)F7DyVFosH&(UX25kY-H(QjK(U72@F$y-7gc6XLLwKPu}Q?@PtWE1NB zs9#&|Gc9~pH!_QilI^5^pq{g381-2&vNMbB`r8H5Pcu)QCd2sQ4{Ft|ru3H#C7vXB z<^cFy+y-mG>k0nvNGd&`_!uy7f1${|)6tmdIa5_Lp#uu;Lt~FQFWb0Jdj}>|1d9#J zDP(J@HwH za7I$$Ix-qqKU|59MoCjj88({__{xRq<)WLd@oo6ewgB5xmpT;-`BJ?sw!isF>Pc?n z)CW6>Xo+Dl&7YnB#J@SERvfEyr%9ti{Vtjd#i2KN^)7L-z(TDvjH#HXtNxk&cPOn~ zq8%o+$p$;Jq_&Jrs)6vsg={Hh|HodM({gJWFD!MNj&6F4y>!SJKR0$=bzP+V5331# zE_3TAC<Vk{z5q2fE5dO{}%!(6i+s$QNsC zzlzop^RlDA%%aNyxD;oUN0DOrJjb($Y9Tj~q@X6gwh((H_-r_^>n3MOCn;Q$`1HlT zwF%BxN-g>>IOyTZ6I#x3tD3&x8;E)3X7$K+k%5!zns#Y9jW#ZBV45faJvL9(SFyrv zHU=T&;I6I%y|l9Lv_%mgpebHRfqEt0-TesDC>LfQuWvL(eF!T_Muuatqg#TO}v&IDS=L8Lyr1h6p&doQ=SotDUv$DM>IVO7D*L5 zz4H>fw|}(sdCVEuA?#xGP2qdj6xRCWC63TbNmQS6L-iwUXY^>iy0P2NkK8C0Swpp3 zwonZk(_ab*a(5>qbv0Nb4=CY{3@q|+cX;Qwye4cCCH$;sz=-;3D5Fzby$!?Pl_~lk z{&NK9`(OVF`E4}! z&OCZoM6YXiwmZ9pICaen zPsSEN4`CGRq~fbSkiq8w^qe^9PZ453B%e7K`M`J~VrYb<1~vIWMG+#x5Gh`qWTAH$ z^Ntp+S?bGyAx4h|+}6OKyN}c!D8M42-_SHu*whwc7vha&`c>EUF#>LF)ASY^u5H4_ zYab?J_g3J1UdS0HsUK=li8RoObwB`~{pUF(9`MJst*t*0t`nSanMMFeJU|p73l;B! 
zyzOjTXW3@uM2vkMI29NU)PrC@8Z4U|j4HuMBe%#RJOugYdxb|hVbB#PYM9H!4bA~B zq_0mhX&x>O1sEd_B)A>j9O62%*dzAf?DR(Be=eJ{9t7={FwE&B9c|VK?-vM^ z1yTL(KOJe>2*W&KoyuYg=@=YTCctB|)U=2L4i2ap%H97bz1*Bl^e#RKZ4xwdKUkgp zk&YB{kb%mdIb{E-vfE{2duF_Bz?vQ(fxeDb&<3pKD?|}DKvf@7O|PEj2)rC^FkI#kd>>oz&?6SK4m@&-9W)X|MieKybO`OEV?Vti{THy6 zcL|VL#R*>RJcK9yy3qjH1>GEnO-lf7^m2y1|vgk zB6(w3u|uYhhtc4%5=uD{E=(17cZLKX)(h<({xUEuOYsCL(#*p00CBkegC|IqmH&YY z2uhzHbxjcH8kY0-2a+>UA28&Xz0kB+^bi z*WEPn_chwL|4(D*7@SG8w(Dq;%*3{B+qTV#ZQHhOPHfvwCbsj&&cr&Iz4xiJKi2u% z)m2^9y}Ix9to2-1tMYC^u5~*se0YI%Him$qASp}B->?Y}8TBRQ?YRmLVav^`w|^&0 zk)@UzP@@C(ZCH09ymrHG&GHsgej6{a6&~`FU_d9@vnk~e?Y$-u9NHOBU|=6mW+v%` z5kCwU_`VZnB8~7eK{?5jJFeao) zY5F_1!Aa`ANie1!B$Htf$~8M!jg0;$jO&`A$?fxD;}8$AIO%s?Wodf~|Ke{Or659v z>gWJkJG|(K;3o_EjDl@QNTpd)&faTiJq{}vVPoJMHeh{7C9B4TfPB`aOebW9T_L~g zDz8oy_!8X+uNv1EDJqd(THp?_b048vDWqDd-pf}%9cXZ~s2Dfub#>^tV>R2%yzccS zIo@xpi7I-qol^oSS`+OE8Nj?l@`6EE#2PdFj%>fb~vfU)m@&tAO1+JhIc zaE?&hkf{P7$0NC?0#`1eYQa$%mWpd>Ajl&Qb$eEWhVBbZ4dXS9^SWD zm2FS%Q&l5+bwFIdZ@h=tp&_i7@XK^lUp4$1?Akbli*8TZ%%JFtDbPj0Gq zE~|_;f4CM_u;T~j+c=$K-D)4iMH*mu({FsEu)|hB6~$usVu8EqMQ*$rDW~@X*eQp* z<2_$n^nL=lUPJ%D?u~xYUwQodJYKihZGLit>1qmr1bv4m<$uBg(dYN02kyTD4FV1& zfdUspo&iZISzX3`HVwM^7G!Vu)+_^S>->aQD;)uhtXGIGMorau2VAC=c^v$Ps4OH8 zBfzo^zbFs32(Nr6`wQHTIcjf;evdX`)>2`L4N3*pZ1=7ZQ&W6a!OtYZtK%`5? 
zsgP}e1D|6|vQ9zTD8lmy3981bKJ`{k56+(l?7@p~1=JnHv-{B*?(*J9>rPi?`+1?J;z*LUG*pEj){zI&CW z<;%(sVKzl2*E;3)fqU$i*3D6wg~5JzQ1{mcELWX zF3(XF-5vz}0d=#2V#_Ix$kO(LjkNvq%rgzxk1s$^3UTBq5jG;G9NglCx`tmJ6A|3O z_hdwh{jYc);nTOZZ|hlD2I{zMWPvo$+8{V1`KBD4A`H;qL~``iC4*{sS&*G!ZHAGB z_P@gxKqB=+Y@xbyu(lkM621*}}&@+ncan?s;=J*QPFe zF>anaqh~_Z!F~=9`fbyAA&B41ii~qVX+Nrxdg(X5#4uP^w+zulAy~UbvcA*ZH+EBjU^9r?w_G%-$tpnQPa@k=PR|J`R%#CJ7^4W7aC3v zuA}fc29~sySXP^5PFx=u;SJ652k#~4dx)HIQZP8@`sXxyVhS zQ@4Kx^QAo8(@^SePu9EcNeRBptm&F&9bF5D0+*gzPM--33t{#F1eFbeC;~);2^oe$ z-2bP!-A1|TEUS8sOn4MzpRrv3EAzc2Iv0|}36hzcfL+#p=AFiSIm2haz#S9{U&W!w zxsyh9_HHQ^B><$|9d=t=0S+y0VLin)rmQ;2RiZ^Vy?OIaprdCFXhs53*k7vI{krt< z1I2bg0(Mq4$SC)_H%aJf1BV|uGJ>~p=9nY;u6xS>NuLXhQVwXbsXgFs^ww%mH*KU; zi-r#FQ7O$%nHq*}CkRRDWDKb2UpVlxmKSxdO0#w$;W`V3vzL8!Bwn@c|`Yj z(+=A8(#%uWp>13IHu=jXMGtlI3wxu3CscPcBAB}u;=#>45+$DqE+0?vtKYU-C#i5TzTcB;t2F1tN$*5e-2F43W4_o_+Pme)Q+r6$5aKS>jBAH}1xpTo6 zB+2+WA&HaV{A;FUDZkf?u<4_HF?hX#ADK(xT(}Nt=wo^1QDA;sXB#Ak3+?^k(``o_Sy>w#le7KJZ(OI>=#}nl|$CF_4BnMXd`nn`p-Uy zX;*9$sH1&yWLj>7T47cM^?->9R89C9Z+mD*kN6*`%pms!0iyT;A&{7aX5GADZLQUx zb@-yh^}(a4gj(I*R&59SLE6L`1wgG=bQypQBA?^B7Ob}1sFULB< zbZcc;qFR3yiEin%!~cfAGJ=Jop2dZ17fvlV zAHXm2$OG~KTJ7$yzRKe)(q|?|NhYuE*lp}NkDOhgehTl96s$1IZDT9ohb(r(Oq=a{ z51@-1?i$0&dJ`I! z)87^rd>=Ryb2FZMu2>UWpp@i5W)h?4t{N0D{dx>~;d&wQ;N3nf%6bspaQSb{cib-r zKxaTlebsl|f!a#_k9MzMx#7@X9KmxwF%0n#F%Q?$e0TGngCi1Qo{bQWiO~GJN7`Is zur0}ePjtC0g5bV$&Ag!$PJ@cRDMDNV!}Dq^JWtR(tx3YK2uuSfIQoIX!n50<3&*{v zOih2F2u=H1O!*KPyiQFwxqj#PWUBJ<{1zrkhM8^VR4^MZ>br2NGx2SUdECAUmGeew)qx1$$jd?@9;XWy*-fHy5R@Kk-MV^ zyonx#M-XwOoX)SJ7~#;L`~{E+!kr#?4uJCr#2V;OLEJzUl&+ueoM7rUt*dpAp&~zsPBD<`v$-52K+B z>-T@A(Y)-00%k&Q_#PI9?*22$I2`f#x02oGsQ<4d-^5zhx>teKg-- zT9Cd0VoHRB-RW^4D8u*AFFx(c?ocZdFnR04Jz0Pjna`!p?z3Hw)))1%AKU8UIJ)s( z`?{b*P)l<)9c?t9jnd@X=zKbYr(=q+ce>#&v|VT7+KLnM@z#S@6WuC-!&ebCGa`v} z=P1#c4a?~uGrHVE8noBF%0d7V20A^5-Rgz=R@$P(J+s<$5!vDxLKB_q`0#pZ@7By? 
zV;HkhZWq}cQ(#RT<0nO8{1==t<^E>ty4|Z{X-~!_n+28o;u4U{UMYeArw@LP+Iekf zvb`Ejtj!hhp2>Fgad`FzvElrF`D8Y{=?j@S;W=}Arm;!v*+Vt)2L>6bjrYAz)=DD? zoN@)P_w%%W1Q}I|D}CJGKLvV@maY4FQFPT5Kz2Rr{XLZLv=C6pe?Sx;E6j`d{3LNe z1eY@^mh!ImOpf?0{?Fq+DGhY$s0Q)kP2{|UU=z2p(N4HF-<{DWM zNOq**%|7|u;W5Du+#?RJg^h%(g*Qu{%wRE<4?Y<*n7RED6Z~-n7`rzHrn`m=Es)<2 zNz*d*Yy#T&AP!B)siB7Ci1Ew`@xr}`Z{tTt0V>6yhCK=jIpwbQN*b_v6{>8`nnZfB18xsiLOga|2}oREIMbMUk6w8Ei<8`;tzgbNcR#y@EY6Hf7bKN#_9^dk1O{MY4Y2=Vft z`nX1f_tM^RV>H02pfKf;U%kw-O$Q-Gpg^YTL0&bD`xK)_=Qjj)8wMF(4`Vb^*iCwb zo$xZgQSkHS6zJinJXldbiSUxhPZEivKJi_NgXPxX^oil$-70}!mn|`ke-}e9JjeU* z{SQb9{z+7s$|3zm^Ud3NoKqG&FI(9wW^x5s9-CAuYl&scB{0m#2a5+fJOmPo;yE_r z&v8``8PU;7iOt!nUyGI5q+F+sG$=Bi*o>`fKzDh+th&nG3%zMGT25sdu}0#ifXAf~ zGS#}wxgwzy5ff;&YtDNkl%$Zg*z(w~<~SpM$cnG!mq==w&ZVnMu1yz6b_m-~>}&-S z5fLz;h+<0bPr_ui&DsFKaX8s(oB9l+J2_0)XvqVi%3Cm%JJK^c}wgKKU9M zNyg2qd4eQVw271^tR6a6%F-vaF&xzccBSz3hiyyWa=!dgRj{iqcI*K@&8g};Uf z86@-?F;jn-DbVlZGCY}Nb#*+_KY41t8`{(1frUeD z-)>O3Y=7t}-yp{1sg4+!aafH{K?&)XlY-&WG>mJ816qNufWb(bJWkpgQ$U}Yps5%o zt-uQu;Vr|6B|UDC5R(w8#hx0s6T-?>1-#e`5aYE`4rl``lQTUZ_r>~%EwBECu2;{Q`iLXfX(FT7$d3cy$iq^(P-H6E z-As}Ab6+(3?|foL@^I*vR|4qwE|9>8SsitdCuITwjF|TDvTanDKbk~uNkoVse^BW? 
z^_$M->`ryH;kmbv`$RY)b82q2H36LK@WAfmw}dm=(mWjrFK$d=0}RYYbfi6vo%iQ^ zOU22*=&&eIK4QenX0{UX{gU9ToJdoL?!1_>hLYd~pwOdX{9|<>L2?W5cp$N*z7Gap zcWFa*ifcn2WPmxy2s+6KPT703_FaC^Su(zl#PY?|i1gNkKMPIfE=O)dMsaUTl3`84 z&S&$$SvytOnGRhUYcoLS?v)jaXMC|`aVC7LFwp3WITtG-oo*fwyPg<^Eo^IaOMd9K zS9+a)TdnbbVCR`d0KQ#{3~Oa+BCBhHsjK{e?;LfDR0Ks`hhb(#M$ZVqGwE2Y^}_Yd zI|&gXQ$6^YD)Rw{sLFrj1n~1!U_V1MPwWf!yE21B-kCKtNz1@R0oY zy{17x*Z1&#WbiwjsjRrbUwVd?Vyxt)Z6icsCXoWLhlcJ&3E@m)gcR-Z^99!=Hd*@A9S44fm)b* zAQ}RB1T*x1Dl*yItA5mzZNb2HdS-4-jvV>>x4;FaE+ycL1z>-7cw-#BaSVDYEkkxb zEICx{SHikC+$*XjZo~~w6x=I{W2eX^+2VtrzQljy?oS};Pk@vnqzS%#i}Ui+I=@^|+y*uNB;9g7ZFGA5qopY_CtN=?-+W<<@jY zdHRkpPCV+8yi(MAH-ABYeNh=UrT61#2L<@UGORLg8aX*CB)Ns3Gg!FJKr(~3a7jG+~lIMqt?p<^4;As zhpoqM^$-}Rd6adG57T$slks-6rEGJ}*x-6+enFG+uAYKH7T5vvM*@?De==>ePkvXv zjYs%rj*1cD@r8R8tw3e=@P^k0y`>_js1Y)3(^A)n82ZH5c4Eh;|MNb271tsM&onWm zkZx^UIcu<>QbQN@(QT-}0Ld}c^i*W@A35h{Jbo^0VM2b{M_{+ILzFn z1N#Pl)do)ZSZ1Jk`5XZKXagLTV;QN1PQ(yugHZhQ0Tbu4s%EwiRfAF&R_;UHFBGGQ z$ZqC0MGoJfW-otaq>H`RP=!PF=ps~I;OHAci?esK()ZlkkS)=!d8;VvVa3_O^`A_U zQ6@C)9l8o%Jt=phJX2cEWa-g{qlex2N$G^FH6HUWsHW9UyB@|6s##V)i6W}5Y)%&U zrM%YvhNu%pO))X?SJYBoAP&(4dZNWZE9n;<-)d%6+_(X%>|J_nUL^!DwB@@N^VX&| zI_KlSM_eyRiEolS*oRJVdS(IL-@iH%h*(tuDG8Vs6qNb3gH>%DTaR4-#${_q;}TWo zzq0c56}FdS6x^!t{Nq%|IBg?_3MYfMEqi$@@o!FU!>IvV0&~7)sl;}7KqL%M?cQ{( z#n>=yp*4Dn4549REr#VCLAjQ7%l-5ls+)XQBa#0I%gr=fsYFFN2LwUI zuup}#R~zhq2~xyzk>`eg;YflOF}}L=)d)B-(c!Wok+6v+%AXOwaX?aENc|k@@Pq@x z>oMJ(QoAt#ejG4S7wE-{D-d~Vv)PNLEy+lHEe&&jL#Qimn^>O8W_@Mie>XA2@uAbo zJ`8FIR>YLg9k(eJvAZcJgDjxp-=0Y5X)4woVXeE9u=)~Xm2_sa1-j*b&RM9sH7&iF z;M#CU&%p9~b5@Pn9dRofoZ_$?MpylDNC|HyUP~~ODxl$OYDWH9MU!sZjEjwP=n+lo zakQxa5qJ;yC+s;)jG1UAUBJL+lZy1(m4vx7bcc@Kf5Zq(FjNAT!E=7IGbGMcOA%q* z`skFswWeBQoJG!7u+d%X@Vdw~=jPCM*|LcE%N!^Lwt?)K@xMK~CWGtGr>Z)QglYNs<;HZz_w-sDrk`^iUw0$^@ zw0rBHIq&$|lD*dBWAf>Rn47!Y3Hd*{?Eh~D z1uc6$IMCd7Zsn_Ed9hdlhJc2H7xUnvk{X3;H3Na!V$ry!EeF&vD|Mi@s5Ky8)$sxE z6=_HkEdV}s&-wkdS-;-daU)Ud^rO>(mV?ULGF_q6e?I1h&dG3nesCzq;8DvnYXPzv 
zf>mc6#TfbYheIS|Lv?h)%#*tYv%EtdMpOKGfelaKFHsY{mcltp6A5!s?cZ~GFj!*@ z=jAcTXz3yKziEm&Tdr^1_73j>KPJ zR74^N5-rwU;KIb*@*jR|(2t6A7?GJ23l$6B1dQwtm3En+g(s>e;C2e4#~*;;o?J!M zzkX~UMb^ER^_ZoV=E8>Ii!eF!t-=a@;ouiB_l%1%gU6+4OT)FTy!ogY3b_A{Kp9Iv zlKq3-5{10ivj60c%QN9IW9ybJ##=^Cr*S63U4!P0VhB6DX=SJ{@^k(YA$QZnP|uL^ z`w(`Nr>3EiNHIABgoN#yZj*_yvZOEom22n35$m(NklQEn&R~zV*S5WJa1CjurCxMS z%)N?z5ha;sS*N9h<+yq&&x4Wvnmw&atF5j&MU`vZMher(dxKbWu|falw?H{wEG!!) z1=sk$X|lUC8U(mO9*I_|`whIIrGzY$(93GD2$sp2TG2vZA&f0CTiy2&7G>5N(r>8W z4P-koZkcCjL(Ttcva$Hav@m6(cmM$mH<9u8#I|jy02PcKUsOkic<5|Z^FQi(WTET* z|G|?jL!LQgjMYv?%_~z^xKY}XwuiQMN5AgkJE2ZVe&O7<6%yQ@GZ|wbqsTQi4J4&5 z`6bHcj_Bw6quFjbf;cHHs*_J@O*H1AxRdCGB$@R(bGs>}Ip39VRotE%wI@h=;tVZ|g3j#R>Us zS66qQAy?}b%MH4QTU8UokK~y>AMTF#u-n%%5u=`De16GML5~{+k>oriL*q7ZN#aIg zNKm4}Q?B}O^D~Cl#W<3WN4q4HVtkrCYCv|noYfV_-Mp!ftL?#6R&iw#1CNp|hwdMNnL@+No!1vrwxbL2)H zh|%=~sQ2|De>hOL96$je2cZs(`|>_fCe@t+->|CS!O@0T*McJaB}oGaRLhK+#uB0G zt}FkeH@iOnG=EK-vYj&VK*O}CJf5N;gq9^bDl(&a!I=rDNpK9N3FP#6F2-Tc0@Ub# z@Mb6S3fhpH#%&1=G;2dTgGZ~hp&m*fC^(Q!R>k9u`V#l4-WoGI1A3=sW-DnzMf3eo zOD6OBp{t}Tc_M;|XPmWWqxV3pb%r3NP{2Tn_fuHIOp@}oYzS?BzD&|Mir1o0R<^bR zCwUJ1z7A7xHlijru3nOFsw+%tEU>^@^pt>G4A+7x64cTu^w6QKXiYtROb{@3NzyQy zF7k*g!-#HPXk_ZDEtAN0F*#-RH_F7&YwRSR@zF}$a8bfs)1(-TK8j|UxVTn?x9HZd zD&ml|!!dRen;V~$%-@V>y|WrRRw4dn;yF4??2?sDK=xgTFx(@B9t$uAO z^HPuEto^cW0HUq;SR&_gG?nTuD`GfD8_wbWH`YXzy$!PLumjtuwGiK~(&fxuIl|Lz z*f`toKLW4|Z(uysfVT9~Su3j0yCQ(I>-0E3jruU+Sfy54VKVJ@MX)DPC53}LbH8%{ zUvi}eb{p1DiULCBX04$-Ne(ppVuU0Q$P^Sy?KOoEf(vr*mdcPxMU2=9oWYXhlg>`N z=ZtzI%6bA~FUllM^ESfL+KX{+X|<%+-*FXY1?3`zI?0!d8{B=l6XkP?R3<7q`=8SJ zYgK!fO#!DR88FJr)d29eBDTeF1uXIBcuu;dG-m|`wxRaAVTVYHrBX$b!mZ3PuT=1m zT!oPmmR~50?&BME>h}juYD_v(hY`HWQ#b(OHEu(Eb>kIdg+T4 zRWcZsI@wcAO|vvLXhhN9+quS{^^A?H?-Qe!{zobYCM!69iBVrU=)7a9D*Sm*Gde^U2W zNM9Ss6PxkhE!u*uDwZNl>K#PUR1fzuK1o<4Y5JVoK*j2miWkX*ax)dYw z@(gDri6G&X>&*v>xdFx2rb-@q_-ZX9Bw=PhPg=$k`qI9(wVeS46Oztyv@KvR#ZOl5 z3x^)#10|ivPlDbIRF1&8qwXYss4^&|lP`WL&JVfZz*54tFQY6~{X?RS=%nCp!HHh{ zWoQ?~QOQ9EXLv`8)oREVI;1&S7~rJEev 
zbD7Va7M1eIZ^CP3XoQ{`7B=NC|I%+#Jq6U4u$61P_G$%S{${~DSe{J+E$%n!#9RWH z&*Iv)w#QQ~4%633pLIENnEnqQZAp_6Fe9=>myreJtZUm)6YbTesLgD9S1ee&WC?{B1yK3am2AN?lnc*TRCT4y zS3qsotrk6a$b`sPEMQ7x*#^R^O6_3 zX3lDfQ`T0|{4!A0IR&(5&gnGUlkQiq{yb$(DR`E;D&L*_JnqXdg0<70i)7gB}@_5LDTV48#$QZa@$0Vk@u($+y zo%Is5#|5|^eQMVU$d;++6z)%hg(bZcjEU2=GXSm^YQBx}Z*=P4iK?_Wz!p3fFjD5AxmFco z4z-JsH@FOqFgDBPz9h>Pa2#En4|o>PT9nc5ma%6JO`U3GESg`R#}Vs!(~C|qHZ5rz z`7Gp+Y;BtavvdhxLya~KLqR|X6KkWlw?QN|`fFFG;X!M0PslqCAWp{7pVeNUHxZ_N ziT*Lp7CfKjlt-C#K8i)x(IlX+WXY(2M?LuD$IE7D_iNl3#MxV#Pc13j>YEUNV3O>8 zt3h>M!-T2ymOtA$bRu8~#>CQ+<)98D*E(c_I#K+}eP(UpDS&$$wxuC`m&O4r?{?~~ ze=%?VA_R)P$Tdk?|F=d8b2IB50AMyo(|!j?a0Kh>IH@j|u^(i5#sU0qPHon3?Urn| zAC{g)Dn~lTGR6&5eJE_f*45m(wVAWzJ@AL&n{{8Bii9rUb&rb1T8!3gXDX82oP6YM zby{Wr)7!E0U;nFBJ3PhQhD*@)$F9Yqvy^_wk||FUSmqzSGcYK9Nyo1NB`Kw0H2I>Z zHNBWVWUrXYU?B61)!+L|`w3!7NgCl4qhshL!MjaUCLm~{W zg2ma05=h4@$RUm40FMHMb=Jc9Ac2BKqw!33hU6;6lF9W1EUFfQWl{VAT_{ZC>EN|V zA@Zk$wThbhV=Q{I=Ak6p!JtDHd`&O_q6&NG#Sj{vJm2@T*t;~}qciN~X1<&nb8ab) z4*%Qm<&SXFw*aR1ahp6tTVJoGPJ_1T*&lO@P6z^j$&)-2t&vEz5v?Sb^~B0(Ca&pQ zv&fC-nAOz+EcJ#&mEpUo`ruyN&!8eB8QSW{p|=ekZLnXHvJccBjtpA7Q%wv`{g%ix zG}$|ffq=c|A*2I1vZeku=T)OrAjHZEt2|BCN6SVZ)5hKSv1W+yH-skB{dvM zmL@bI{=O+ek_x8gooNh&W?Yydi&|PF7Jja-l`4PTQ?Dg-ojZwK<-90K@HWIn3sAWjK?TLxa?sC{@6V|#~C7Vh_GV4P{U>cB3+ecX;8+b zTpx6Pe6vFa6y*_?#rWvORRMP>hmFEx(=3^Loo*PBu_QdQU&JD?%{cFTS3^gxj)cAT z-Tpu-QrlAT;3{^`nIMuSt>wJYLF+B>J`G}tiN$5nx_%xuTkBraOe0j!JiUv((z;_c z-H9cU_vvKMfsp3c5?(G-e${Then@>r$7rob3D|jMW4$+#X35%}fo8_Z@bhv}B@co> zPFb+s<7>5#BFSE{flgB*BDdK$3!qc2H8KE9anrTr-n3S$_aRXbj0=<@Ol0u+tpA$x|7%8v9Hu6 z+SN`B=&kQJa4%>wxfUtJx{bLB?Ni5|{^5?*d2bZrjddSwl5&?u;!Swrxm7gN9S`I^ z$VU(Ti0lVfD>za+sxj)umEj%*+qaBumfm2e`Lq{Cul~Dt_xB3wv2B{4i<0yjvh`>yC;I;;w-YjXNgmZTdlQURQ-a~ zoclqt!(BPU<^6O#q*_ZUdW5x~No5&)!4OO2SxWJjslftg+O&GJ3AT}d!iD%ym1L^4 zqe6M6lS0WaLKUMQ_czUzt&tYqEgz~?>v`kcz5!J+!10kvtTp?&#{FnKX^Xw>#_{~RJMki8jbK!;Wa@i zUW~{_ zCF0%MT7+Oc@l)fD%dPr{jPFyeQ;e%WPlX#6oqu!muGh+amMHz;;pbr^xR_@Qtltj8 
z%|$tLok?VCR{<#Vn!}3Y;ixXQHA9;h#E{`0WWGlCMJIg~hpVzLLl)1;^vJYyGwE{| zJ~wzF1bO@VPE*zt{jz-v_Nh;C*BA9_GNSitucqhZo(q2g1O^@016BFtgrqn3D(BA8 zzYw}zAvKe(L(&Vg(B$)QH~+34D-^6heU?c4{CTSS$oC@#O*TiS^9cch9tt_+us*2Q@1^hBK=?fnU;uo>G*puH{cSIkB8!R zRybVh3aqb4-+iKx!$EteZt=m%A}3AJROw)@g6v`TzH+z5*qfOQZl0OdB4EtvaPkN+ z3XO>;bDijJ05K24Dlsf8mn+=Ak0Gi3)?cPC%Trol6!V=H9_7~BBc9*#xurfb>B?>^%bXgr0ntTm0f`1+E66qdCMKU+BL;=3e94Jy`>?T zgpt+M5W`{Vc7$|q+H+sMtNzNmy1`HE=MD`~0|HyAa%I-zp?QqhpNt%L1n*{D>P~Wj z>VmGYZbi=0(3#)M_mfZ-e)n$%K3a5BRmp*e7*?F7*WxsCg$FIg+SA#AS0^4V`sD`P j@Hr$D-y$dK4d{YD(sMl2J3e1yKl!qbwjW1|z<~Y_H3|@Z literal 0 HcmV?d00001 diff --git a/format.c b/format.c new file mode 100644 index 0000000..20f6c42 --- /dev/null +++ b/format.c @@ -0,0 +1,241 @@ +#include +#include +#include +#include +#include +#include +#include "kalloc.h" +#include "mgpriv.h" + +static inline void str_enlarge(kstring_t *s, int l) +{ + if (s->l + l + 1 > s->m) { + s->m = s->l + l + 1; + kroundup32(s->m); + s->s = (char*)realloc(s->s, s->m); + } +} + +static inline void str_copy(kstring_t *s, const char *st, const char *en) +{ + str_enlarge(s, en - st); + memcpy(&s->s[s->l], st, en - st); + s->l += en - st; +} + +void mg_sprintf_lite(kstring_t *s, const char *fmt, ...) +{ + char buf[16]; // for integer to string conversion + const char *p, *q; + va_list ap; + va_start(ap, fmt); + for (q = p = fmt; *p; ++p) { + if (*p == '%') { + if (p > q) str_copy(s, q, p); + ++p; + if (*p == 'd') { + int c, i, l = 0; + unsigned int x; + c = va_arg(ap, int); + x = c >= 0? 
c : -c; + do { buf[l++] = x%10 + '0'; x /= 10; } while (x > 0); + if (c < 0) buf[l++] = '-'; + str_enlarge(s, l); + for (i = l - 1; i >= 0; --i) s->s[s->l++] = buf[i]; + } else if (*p == 'u') { + int i, l = 0; + uint32_t x; + x = va_arg(ap, uint32_t); + do { buf[l++] = x%10 + '0'; x /= 10; } while (x > 0); + str_enlarge(s, l); + for (i = l - 1; i >= 0; --i) s->s[s->l++] = buf[i]; + } else if (*p == 's') { + char *r = va_arg(ap, char*); + str_copy(s, r, r + strlen(r)); + } else if (*p == 'c') { + str_enlarge(s, 1); + s->s[s->l++] = va_arg(ap, int); + } else abort(); + q = p + 1; + } + } + if (p > q) str_copy(s, q, p); + va_end(ap); + s->s[s->l] = 0; +} + +void mg_print_lchain(FILE *fp, const mg_idx_t *gi, int n_lc, const mg_lchain_t *lc, const mg128_t *a, const char *qname) +{ + kstring_t str = {0,0,0}; + int i, j; + for (i = 0; i < n_lc; ++i) { + const mg_lchain_t *p = &lc[i]; + int mlen, blen, span = a[p->off].y>>32&0xff; + mlen = blen = span; + for (j = 1; j < p->cnt; ++j) { + int ql = (int32_t)a[p->off + j].y - (int32_t)a[p->off + j - 1].y; + int pl = (int32_t)a[p->off + j].x - (int32_t)a[p->off + j - 1].x; + blen += pl > ql? pl : ql; + mlen += pl > span && ql > span? span : pl < ql? 
pl : ql; + } + str.l = 0; + mg_sprintf_lite(&str, "LC\t%s\t%d\t%d\t%c\t%s\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t", qname, p->qs, p->qe, "+-"[p->v&1], gi->g->seg[p->v>>1].name, gi->g->seg[p->v>>1].len, + p->rs, p->re, p->score, mlen, blen, p->cnt); + for (j = 0; j < p->cnt; ++j) + mg_sprintf_lite(&str, "%d,", (int32_t)a[p->off + j].y); + mg_sprintf_lite(&str, "\t"); + for (j = 0; j < p->cnt; ++j) + mg_sprintf_lite(&str, "%d,", (int32_t)a[p->off + j].x); + mg_sprintf_lite(&str, "\t"); + for (j = 0; j < p->cnt; ++j) + mg_sprintf_lite(&str, "%d,", (int32_t)(a[p->off + j].y>>MG_SEED_OCC_SHIFT)); + mg_sprintf_lite(&str, "\n"); + fwrite(str.s, 1, str.l, fp); + } + free(str.s); +} + +void mg_write_gaf(kstring_t *s, const gfa_t *g, const mg_gchains_t *gs, int32_t n_seg, const int32_t *qlens, const char *qname, uint64_t flag, void *km) +{ + int32_t i, j, qlen, rev_sign = 0; + s->l = 0; + for (i = 0, qlen = 0; i < n_seg; ++i) qlen += qlens[i]; + if ((gs == 0 || gs->n_gc == 0) && (flag&MG_M_SHOW_UNMAP)) { + mg_sprintf_lite(s, "%s", qname); + if ((flag&MG_M_FRAG_MERGE) && n_seg == 2 && s->l > 2 && s->s[s->l-1] == '1' && s->s[s->l-2] == '/') s->l -= 2; + mg_sprintf_lite(s, "\t%d\t0\t0\t*\t*\t0\t0\t0\t0\t0\t0\n", qlen); + return; + } + if (gs == 0) return; + for (i = 0; i < gs->n_gc; ++i) { + const mg_gchain_t *p = &gs->gc[i]; + int32_t sign_pos, compact; + if (p->id != p->parent && !(flag&MG_M_PRINT_2ND)) continue; + if (p->cnt == 0) continue; + mg_sprintf_lite(s, "%s", qname); + if ((flag&MG_M_FRAG_MERGE) && n_seg == 2 && s->l > 2 && s->s[s->l-1] == '1' && s->s[s->l-2] == '/') s->l -= 2; + mg_sprintf_lite(s, "\t%d\t%d\t%d\t+\t", qlen, p->qs, p->qe); + assert(p->cnt > 0); + sign_pos = s->l - 2; + if (flag & MG_M_VERTEX_COOR) { + compact = 0; + for (j = 0; j < p->cnt; ++j) { + const mg_llchain_t *q = &gs->lc[p->off + j]; + mg_sprintf_lite(s, "%c%s", "><"[q->v&1], g->seg[q->v>>1].name); + } + } else { + int32_t last_pnid = -1, st = -1, en = -1, rev = -1; + compact = flag&MG_M_NO_COMP_PATH? 
0 : 1; + for (j = 0; j < p->cnt; ++j) { + const mg_llchain_t *q; + const gfa_seg_t *t; + assert(p->off + j < gs->n_lc); + q = &gs->lc[p->off + j]; + t = &g->seg[q->v>>1]; + if (t->snid < 0) { // no stable ID; write the vertex coordinate + compact = 0; + if (last_pnid >= 0) mg_sprintf_lite(s, "%c%s:%d-%d", "><"[rev], g->sseq[last_pnid].name, st, en); + last_pnid = -1, st = -1, en = -1, rev = -1; + mg_sprintf_lite(s, "%c%s", "><"[q->v&1], g->seg[q->v>>1].name); + } else { + int cont = 0; + if (last_pnid >= 0 && t->snid == last_pnid && (q->v&1) == rev) { // same stable sequence and same strand + if (!(q->v&1)) { // forward strand + if (t->soff == en) + en = t->soff + t->len, cont = 1; + } else { // reverse strand + if (t->soff + t->len == st) + st = t->soff, cont = 1; + } + } + if (cont == 0) { + if (last_pnid >= 0) compact = 0; + if (last_pnid >= 0) mg_sprintf_lite(s, "%c%s:%d-%d", "><"[rev], g->sseq[last_pnid].name, st, en); + last_pnid = t->snid, rev = q->v&1, st = t->soff, en = st + t->len; + } + } + } + if (last_pnid >= 0) { + if (g->sseq[last_pnid].rank != 0 || g->sseq[last_pnid].min != 0) + compact = 0; + if (!compact) mg_sprintf_lite(s, "%c%s:%d-%d", "><"[rev], g->sseq[last_pnid].name, st, en); + } else compact = 0; + } + if (compact) { + int32_t rev = gs->lc[p->off].v&1; + const gfa_seg_t *t = &g->seg[gs->lc[rev? p->off + p->cnt - 1 : p->off].v>>1]; + const gfa_sseq_t *ps = &g->sseq[t->snid]; + mg_sprintf_lite(s, "%s\t%d\t", ps->name, ps->max); + if (rev) { + rev_sign = 1; + s->s[sign_pos] = '-'; + mg_sprintf_lite(s, "%d\t%d", t->soff + (p->plen - p->pe), t->soff + (p->plen - p->ps)); + } else { + mg_sprintf_lite(s, "%d\t%d", t->soff + p->ps, t->soff + p->pe); + } + } else mg_sprintf_lite(s, "\t%d\t%d\t%d", p->plen, p->ps, p->pe); + if (p->p) mg_sprintf_lite(s, "\t%d\t%d\t%d", p->p->mlen, p->p->blen, p->mapq); + else mg_sprintf_lite(s, "\t%d\t%d\t%d", p->mlen, p->blen, p->mapq); + mg_sprintf_lite(s, "\ttp:A:%c", p->id == p->parent? 
'P' : 'S'); + if (p->p) mg_sprintf_lite(s, "\tNM:i:%d", p->p->blen - p->p->mlen); + mg_sprintf_lite(s, "\tcm:i:%d\ts1:i:%d\ts2:i:%d", p->n_anchor, p->score, p->subsc); + if (p->div >= 0.0f && p->div <= 1.0f) { + char buf[16]; + if (p->div == 0.0f) buf[0] = '0', buf[1] = 0; + else snprintf(buf, 16, "%.4f", p->div); + mg_sprintf_lite(s, "\tdv:f:%s", buf); + } + if (n_seg > 1) { + mg_sprintf_lite(s, "\tql:B:i"); + for (j = 0; j < n_seg; ++j) mg_sprintf_lite(s, ",%d", qlens[j]); + } + if (p->p) { + mg_sprintf_lite(s, "\tcg:Z:"); + if (rev_sign) + for (j = p->p->n_cigar - 1; j >= 0; --j) + mg_sprintf_lite(s, "%d%c", p->p->cigar[j]>>4, "MIDNSHP=XB"[p->p->cigar[j]&0xf]); + else + for (j = 0; j < p->p->n_cigar; ++j) + mg_sprintf_lite(s, "%d%c", p->p->cigar[j]>>4, "MIDNSHP=XB"[p->p->cigar[j]&0xf]); + } + mg_sprintf_lite(s, "\n"); + if ((mg_dbg_flag & MG_DBG_LCHAIN) || (flag & MG_M_WRITE_LCHAIN)) { + char buf[16]; + for (j = 0; j < p->cnt; ++j) { + const mg_llchain_t *lc = &gs->lc[p->off + j]; + mg_sprintf_lite(s, "*\t%c%s\t%d\t%d", "><"[lc->v&1], g->seg[lc->v>>1].name, g->seg[lc->v>>1].len, lc->cnt); + if (lc->cnt > 0) { + double div; + int32_t q_span = (int32_t)(gs->a[lc->off].y>>32&0xff); + int32_t n = (int32_t)(gs->a[lc->off + lc->cnt - 1].x>>32) - (int32_t)(gs->a[lc->off].x>>32) + 1; + div = n == lc->cnt? 0.0 : (n > lc->cnt? 
log((double)n / lc->cnt) : log((double)lc->cnt / n)) / q_span; + if (div == 0.0) buf[0] = '0', buf[1] = 0; + else snprintf(buf, 16, "%.4f", div); + mg_sprintf_lite(s, "\t%s", buf); + mg_sprintf_lite(s, "\t%d\t%d", (int32_t)gs->a[lc->off].x + 1 - q_span, (int32_t)gs->a[lc->off + lc->cnt - 1].x + 1); + mg_sprintf_lite(s, "\t%d\t%d", (int32_t)gs->a[lc->off].y + 1 - q_span, (int32_t)gs->a[lc->off + lc->cnt - 1].y + 1); + if (flag & MG_M_WRITE_MZ) { + int32_t i, last; + last = (int32_t)gs->a[lc->off].x + 1 - q_span; + mg_sprintf_lite(s, "\t%d\t", q_span); + for (i = 1; i < lc->cnt; ++i) { + int32_t x = (int32_t)gs->a[lc->off + i].x + 1 - q_span; + if (i > 1) mg_sprintf_lite(s, ","); + mg_sprintf_lite(s, "%d", x - last); + last = x; + } + last = (int32_t)gs->a[lc->off].y + 1 - q_span; + mg_sprintf_lite(s, "\t"); + for (i = 1; i < lc->cnt; ++i) { + int32_t x = (int32_t)gs->a[lc->off + i].y + 1 - q_span; + if (i > 1) mg_sprintf_lite(s, ","); + mg_sprintf_lite(s, "%d", x - last); + last = x; + } + } + } + mg_sprintf_lite(s, "\n"); + } + } + } +} diff --git a/galign.c b/galign.c new file mode 100644 index 0000000..874148a --- /dev/null +++ b/galign.c @@ -0,0 +1,138 @@ +#include +#include +#include "mgpriv.h" +#include "kalloc.h" +#include "miniwfa.h" + +static void append_cigar1(void *km, mg32_v *c, int32_t op, int32_t len) +{ + if (c->n > 0 && (c->a[c->n - 1]&0xf) == op) { + c->a[c->n - 1] += len<<4; + } else { + if (c->n == c->m) { + c->m += (c->m>>1) + 16; + KREALLOC(km, c->a, c->m); + } + c->a[c->n++] = len<<4 | op; + } +} + +static void append_cigar(void *km, mg32_v *c, int32_t n_cigar, const uint32_t *cigar) +{ + if (n_cigar == 0) return; + append_cigar1(km, c, cigar[0]&0xf, cigar[0]>>4); + if (c->n + n_cigar - 1 > c->m) { + c->m = c->n + n_cigar - 1; + kroundup32(c->m); + KREALLOC(km, c->a, c->m); + } + memcpy(&c->a[c->n], &cigar[1], sizeof(*cigar) * (n_cigar - 1)); + c->n += n_cigar - 1; +} + +void mg_gchain_cigar(void *km, const gfa_t *g, const gfa_edseq_t *es, 
const char *qseq, mg_gchains_t *gt, const char *qname) +{ + int32_t i, l_seq = 0, m_seq = 0; + char *seq = 0; + void *km2; + mg32_v cigar = {0,0,0}; + km2 = km_init2(km, 0); + for (i = 0; i < gt->n_gc; ++i) { + mg_gchain_t *gc = >->gc[i]; + int32_t l0 = gc->off; + int32_t off_a0 = gt->lc[l0].off; + int32_t j, j0 = 0, k, l; + cigar.n = 0; + append_cigar1(km, &cigar, 7, gt->a[off_a0].y>>32&0xff); + for (j = 1; j < gc->n_anchor; ++j) { + const mg128_t *q, *p = >->a[off_a0 + j]; + if ((p->y & MG_SEED_IGNORE) && j != gc->n_anchor - 1) continue; + q = >->a[off_a0 + j0]; + // find the lchain that contains the anchor + for (l = l0; l < gc->off + gc->cnt; ++l) { + mg_llchain_t *r = >->lc[l]; + if (off_a0 + j >= r->off && off_a0 + j < r->off + r->cnt) + break; + } + assert(l < gc->off + gc->cnt); + assert((int32_t)q->x < g->seg[gt->lc[l0].v>>1].len); + // calculate the target sequence length + if (l == l0) { + l_seq = (int32_t)p->x - (int32_t)q->x; + } else { + l_seq = g->seg[gt->lc[l0].v>>1].len - (int32_t)q->x - 1; + for (k = l0 + 1; k < l; ++k) + l_seq += es[gt->lc[k].v].len; + l_seq += (int32_t)p->x + 1; + } + if (l_seq + 1 > m_seq) { + m_seq = l_seq + 1; + kroundup32(m_seq); + KREALLOC(km, seq, m_seq); + } + // get the target sequence + if (l == l0) { // on the same vertex + memcpy(seq, &es[gt->lc[l0].v].seq[(int32_t)q->x + 1], l_seq); + } else { + uint32_t v = gt->lc[l0].v; + l_seq = g->seg[v>>1].len - (int32_t)q->x - 1; + memcpy(seq, &es[v].seq[(int32_t)q->x + 1], l_seq); + for (k = l0 + 1; k < l; ++k) { + v = gt->lc[k].v; + memcpy(&seq[l_seq], es[v].seq, es[v].len); + l_seq += es[v].len; + } + memcpy(&seq[l_seq], es[gt->lc[l].v].seq, (int32_t)p->x + 1); + l_seq += (int32_t)p->x + 1; + } + { + int32_t qlen = (int32_t)p->y - (int32_t)q->y; + const char *qs = &qseq[(int32_t)q->y + 1]; + assert(l_seq > 0 || qlen > 0); + if (l_seq == 0) append_cigar1(km, &cigar, 1, qlen); + else if (qlen == 0) append_cigar1(km, &cigar, 2, l_seq); + else if (l_seq == qlen && qlen <= 
(q->y>>32&0xff)) append_cigar1(km, &cigar, 7, qlen); + else { + mwf_opt_t opt; + mwf_rst_t rst; + mwf_opt_init(&opt); + opt.flag |= MWF_F_CIGAR; + mwf_wfa_auto(km2, &opt, l_seq, seq, qlen, qs, &rst); + append_cigar(km, &cigar, rst.n_cigar, rst.cigar); + kfree(km2, rst.cigar); + if ((mg_dbg_flag&MG_DBG_MINIWFA) && l_seq > 5000 && qlen > 5000 && rst.s >= 10000) + fprintf(stderr, "WL\t%s\t%d\t%d\t%d\t%d\t%d\n", qname, i, (int32_t)q->y + 1, (int32_t)p->y - (int32_t)q->y, l_seq, rst.s); + if (rst.s >= 10000 && l_seq > 5000 && qlen > 5000) { + km_destroy(km2); + km2 = km_init2(km, 0); + } + if ((mg_dbg_flag&MG_DBG_MWF_SEQ) && l_seq > 5000 && qlen > 5000 && rst.s >= 10000) { + char *str; + str = Kmalloc(km, char, qlen + l_seq + strlen(qname) + 100); + k = sprintf(str, "WL\t%s\t%d\t%d\t%d\nWT\t%.*s\nWQ\t%.*s\n", qname, i, (int32_t)q->y + 1, rst.s, l_seq, seq, qlen, qs); + fwrite(str, 1, k, stderr); + kfree(km, str); + } + } + } + j0 = j, l0 = l; + } + // save the CIGAR to gt->gc[i] + gc->p = (mg_cigar_t*)kcalloc(gt->km, 1, cigar.n * 4 + sizeof(mg_cigar_t)); + gc->p->ss = (int32_t)gt->a[off_a0].x + 1 - (int32_t)(gt->a[off_a0].y>>32&0xff); + gc->p->ee = (int32_t)gt->a[off_a0 + gc->n_anchor - 1].x + 1; + gc->p->n_cigar = cigar.n; + memcpy(gc->p->cigar, cigar.a, cigar.n * 4); + for (j = 0, l = 0; j < gc->p->n_cigar; ++j) { + int32_t op = gc->p->cigar[j]&0xf, len = gc->p->cigar[j]>>4; + if (op == 7) gc->p->mlen += len, gc->p->blen += len; + else gc->p->blen += len; + if (op != 1) gc->p->aplen += len; + if (op != 2) l += len; + } + assert(l == gc->qe - gc->qs && gc->p->aplen == gc->pe - gc->ps); + } + km_destroy(km2); + kfree(km, seq); + kfree(km, cigar.a); +} diff --git a/gchain1.c b/gchain1.c new file mode 100644 index 0000000..b48f335 --- /dev/null +++ b/gchain1.c @@ -0,0 +1,520 @@ +#include +#include +#include "mgpriv.h" +#include "ksort.h" // for radix sort +#include "khashl.h" // for kh_hash_uint32() +#include "gfa-priv.h" + +typedef struct { + uint32_t srt; + int32_t i; 
+} gc_frag_t; + +#define gc_frag_key(p) ((p).srt) +KRADIX_SORT_INIT(gc, gc_frag_t, gc_frag_key, 4) + +static int32_t find_max(int32_t n, const gc_frag_t *gf, int32_t x) +{ + int32_t s = 0, e = n; + if (n == 0) return -1; + if (gf[n-1].srt < x) return n - 1; + if (gf[0].srt >= x) return -1; + while (e > s) { // TODO: finish this block + int32_t m = s + (e - s) / 2; + if (gf[m].srt >= x) e = m; + else s = m + 1; + } + assert(s == e); + return s; +} + +static int32_t mg_target_dist(const gfa_t *g, const mg_lchain_t *l0, const mg_lchain_t *l1) +{ + // below equals (l1->qs - l0->qe) - min_dist + g->seg[l1->v>>1].len; see mg_gchain1_dp() for the calculation of min_dist + return (l1->qs - l0->qe) - (g->seg[l0->v>>1].len - l0->re) + (g->seg[l1->v>>1].len - l1->rs); + // when l0->v == l1->v, the above becomes (l1->qs - l0->qe) - (l1->rs - l0->re), which is what we want +} + +static inline int32_t cal_sc(const mg_path_dst_t *dj, const mg_lchain_t *li, const mg_lchain_t *lc, const mg128_t *an, const gc_frag_t *a, const int32_t *f, + int bw, int ref_bonus, float chn_pen_gap) +{ + const mg_lchain_t *lj; + int32_t gap, sc, segi, segj; + float lin_pen, log_pen; + if (dj->n_path == 0) return INT32_MIN; + segi = (an[li->off].y & MG_SEED_SEG_MASK) >> MG_SEED_SEG_SHIFT; + gap = dj->dist - dj->target_dist; + lj = &lc[a[dj->meta].i]; + segj = (an[lj->off + lj->cnt - 1].y & MG_SEED_SEG_MASK) >> MG_SEED_SEG_SHIFT; + if (gap < 0) gap = -gap; + if (segi == segj && gap > bw) return INT32_MIN; + if (lj->qe <= li->qs) sc = li->score; + else sc = (int32_t)((double)(li->qe - lj->qe) / (li->qe - li->qs) * li->score + .499); // dealing with overlap on query + //sc += dj->mlen; // TODO: is this line the right thing to do? + if (dj->is_0) sc += ref_bonus; + lin_pen = chn_pen_gap * (float)gap; + log_pen = gap >= 2? 
mg_log2(gap) : 0.0f; + sc -= (int32_t)(lin_pen + log_pen); + sc += f[dj->meta]; + return sc; +} + +int32_t mg_gchain1_dp(void *km, const gfa_t *g, int32_t *n_lc_, mg_lchain_t *lc, int32_t qlen, int32_t max_dist_g, int32_t max_dist_q, int32_t bw, int32_t max_skip, + int32_t ref_bonus, float chn_pen_gap, float chn_pen_skip, float mask_level, const mg128_t *an, uint64_t **u_) +{ + int32_t i, j, k, m_dst, n_dst, n_ext, n_u, n_v, n_lc = *n_lc_; + int32_t *f, *v, *t; + int64_t *p; + uint64_t *u; + mg_path_dst_t *dst; + gc_frag_t *a; + mg_lchain_t *swap; + char *qs; + + *u_ = 0; + if (n_lc == 0) return 0; + + KMALLOC(km, a, n_lc); + for (i = n_ext = 0; i < n_lc; ++i) { // a[] is a view of frag[]; for sorting + mg_lchain_t *r = &lc[i]; + gc_frag_t *ai = &a[i]; + int32_t is_isolated = 0, min_end_dist_g; + r->dist_pre = -1; + min_end_dist_g = g->seg[r->v>>1].len - r->re; + if (r->rs < min_end_dist_g) min_end_dist_g = r->rs; + if (min_end_dist_g > max_dist_g) is_isolated = 1; // if too far from segment ends + else if (min_end_dist_g>>3 > r->score) is_isolated = 1; // if the lchain too small relative to distance to the segment ends + ai->srt = (uint32_t)is_isolated<<31 | r->qe; + ai->i = i; + if (!is_isolated) ++n_ext; + } + if (n_ext < 2) { // no graph chaining needed; early return + kfree(km, a); + KMALLOC(km, u, n_lc); + for (i = 0; i < n_lc; ++i) + u[i] = (uint64_t)lc[i].score<<32 | 1; + *u_ = u; + return n_lc; + } + radix_sort_gc(a, a + n_lc); + + KMALLOC(km, v, n_lc); + KMALLOC(km, f, n_ext); + KMALLOC(km, p, n_ext); + KCALLOC(km, t, n_ext); + + KMALLOC(km, qs, max_dist_q + 1); + m_dst = n_dst = 0, dst = 0; + for (i = 0; i < n_ext; ++i) { // core loop + gc_frag_t *ai = &a[i]; + mg_lchain_t *li = &lc[ai->i]; + int32_t segi = (an[li->off].y & MG_SEED_SEG_MASK) >> MG_SEED_SEG_SHIFT; + { // collect end points potentially reachable from _i_ + int32_t x = li->qs + bw, n_skip = 0; + if (x > qlen) x = qlen; + x = find_max(i, a, x); + n_dst = 0; + for (j = x; j >= 0; --j) { // 
collect potential destination vertices + gc_frag_t *aj = &a[j]; + mg_lchain_t *lj = &lc[aj->i]; + mg_path_dst_t *q; + int32_t target_dist, segj, dq; + if (lj->qs >= li->qs) continue; // lj is contained in li on the query coordinate + if (lj->qe > li->qs) { // test overlap on the query + int o = lj->qe - li->qs; + if (o > (lj->qe - lj->qs) * mask_level || o > (li->qe - li->qs) * mask_level) + continue; + } + dq = li->qs - lj->qe; + segj = (an[lj->off + lj->cnt - 1].y & MG_SEED_SEG_MASK) >> MG_SEED_SEG_SHIFT; + if (segi == segj) { + if (dq > max_dist_q) break; // if query gap too large, stop + } else { + if (dq > max_dist_g && dq > max_dist_q) break; + } + if (li->v != lj->v) { // the two linear chains are on two different segments + int32_t min_dist = li->rs + (g->seg[lj->v>>1].len - lj->re); // minimal graph gap + if (min_dist > max_dist_g) continue; // graph gap too large + if (segi == segj && min_dist - bw > li->qs - lj->qe) continue; // when li->qs < lj->qe, the condition turns to min_dist + (lj->qe - li->qs) > bw, which is desired + target_dist = mg_target_dist(g, lj, li); + if (target_dist < 0) continue; // this may happen if the query overlap is far too large + } else if (lj->rs >= li->rs || lj->re >= li->re) { // not colinear + continue; + } else { + int32_t dr = li->rs - lj->re, w = dr > dq? dr - dq : dq - dr; + if (segi == segj && w > bw) continue; // test bandwidth + if (dr > max_dist_g || dr < -max_dist_g) continue; + if (lj->re > li->rs) { // test overlap on the graph segment + int o = lj->re - li->rs; + if (o > (lj->re - lj->rs) * mask_level || o > (li->re - li->rs) * mask_level) + continue; + } + target_dist = mg_target_dist(g, lj, li); + } + if (n_dst == m_dst) KEXPAND(km, dst, m_dst); // TODO: watch out the quadratic behavior! 
+ q = &dst[n_dst++]; + memset(q, 0, sizeof(mg_path_dst_t)); + q->inner = (li->v == lj->v); + q->v = lj->v^1; + q->meta = j; + q->qlen = li->qs - lj->qe; + q->target_dist = target_dist; + q->target_hash = 0; + q->check_hash = 0; + if (t[j] == i) { + if (++n_skip > max_skip) + break; + } + if (p[j] >= 0) t[p[j]] = i; + } + } + { // confirm reach-ability + int32_t k; + // test reach-ability without sequences + mg_shortest_k(km, g, li->v^1, n_dst, dst, max_dist_g + (g->seg[li->v>>1].len - li->rs), MG_MAX_SHORT_K, 0); + // remove unreachable destinations + for (j = k = 0; j < n_dst; ++j) { + mg_path_dst_t *dj = &dst[j]; + int32_t sc; + if (dj->n_path == 0) continue; // not reachable + sc = cal_sc(dj, li, lc, an, a, f, bw, ref_bonus, chn_pen_gap); + if (sc == INT32_MIN) continue; // out of band + if (sc + li->score < 0) continue; // negative score and too low + dst[k++] = dst[j]; + } + n_dst = k; + } + { // DP + int32_t max_f = li->score, max_j = -1, max_d = -1, max_inner = 0; + uint32_t max_hash = 0; + for (j = 0; j < n_dst; ++j) { + mg_path_dst_t *dj = &dst[j]; + int32_t sc; + sc = cal_sc(dj, li, lc, an, a, f, bw, ref_bonus, chn_pen_gap); + if (sc == INT32_MIN) continue; + if (mg_dbg_flag & MG_DBG_GC1) { + mg_lchain_t *lj = &lc[a[dj->meta].i]; + fprintf(stderr, " [dst:%d] dst=%c%s[%d], n_path=%d, target=%d, opt_dist=%d, score=%d, q_intv=[%d,%d), g_intv=[%d,%d)\n", dj->meta, "><"[dj->v&1], g->seg[dj->v>>1].name, dj->v, dj->n_path, dj->target_dist - g->seg[li->v>>1].len, dj->dist - g->seg[li->v>>1].len, sc, lj->qs, lj->qe, lj->rs, lj->re); + } + if (sc > max_f) max_f = sc, max_j = dj->meta, max_d = dj->dist, max_hash = dj->hash, max_inner = dj->inner; + } + f[i] = max_f, p[i] = max_j; + li->dist_pre = max_d; + li->hash_pre = max_hash; + li->inner_pre = max_inner; + v[i] = max_j >= 0 && v[max_j] > max_f? 
v[max_j] : max_f; + if (mg_dbg_flag & MG_DBG_GC1) fprintf(stderr, " [opt:%d] opt=%d, max_f=%d\n", ai->i, max_j, max_f); + } + } + kfree(km, dst); + kfree(km, qs); + if (mg_dbg_flag & MG_DBG_GC1) { + int32_t mmax_f = 0, mmax_i = -1; + for (i = 0; i < n_ext; ++i) if (f[i] > mmax_f) mmax_f = f[i], mmax_i = i; + i = mmax_i; while (i >= 0) { fprintf(stderr, "[best] i=%d, seg=%s, max_f=%d, chn_pen_gap=%f\n", a[i].i, g->seg[lc[a[i].i].v>>1].name, f[i], chn_pen_gap); i = p[i]; } + } + + u = mg_chain_backtrack(km, n_ext, f, p, v, t, 0, 0, INT32_MAX, n_lc - n_ext, &n_u, &n_v); + kfree(km, f); kfree(km, p); kfree(km, t); + + for (i = 0; i < n_lc - n_ext; ++i) { + u[n_u++] = (uint64_t)lc[a[n_ext + i].i].score << 32 | 1; + v[n_v++] = n_ext + i; + } + + KMALLOC(km, swap, n_v); + for (i = 0, k = 0; i < n_u; ++i) { + int32_t k0 = k, ni = (int32_t)u[i]; + for (j = 0; j < ni; ++j) + swap[k++] = lc[a[v[k0 + (ni - j - 1)]].i]; + } + assert(k == n_v); + memcpy(lc, swap, n_v * sizeof(mg_lchain_t)); + *n_lc_ = n_v; + *u_ = u; + + kfree(km, a); + kfree(km, swap); + kfree(km, v); + return n_u; +} + +void mg_gchain_extra(const gfa_t *g, mg_gchains_t *gs) +{ + int32_t i, j, k; + for (i = 0; i < gs->n_gc; ++i) { // iterate over gchains + mg_gchain_t *p = &gs->gc[i]; + const mg_llchain_t *q; + const mg128_t *last_a; + int32_t q_span, rest_pl, tmp, n_mini; + + p->qs = p->qe = p->ps = p->pe = -1, p->plen = p->blen = p->mlen = 0, p->div = -1.0f; + if (p->cnt == 0) continue; + + assert(gs->lc[p->off].cnt > 0 && gs->lc[p->off + p->cnt - 1].cnt > 0); // first and last lchains can't be empty + q = &gs->lc[p->off]; + q_span = (int32_t)(gs->a[q->off].y>>32&0xff); + p->qs = (int32_t)gs->a[q->off].y + 1 - q_span; + p->ps = (int32_t)gs->a[q->off].x + 1 - q_span; + tmp = (int32_t)(gs->a[q->off].x>>32); + assert(p->qs >= 0 && p->ps >= 0); + q = &gs->lc[p->off + p->cnt - 1]; + p->qe = (int32_t)gs->a[q->off + q->cnt - 1].y + 1; + p->pe = g->seg[q->v>>1].len - (int32_t)gs->a[q->off + q->cnt - 1].x - 1; // this 
is temporary + n_mini = (int32_t)(gs->a[q->off + q->cnt - 1].x>>32) - tmp + 1; + assert(p->n_anchor > 0); + + rest_pl = 0; // this value is never used if the first lchain is not empty (which should always be true) + last_a = &gs->a[gs->lc[p->off].off]; + for (j = 0; j < p->cnt; ++j) { // iterate over lchains + const mg_llchain_t *q = &gs->lc[p->off + j]; + int32_t vlen = g->seg[q->v>>1].len; + p->plen += vlen; + for (k = 0; k < q->cnt; ++k) { // iterate over anchors + const mg128_t *r = &gs->a[q->off + k]; + int32_t pl, ql = (int32_t)r->y - (int32_t)last_a->y; + int32_t span = (int32_t)(r->y>>32&0xff); + if (j == 0 && k == 0) { // the first anchor on the first lchain + pl = ql = span; + } else if (j > 0 && k == 0) { // the first anchor but not on the first lchain + pl = (int32_t)r->x + 1 + rest_pl; + } else { + pl = (int32_t)r->x - (int32_t)last_a->x; + } + if (ql < 0) ql = -ql, n_mini += (int32_t)(last_a->x>>32) - (int32_t)(r->x>>32); // dealing with overlapping query at junctions + p->blen += pl > ql? pl : ql; + p->mlen += pl > span && ql > span? span : pl < ql? pl : ql; + last_a = r; + } + if (q->cnt == 0) rest_pl += vlen; + else rest_pl = vlen - (int32_t)gs->a[q->off + q->cnt - 1].x - 1; + } + p->pe = p->plen - p->pe; + assert(p->pe >= p->ps); + // here n_mini >= p->n_anchor should stand almost all the time + p->div = n_mini >= p->n_anchor? 
log((double)n_mini / p->n_anchor) / q_span : log((double)p->n_anchor / n_mini) / q_span; + } +} + +/* + * Generate graph chains + */ +typedef struct { + void *km; + const gfa_t *g; + const gfa_edseq_t *es; + const char *qseq; + int32_t n_seg, n_llc, m_llc, n_a; + mg_llchain_t *llc; +} bridge_aux_t; + +static inline void copy_lchain(mg_llchain_t *q, const mg_lchain_t *p, int32_t *n_a, mg128_t *a_new, const mg128_t *a_old, int32_t ed) +{ + q->cnt = p->cnt, q->v = p->v, q->score = p->score, q->ed = ed; + memcpy(&a_new[*n_a], &a_old[p->off], q->cnt * sizeof(mg128_t)); + q->off = *n_a; + (*n_a) += q->cnt; +} + +static void bridge_shortk(bridge_aux_t *aux, const mg_lchain_t *l0, const mg_lchain_t *l1) +{ + int32_t s, n_pathv; + mg_path_dst_t dst; + mg_pathv_t *p; + memset(&dst, 0, sizeof(mg_path_dst_t)); + dst.v = l0->v ^ 1; + assert(l1->dist_pre >= 0); + dst.target_dist = l1->dist_pre; + dst.target_hash = l1->hash_pre; + dst.check_hash = 1; + p = mg_shortest_k(aux->km, aux->g, l1->v^1, 1, &dst, dst.target_dist, MG_MAX_SHORT_K, &n_pathv); + if (n_pathv == 0 || dst.target_hash != dst.hash) + fprintf(stderr, "%c%s[%d] -> %c%s[%d], dist=%d, target_dist=%d\n", "><"[(l1->v^1)&1], aux->g->seg[l1->v>>1].name, l1->v^1, "><"[(l0->v^1)&1], aux->g->seg[l0->v>>1].name, l0->v^1, dst.dist, dst.target_dist); + assert(n_pathv > 0); + assert(dst.target_hash == dst.hash); + for (s = n_pathv - 2; s >= 1; --s) { // path found in a backward way, so we need to reverse it + mg_llchain_t *q; + if (aux->n_llc == aux->m_llc) KEXPAND(aux->km, aux->llc, aux->m_llc); + q = &aux->llc[aux->n_llc++]; + q->off = q->cnt = q->score = 0; + q->v = p[s].v^1; // when reversing a path, we also need to flip the orientation + q->ed = -1; + } + kfree(aux->km, p); +} + +static int32_t bridge_gwfa(bridge_aux_t *aux, int32_t kmer_size, int32_t gdp_max_ed, const mg_lchain_t *l0, const mg_lchain_t *l1, int32_t *ed) +{ + uint32_t v0 = l0->v, v1 = l1->v; + int32_t qs = l0->qe - kmer_size, qe = l1->qs + kmer_size, end0, 
end1, j; + void *z; + gfa_edopt_t opt; + gfa_edrst_t r; + + *ed = -1; + end0 = l0->re - kmer_size; + end1 = l1->rs + kmer_size - 1; + + gfa_edopt_init(&opt); + opt.traceback = 1, opt.max_chk = 1000, opt.bw_dyn = 1000, opt.max_lag = gdp_max_ed/2; + opt.i_term = 500000000LL; + z = gfa_ed_init(aux->km, &opt, aux->g, aux->es, qe - qs, &aux->qseq[qs], v0, end0); + gfa_ed_step(z, v1, end1, gdp_max_ed, &r); + gfa_ed_destroy(z); + //fprintf(stdout, "qs=%d,qe=%d,v0=%c%s:%d:%d,v1=%c%s:%d,s=%d,nv=%d\n", qs, qe, "><"[v0&1], aux->g->seg[v0>>1].name, end0, aux->g->seg[v0>>1].len - end0 - 1, "><"[v1&1], aux->g->seg[v1>>1].name, end1, r.s, r.nv); + if (r.s < 0) return 0; + + for (j = 1; j < r.nv - 1; ++j) { + mg_llchain_t *q; + if (aux->n_llc == aux->m_llc) KEXPAND(aux->km, aux->llc, aux->m_llc); + q = &aux->llc[aux->n_llc++]; + q->off = q->cnt = q->score = 0; + q->v = r.v[j]; + q->ed = -1; + } + kfree(aux->km, r.v); + *ed = r.s; + return 1; +} + +static void bridge_lchains(mg_gchains_t *gc, bridge_aux_t *aux, int32_t kmer_size, int32_t gdp_max_ed, const mg_lchain_t *l0, const mg_lchain_t *l1, const mg128_t *a) +{ + if (!l1->inner_pre) { // bridging two segments + int32_t ed = -1; + if (aux->n_seg > 1 || !bridge_gwfa(aux, kmer_size, gdp_max_ed, l0, l1, &ed)) + bridge_shortk(aux, l0, l1); + if (aux->n_llc == aux->m_llc) KEXPAND(aux->km, aux->llc, aux->m_llc); + copy_lchain(&aux->llc[aux->n_llc++], l1, &aux->n_a, gc->a, a, ed); + } else { // on one segment + int32_t k; + mg_llchain_t *t = &aux->llc[aux->n_llc - 1]; + assert(l0->v == l1->v); + for (k = 0; k < l1->cnt; ++k) { // FIXME: this part is made redundant by resolve_overlap() + const mg128_t *ak = &a[l1->off + k]; + if ((int32_t)ak->x > l0->re && (int32_t)ak->y > l0->qe) + break; + } + assert(k < l1->cnt); + t->cnt += l1->cnt - k, t->score += l1->score; + memcpy(&gc->a[aux->n_a], &a[l1->off + k], (l1->cnt - k) * sizeof(mg128_t)); + aux->n_a += l1->cnt - k; + } +} + +static void resolve_overlap(mg_lchain_t *l0, mg_lchain_t *l1, 
const mg128_t *a) +{ + int32_t j, x, y, shift0, shift1; + // check the end of l0 + x = (int32_t)a[l1->off].x; + y = (int32_t)a[l1->off].y; + for (j = l0->cnt - 1; j >= 0; --j) + if ((int32_t)a[l0->off + j].y <= y && (l0->v != l1->v || (int32_t)a[l0->off + j].x <= x)) + break; + shift0 = l0->cnt - 1 - j; + // check the start of l1 + x = (int32_t)a[l0->off + l0->cnt - 1].x; + y = (int32_t)a[l0->off + l0->cnt - 1].y; + for (j = 0; j < l1->cnt; ++j) + if ((int32_t)a[l1->off + j].y >= y && (l0->v != l1->v || (int32_t)a[l1->off + j].x >= x)) + break; + shift1 = j; + assert(shift1 < l1->cnt); // this should never happen, or it is a bug + // update + if (shift0 > 0) { + l0->cnt -= shift0; + if (l0->cnt) { // l0->cnt may be 0 as the start of l0 may be changed and go into l1 + l0->qe = (int32_t)a[l0->off + l0->cnt - 1].y + 1; + l0->re = (int32_t)a[l0->off + l0->cnt - 1].x + 1; + } + } + if (shift1 > 0) { + l1->off += shift1, l1->cnt -= shift1; + l1->qs = (int32_t)a[l1->off].y + 1 - (int32_t)(a[l1->off].y>>32&0xff); + l1->rs = (int32_t)a[l1->off].x + 1 - (int32_t)(a[l1->off].y>>32&0xff); + } + if (l0->cnt == 0) l0->qs = l0->qe = l1->qs, l0->rs = l0->re = l1->rs; // this line should have no effect +} + +mg_gchains_t *mg_gchain_gen(void *km_dst, void *km, const gfa_t *g, const gfa_edseq_t *es, int32_t n_u, const uint64_t *u, + mg_lchain_t *lc, const mg128_t *a, uint32_t hash, int32_t min_gc_cnt, int32_t min_gc_score, + int32_t gdp_max_ed, int32_t n_seg, const char *qseq) +{ + mg_gchains_t *gc; + int32_t i, j, k, st, kmer_size; + bridge_aux_t aux; + + // preallocate gc->gc and gc->a + KCALLOC(km_dst, gc, 1); + for (i = 0, st = 0; i < n_u; ++i) { + int32_t m = 0, nui = (int32_t)u[i]; + for (j = 0; j < nui; ++j) m += lc[st + j].cnt; // m is the number of anchors in this gchain + if (m >= min_gc_cnt && u[i]>>32 >= min_gc_score) + gc->n_gc++, gc->n_a += m; + st += nui; + } + if (gc->n_gc == 0) return gc; + gc->km = km_dst; + KCALLOC(km_dst, gc->gc, gc->n_gc); + KMALLOC(km_dst, 
gc->a, gc->n_a); + + // core loop + memset(&aux, 0, sizeof(aux)); + aux.km = km, aux.g = g, aux.es = es, aux.n_seg = n_seg, aux.qseq = qseq; + kmer_size = a[0].y>>32&0xff; + for (i = k = 0, st = 0, aux.n_a = 0; i < n_u; ++i) { + int32_t n_a0 = aux.n_a, n_llc0 = aux.n_llc, m = 0, nui = (int32_t)u[i]; + for (j = 0; j < nui; ++j) m += lc[st + j].cnt; + if (m >= min_gc_cnt && u[i]>>32 >= min_gc_score) { + uint32_t h = hash; + int32_t j0; + gc->gc[k].score = u[i]>>32; + gc->gc[k].off = n_llc0; + for (j = 0; j < nui; ++j) { + const mg_lchain_t *p = &lc[st + j]; + h += kh_hash_uint32(p->qs) + kh_hash_uint32(p->re) + kh_hash_uint32(p->v); + } + gc->gc[k].hash = kh_hash_uint32(h); + + for (j = 1; j < nui; ++j) + resolve_overlap(&lc[st + j - 1], &lc[st + j], a); + + if (aux.n_llc == aux.m_llc) KEXPAND(aux.km, aux.llc, aux.m_llc); + copy_lchain(&aux.llc[aux.n_llc++], &lc[st], &aux.n_a, gc->a, a, -1); // copy the first lchain + for (j0 = 0, j = 1; j < nui; ++j) { + const mg_lchain_t *l0 = &lc[st + j0], *l1 = &lc[st + j]; + if (l1->cnt > 0) { + bridge_lchains(gc, &aux, kmer_size, gdp_max_ed, l0, l1, a); + j0 = j; + } + } + + gc->gc[k].cnt = aux.n_llc - n_llc0; + gc->gc[k].n_anchor = aux.n_a - n_a0; + ++k; + } + st += nui; + } + assert(aux.n_a <= gc->n_a); + + gc->n_a = aux.n_a; + gc->n_lc = aux.n_llc; + KMALLOC(km_dst, gc->lc, aux.n_llc); + memcpy(gc->lc, aux.llc, aux.n_llc * sizeof(mg_llchain_t)); + kfree(km, aux.llc); + + mg_gchain_extra(g, gc); + mg_gchain_sort_by_score(km, gc); + return gc; +} + +void mg_gchain_free(mg_gchains_t *gs) +{ + void *km; + int32_t i; + if (gs == 0) return; + km = gs->km; + for (i = 0; i < gs->n_gc; ++i) + if (gs->gc[i].p) kfree(km, gs->gc[i].p); + kfree(km, gs->gc); kfree(km, gs->a); kfree(km, gs->lc); + kfree(km, gs); +} diff --git a/gcmisc.c b/gcmisc.c new file mode 100644 index 0000000..a9820ee --- /dev/null +++ b/gcmisc.c @@ -0,0 +1,223 @@ +#include +#include +#include +#include "mgpriv.h" +#include "kalloc.h" + +// reorder gcs->a[] and 
gcs->lc[] such that they are in the same order as gcs->gc[] +void mg_gchain_restore_order(void *km, mg_gchains_t *gcs) +{ + int32_t i, n_a, n_lc; + mg_llchain_t *lc; + mg128_t *a; + KMALLOC(km, lc, gcs->n_lc); + KMALLOC(km, a, gcs->n_a); + for (i = 0, n_a = n_lc = 0; i < gcs->n_gc; ++i) { + mg_gchain_t *gc = &gcs->gc[i]; + assert(gc->cnt > 0); + memcpy(&lc[n_lc], &gcs->lc[gc->off], gc->cnt * sizeof(mg_llchain_t)); + memcpy(&a[n_a], &gcs->a[gcs->lc[gc->off].off], gc->n_anchor * sizeof(mg128_t)); + n_lc += gc->cnt, n_a += gc->n_anchor; + } + memcpy(gcs->lc, lc, gcs->n_lc * sizeof(mg_llchain_t)); + memcpy(gcs->a, a, gcs->n_a * sizeof(mg128_t)); + kfree(km, lc); kfree(km, a); + for (i = 0, n_lc = 0; i < gcs->n_gc; ++i) { + mg_gchain_t *gc = &gcs->gc[i]; + gc->off = n_lc; + n_lc += gc->cnt; + } + for (i = 0, n_a = 0; i < gcs->n_lc; ++i) { + mg_llchain_t *lc = &gcs->lc[i]; + lc->off = n_a; + n_a += lc->cnt; + } +} + +// recompute gcs->gc[].{off,n_anchor} and gcs->lc[].off, ASSUMING they are properly ordered (see mg_gchain_restore_order) +void mg_gchain_restore_offset(mg_gchains_t *gcs) +{ + int32_t i, j, n_a, n_lc; + for (i = 0, n_a = n_lc = 0; i < gcs->n_gc; ++i) { + mg_gchain_t *gc = &gcs->gc[i]; + gc->off = n_lc; + for (j = 0, gc->n_anchor = 0; j < gc->cnt; ++j) { + mg_llchain_t *lc = &gcs->lc[n_lc + j]; + lc->off = n_a; + n_a += lc->cnt; + gc->n_anchor += lc->cnt; + } + n_lc += gc->cnt; + } + assert(n_lc == gcs->n_lc && n_a == gcs->n_a); +} + +// sort chains by score +void mg_gchain_sort_by_score(void *km, mg_gchains_t *gcs) +{ + mg128_t *z; + mg_gchain_t *gc; + int32_t i; + KMALLOC(km, z, gcs->n_gc); + KMALLOC(km, gc, gcs->n_gc); + for (i = 0; i < gcs->n_gc; ++i) + z[i].x = (uint64_t)gcs->gc[i].score << 32 | gcs->gc[i].hash, z[i].y = i; + radix_sort_128x(z, z + gcs->n_gc); + for (i = gcs->n_gc - 1; i >= 0; --i) + gc[gcs->n_gc - 1 - i] = gcs->gc[z[i].y]; + memcpy(gcs->gc, gc, gcs->n_gc * sizeof(mg_gchain_t)); + kfree(km, z); kfree(km, gc); + 
mg_gchain_restore_order(km, gcs); // this put gcs in the proper order +} + +// set r[].{id,parent,subsc}, ASSUMING r[] is sorted by score +void mg_gchain_set_parent(void *km, float mask_level, int n, mg_gchain_t *r, int sub_diff, int hard_mask_level) +{ + int i, j, k, *w; + uint64_t *cov; + if (n <= 0) return; + for (i = 0; i < n; ++i) r[i].id = i; + cov = (uint64_t*)kmalloc(km, n * sizeof(uint64_t)); + w = (int*)kmalloc(km, n * sizeof(int)); + w[0] = 0, r[0].parent = 0; + for (i = 1, k = 1; i < n; ++i) { + mg_gchain_t *ri = &r[i]; + int si = ri->qs, ei = ri->qe, n_cov = 0, uncov_len = 0; + if (hard_mask_level) goto skip_uncov; + for (j = 0; j < k; ++j) { // traverse existing primary hits to find overlapping hits + mg_gchain_t *rp = &r[w[j]]; + int sj = rp->qs, ej = rp->qe; + if (ej <= si || sj >= ei) continue; + if (sj < si) sj = si; + if (ej > ei) ej = ei; + cov[n_cov++] = (uint64_t)sj<<32 | ej; + } + if (n_cov == 0) { + goto set_parent_test; // no overlapping primary hits; then i is a new primary hit + } else if (n_cov > 0) { // there are overlapping primary hits; find the length not covered by existing primary hits + int j, x = si; + radix_sort_gfa64(cov, cov + n_cov); + for (j = 0; j < n_cov; ++j) { + if ((int)(cov[j]>>32) > x) uncov_len += (cov[j]>>32) - x; + x = (int32_t)cov[j] > x? (int32_t)cov[j] : x; + } + if (ei > x) uncov_len += ei - x; + } +skip_uncov: + for (j = 0; j < k; ++j) { // traverse existing primary hits again + mg_gchain_t *rp = &r[w[j]]; + int sj = rp->qs, ej = rp->qe, min, max, ol; + if (ej <= si || sj >= ei) continue; // no overlap + min = ej - sj < ei - si? ej - sj : ei - si; + max = ej - sj > ei - si? ej - sj : ei - si; + ol = si < sj? (ei < sj? 0 : ei < ej? ei - sj : ej - sj) : (ej < si? 0 : ej < ei? ej - si : ei - si); // overlap length; TODO: this can be simplified + if ((float)ol / min - (float)uncov_len / max > mask_level) { + int cnt_sub = 0; + ri->parent = rp->parent; + rp->subsc = rp->subsc > ri->score? 
rp->subsc : ri->score; + if (ri->cnt >= rp->cnt) cnt_sub = 1; + if (cnt_sub) ++rp->n_sub; + break; + } + } +set_parent_test: + if (j == k) w[k++] = i, ri->parent = i, ri->n_sub = 0; + } + kfree(km, cov); + kfree(km, w); +} + +// set r[].flt, i.e. mark weak suboptimal chains as filtered +int mg_gchain_flt_sub(float pri_ratio, int min_diff, int best_n, int n, mg_gchain_t *r) +{ + if (pri_ratio > 0.0f && n > 0) { + int i, k, n_2nd = 0; + for (i = k = 0; i < n; ++i) { + int p = r[i].parent; + if (p == i) { // primary + r[i].flt = 0, ++k; + } else if ((r[i].score >= r[p].score * pri_ratio || r[i].score + min_diff >= r[p].score) && n_2nd < best_n) { + if (!(r[i].qs == r[p].qs && r[i].qe == r[p].qe && r[i].ps == r[p].ps && r[i].pe == r[p].pe)) // not identical hits; TODO: check path as well + r[i].flt = 0, ++n_2nd, ++k; + else r[i].flt = 1; + } else r[i].flt = 1; + } + return k; + } + return n; +} + +// hard drop filtered chains, ASSUMING gcs is properly ordered +void mg_gchain_drop_flt(void *km, mg_gchains_t *gcs) +{ + int32_t i, n_gc, n_lc, n_a, n_lc0, n_a0, *o2n; + if (gcs->n_gc == 0) return; + KMALLOC(km, o2n, gcs->n_gc); + for (i = 0, n_gc = 0; i < gcs->n_gc; ++i) { + mg_gchain_t *r = &gcs->gc[i]; + o2n[i] = -1; + if (r->flt || r->cnt == 0) { + kfree(gcs->km, r->p); + continue; + } + o2n[i] = n_gc++; + } + n_gc = n_lc = n_a = 0; + n_lc0 = n_a0 = 0; + for (i = 0; i < gcs->n_gc; ++i) { + mg_gchain_t *r = &gcs->gc[i]; + if (o2n[i] >= 0) { + memmove(&gcs->a[n_a], &gcs->a[n_a0], r->n_anchor * sizeof(mg128_t)); + memmove(&gcs->lc[n_lc], &gcs->lc[n_lc0], r->cnt * sizeof(mg_llchain_t)); + gcs->gc[n_gc] = *r; + gcs->gc[n_gc].id = n_gc; + gcs->gc[n_gc].parent = o2n[gcs->gc[n_gc].parent]; + ++n_gc, n_lc += r->cnt, n_a += r->n_anchor; + } + n_lc0 += r->cnt, n_a0 += r->n_anchor; + } + assert(n_lc0 == gcs->n_lc && n_a0 == gcs->n_a); + kfree(km, o2n); + gcs->n_gc = n_gc, gcs->n_lc = n_lc, gcs->n_a = n_a; + if (n_a != n_a0) { + KREALLOC(gcs->km, gcs->a, gcs->n_a); + 
KREALLOC(gcs->km, gcs->lc, gcs->n_lc); + KREALLOC(gcs->km, gcs->gc, gcs->n_gc); + } + mg_gchain_restore_offset(gcs); +} + +// estimate mapping quality +void mg_gchain_set_mapq(void *km, mg_gchains_t *gcs, int qlen, int max_mini, int min_gc_score) +{ + static const float q_coef = 40.0f; + int64_t sum_sc = 0; + float uniq_ratio, r_sc, r_cnt; + int i, t_sc, t_cnt; + if (gcs == 0 || gcs->n_gc == 0) return; + t_sc = qlen < 100? qlen : 100; + t_cnt = max_mini < 10? max_mini : 10; + if (t_cnt < 5) t_cnt = 5; + r_sc = 1.0 / t_sc; + r_cnt = 1.0 / t_cnt; + for (i = 0; i < gcs->n_gc; ++i) + if (gcs->gc[i].parent == gcs->gc[i].id) + sum_sc += gcs->gc[i].score; + uniq_ratio = (float)sum_sc / (sum_sc + gcs->rep_len); + for (i = 0; i < gcs->n_gc; ++i) { + mg_gchain_t *r = &gcs->gc[i]; + if (r->parent == r->id) { + int mapq, subsc; + float pen_s1 = (r->score > t_sc? 1.0f : r->score * r_sc) * uniq_ratio; + float x, pen_cm = r->n_anchor > t_cnt? 1.0f : r->n_anchor * r_cnt; + pen_cm = pen_s1 < pen_cm? pen_s1 : pen_cm; + subsc = r->subsc > min_gc_score? r->subsc : min_gc_score; + x = (float)subsc / r->score; + mapq = (int)(pen_cm * q_coef * (1.0f - x) * logf(r->score)); + mapq -= (int)(4.343f * logf(r->n_sub + 1) + .499f); + mapq = mapq > 0? mapq : 0; + if (r->score > subsc && mapq == 0) mapq = 1; + r->mapq = mapq < 60? 
mapq : 60; + } else r->mapq = 0; + } +} diff --git a/gfa-aug.c b/gfa-aug.c new file mode 100644 index 0000000..a4bb155 --- /dev/null +++ b/gfa-aug.c @@ -0,0 +1,260 @@ +#include +#include +#include "gfa-priv.h" +#include "ksort.h" + +typedef struct { + uint32_t side; + uint32_t ins:31, end:1; +} gfa_split_t; + +#define split_key(p) ((p).side) +KRADIX_SORT_INIT(split, gfa_split_t, split_key, 4) + +static inline void create_first_arc_semi(gfa_t *g, const gfa_seg_t *seg, uint32_t v, uint32_t w, int32_t rank, uint64_t link_id, int is_comp) +{ + gfa_arc_t *a; + if (g->n_arc == g->m_arc) GFA_EXPAND(g->arc, g->m_arc); + a = &g->arc[g->n_arc++]; + a->v_lv = (uint64_t)v<<32 | seg[v>>1].len; + a->w = w; + a->rank = rank; + a->ov = a->ow = 0; + a->link_id = link_id; + a->del = 0; + a->comp = !!is_comp; +} + +static inline void create_first_arc(gfa_t *g, const gfa_seg_t *seg, uint32_t v, uint32_t w, int32_t rank) +{ + uint64_t link_id = g->n_arc; + create_first_arc_semi(g, seg, v, w, rank, link_id, 0); + create_first_arc_semi(g, seg, w^1, v^1, rank, link_id, 1); +} + +void gfa_augment(gfa_t *g, int32_t n_ins, const gfa_ins_t *ins, int32_t n_ctg, const char *const* name, const char *const* seq) +{ + int32_t i, j, k, *scnt, *soff, n_ctg_seg, n_old_seg, n_seg; + gfa_split_t *sp; + gfa_seg_t *seg; + char buf[16]; + uint64_t t, n_old_arc = g->n_arc, *ins_side, *oldcnt; + + if (n_ins <= 0 || n_ctg <= 0) return; + + // set soff[] + GFA_CALLOC(scnt, g->n_seg); + for (i = 0; i < n_ins; ++i) + ++scnt[ins[i].v[0]>>1], ++scnt[ins[i].v[1]>>1]; + GFA_MALLOC(soff, g->n_seg + 1); + for (j = 1, soff[0] = 0; j <= g->n_seg; ++j) + soff[j] = soff[j-1] + scnt[j-1]; + + // populate sp[] + GFA_MALLOC(sp, soff[g->n_seg]); + GFA_BZERO(scnt, g->n_seg); + for (i = 0, n_ctg_seg = 0; i < n_ins; ++i) { + const gfa_ins_t *p = &ins[i]; + for (k = 0; k < 2; ++k) { + uint32_t vlen = g->seg[p->v[k]>>1].len; + gfa_split_t *q = &sp[soff[p->v[k]>>1] + scnt[p->v[k]>>1]]; + q->ins = i, q->end = k; + q->side = 
(p->v[k]&1? vlen - p->voff[k] : p->voff[k]) << 1 | ((p->v[k]&1) ^ k); + assert(q->side != (0<<1|0) && q->side != (vlen<<1|1)); // not possible to link such sides + ++scnt[p->v[k]>>1]; + } + if (p->coff[1] > p->coff[0]) + ++n_ctg_seg; + } + free(scnt); + + // sort sp[] + for (j = 0, n_old_seg = 0; j < g->n_seg; ++j) + if (soff[j+1] - soff[j] > 1) + radix_sort_split(&sp[soff[j]], &sp[soff[j+1]]); + + // precompute the number of segments after split + for (j = 0, n_old_seg = 0; j < g->n_seg; ++j) { + int32_t i0; + for (i0 = soff[j], i = i0 + 1, k = 0; i <= soff[j+1]; ++i) + if (i == soff[j+1] || sp[i0].side>>1 != sp[i].side>>1) { + if (sp[i0].side>>1 != 0 && sp[i0].side>>1 != g->seg[j].len) // otherwise no new segment will be created + ++k; + i0 = i; + } + n_old_seg += k + 1; + } + + // compute ins_side[] and split old segments + n_seg = n_old_seg + n_ctg_seg; + GFA_CALLOC(seg, n_seg); + GFA_CALLOC(ins_side, n_ins); + GFA_MALLOC(oldcnt, g->n_seg); + for (j = 0, k = 0; j < g->n_seg; ++j) { + int32_t i0, l, off = 0, k0 = k; + gfa_seg_t *s = &g->seg[j]; + gfa_seg_t *t = &seg[k]; // this is so far a placeholder + // create the first half of a new segment + snprintf(buf, 15, "s%d", k + 1); + t->name = gfa_strdup(buf); + t->snid = s->snid, t->soff = s->soff, t->rank = s->rank; + // iterate over splits + for (i0 = soff[j], i = i0 + 1; i <= soff[j+1]; ++i) { + if (i == soff[j+1] || sp[i].side>>1 != sp[i0].side>>1) { + gfa_split_t *q0 = &sp[i0]; + for (l = i0; l < i; ++l) { + gfa_split_t *q = &sp[l]; + int32_t shift = q->end == 0? 32 : 0; // first end on the higher 32 bits + int32_t side = q->side & 1; + int32_t which = q->side>>1 == 0? 
0 : side; // special-casing when q->side==1, because no new segment created in this case + ins_side[q->ins] |= (uint64_t)((uint32_t)(k + which) << 1 | (side^q->end)) << shift; + } + if (q0->side>>1 != 0 && q0->side>>1 != g->seg[j].len) { // create a new segment + t->len = (q0->side>>1) - off; + GFA_MALLOC(t->seq, t->len + 1); + memcpy(t->seq, &s->seq[off], t->len); + t->seq[t->len] = 0; + off += t->len; + t = &seg[++k]; // create a new segment + snprintf(buf, 15, "s%d", k + 1); + t->name = gfa_strdup(buf); + t->snid = s->snid, t->soff = s->soff + off, t->rank = s->rank; + } + i0 = i; + } + } + // finish the last segment + t->len = s->len - off; + GFA_MALLOC(t->seq, t->len + 1); + memcpy(t->seq, &s->seq[off], t->len); + t->seq[t->len] = 0; + ++k; + oldcnt[j] = (uint64_t)k0 << 32 | (k - k0); + // add new arcs between newly created segments + for (i = 0; i < k - k0 - 1; ++i) + create_first_arc(g, seg, (uint32_t)(k0+i)<<1, (uint32_t)(k0+i+1)<<1, s->rank); + } + assert(k == n_old_seg); + free(soff); + free(sp); + + // update existing g->arc[] + for (t = 0; t < n_old_arc; ++t) { + gfa_arc_t *a = &g->arc[t]; + uint32_t v = a->v_lv >> 32; + uint32_t off = oldcnt[v>>1]>>32, cnt = (uint32_t)oldcnt[v>>1]; + v = (v&1) == 0? (off+cnt-1)<<1 : off<<1 | 1; + a->v_lv = (uint64_t)v << 32 | seg[v>>1].len; + off = oldcnt[a->w>>1]>>32, cnt = (uint32_t)oldcnt[a->w>>1]; + a->w = (a->w&1) == 0? 
off<<1 : (off+cnt-1)<<1 | 1; + } + free(oldcnt); + + // create newly inserted segments + for (i = 0, k = n_old_seg; i < n_ins; ++i) { + const gfa_ins_t *p = &ins[i]; + if (p->coff[0] < p->coff[1]) { // not a pure deletion + gfa_seg_t *t = &seg[k]; + snprintf(buf, 15, "s%d", k + 1); + t->name = gfa_strdup(buf); + GFA_MALLOC(t->seq, p->coff[1] - p->coff[0] + 1); + for (j = 0; j < p->coff[1] - p->coff[0]; ++j) + t->seq[j] = seq[p->ctg][p->coff[0] + j]; + t->seq[j] = 0; + t->len = j; + t->snid = gfa_sseq_add(g, name[p->ctg]); + t->soff = p->coff[0]; + t->rank = g->max_rank + 1; // TODO: to deal with SN/SO/SR tags somewhere + gfa_sseq_update(g, t); + create_first_arc(g, seg, ins_side[i]>>32, (uint32_t)k<<1, t->rank); + create_first_arc(g, seg, (uint32_t)k<<1, (uint32_t)ins_side[i], t->rank); + ++k; + } else { // a pure deletion + create_first_arc(g, seg, ins_side[i]>>32, (uint32_t)ins_side[i], g->max_rank + 1); + } + } + free(ins_side); + + // update *g + for (j = 0; j < g->n_seg; ++j) { + free(g->seg[j].name); + free(g->seg[j].seq); + free(g->seg[j].aux.aux); + } + free(g->seg); + g->seg = seg, g->n_seg = g->m_seg = n_seg; + ++g->max_rank; + GFA_REALLOC(g->link_aux, g->m_arc); + GFA_BZERO(&g->link_aux[n_old_arc], g->m_arc - n_old_arc); + gfa_arc_sort(g); + gfa_arc_index(g); + gfa_fix_multi(g); + // k = gfa_fix_symm(g); assert(k == 0); // for debugging; the graph should be symmetric +} + +static int32_t gfa_ins_shrink_semi(const gfa_t *g, int32_t pen, uint32_t v, int32_t voff, int32_t coff, uint32_t vv, int32_t vend, int32_t cend, const char *seq) +{ + int32_t i, j, l, dir, score, max, max_l; + if (cend == coff) return 0; + dir = cend > coff? +1 : -1; + for (i = coff, j = voff, l = max_l = 0, score = max = 0; i != cend; i += dir, j += dir) { + int32_t cg, vlen = g->seg[v>>1].len; + if (j == vlen || j == -1) break; + if (vv == v && j == vend) break; + ++l; + cg = (v&1) == 0? 
g->seg[v>>1].seq[j] : gfa_comp_table[(uint8_t)g->seg[v>>1].seq[vlen - 1 - j]]; + score += tolower(cg) == tolower(seq[i])? +1 : -pen; + if (score > max) max = score, max_l = l; + if (score < max - pen * pen) break; // X-drop + } + return max_l; +} + +int gfa_ins_adj(const gfa_t *g, int pen, gfa_ins_t *ins, const char *seq) // min_len is NOT used for now +{ + int32_t l, tot = 0; + l = gfa_ins_shrink_semi(g, pen, ins->v[0], ins->voff[0], ins->coff[0], ins->v[1], ins->voff[1], ins->coff[1], seq); + ins->voff[0] += l, ins->coff[0] += l, tot += l; + l = gfa_ins_shrink_semi(g, pen, ins->v[1], ins->voff[1] - 1, ins->coff[1] - 1, ins->v[0], ins->voff[0] - 1, ins->coff[0] - 1, seq); + ins->voff[1] -= l, ins->coff[1] -= l, tot += l; + return tot; +} + +static inline int check_multi(const gfa_t *g, const gfa_ins_t *ins) +{ + if (ins->v[0] != ins->v[1] && ins->coff[1] - ins->coff[0] == 0) { + const gfa_seg_t *s[2]; + uint32_t v[2]; + s[0] = &g->seg[ins->v[0]>>1]; + s[1] = &g->seg[ins->v[1]>>1]; + if (ins->voff[0] != 0 && ins->voff[0] != s[0]->len) return 0; + if (ins->voff[1] != 0 && ins->voff[1] != s[1]->len) return 0; + v[0] = ins->voff[0] == 0? ins->v[0]^1 : ins->v[0]; + v[1] = ins->voff[1] == 0? ins->v[1] : ins->v[1]^1; + if (gfa_find_arc(g, v[0], v[1]) >= 0) return 1; + return 0; + } else return 0; +} + +int32_t gfa_ins_filter(const gfa_t *g, int32_t n_ins, gfa_ins_t *ins) // filter out impossible inserts +{ + int32_t i, k, n; + for (i = 0, n = 0; i < n_ins; ++i) { + gfa_ins_t *p = &ins[i]; + for (k = 0; k < 2; ++k) { + uint32_t vlen = g->seg[p->v[k]>>1].len; + uint32_t side = (p->v[k]&1? vlen - p->voff[k] : p->voff[k]) << 1 | ((p->v[k]&1) ^ k); + if (side == (0<<1|0) || side == (vlen<<1|1)) + break; + } + if (k != 2 || check_multi(g, p)) { // multi-link may happen due to inconsistency between graph chaining and WFA alignment + if (gfa_verbose >= 2) + fprintf(stderr, "[W::%s] %s between %c%s and %c%s derived from the %d-th query at %d-%d\n", + __func__, k != 2? 
"impossible insert" : "multi-link", + "><"[p->v[0]&1], g->seg[p->v[0]>>1].name, "><"[p->v[1]&1], g->seg[p->v[1]>>1].name, p->ctg, p->coff[0], p->coff[1]); + continue; + } + ins[n++] = ins[i]; + } + return n; +} diff --git a/gfa-base.c b/gfa-base.c new file mode 100644 index 0000000..b2f175e --- /dev/null +++ b/gfa-base.c @@ -0,0 +1,526 @@ +#include +#include +#include +#include "gfa-priv.h" +#include "kstring.h" + +#include "khashl.h" +KHASHL_MAP_INIT(KH_LOCAL, h_s2i_t, h_s2i, kh_cstr_t, uint32_t, kh_hash_str, kh_eq_str) + +#include "ksort.h" +#define gfa_arc_key(a) ((a).v_lv) +KRADIX_SORT_INIT(arc, gfa_arc_t, gfa_arc_key, 8) + +#define generic_key(x) (x) +KRADIX_SORT_INIT(gfa64, uint64_t, generic_key, 8) + +int gfa_verbose = 2; + +gfa_t *gfa_init(void) +{ + gfa_t *g; + g = (gfa_t*)calloc(1, sizeof(gfa_t)); + g->h_names = h_s2i_init(); + g->h_snames = h_s2i_init(); + return g; +} + +void gfa_destroy(gfa_t *g) +{ + uint32_t i, j; + uint64_t k; + if (g == 0) return; + h_s2i_destroy((h_s2i_t*)g->h_names); + for (i = 0; i < g->n_seg; ++i) { + gfa_seg_t *s = &g->seg[i]; + free(s->name); + free(s->seq); + free(s->aux.aux); + if (s->utg) { + for (j = 0; j < s->utg->n; ++j) + free(s->utg->name[j]); + free(s->utg->name); + free(s->utg->a); + free(s->utg); + } + } + for (i = 0; i < g->n_sseq; ++i) free(g->sseq[i].name); + h_s2i_destroy((h_s2i_t*)g->h_snames); + if (g->link_aux) + for (k = 0; k < g->n_arc; ++k) + free(g->link_aux[k].aux); + free(g->idx); free(g->seg); free(g->arc); free(g->link_aux); free(g->sseq); + free(g); +} + +char *gfa_strdup(const char *src) +{ + int32_t len; + char *dst; + len = strlen(src); + GFA_MALLOC(dst, len + 1); + memcpy(dst, src, len + 1); + return dst; +} + +char *gfa_strndup(const char *src, size_t n) +{ + char *dst; + GFA_MALLOC(dst, n + 1); + strncpy(dst, src, n); + dst[n] = 0; + return dst; +} + +int32_t gfa_add_seg(gfa_t *g, const char *name) +{ + khint_t k; + int absent; + h_s2i_t *h = (h_s2i_t*)g->h_names; + k = h_s2i_put(h, name, 
&absent); + if (absent) { + gfa_seg_t *s; + if (g->n_seg == g->m_seg) { + uint32_t old_m = g->m_seg; + g->m_seg = g->m_seg? g->m_seg<<1 : 16; + g->seg = (gfa_seg_t*)realloc(g->seg, g->m_seg * sizeof(gfa_seg_t)); + memset(&g->seg[old_m], 0, (g->m_seg - old_m) * sizeof(gfa_seg_t)); + } + s = &g->seg[g->n_seg++]; + kh_key(h, k) = s->name = gfa_strdup(name); + s->del = s->len = 0; + s->snid = s->soff = s->rank = -1; + kh_val(h, k) = g->n_seg - 1; + } + return kh_val(h, k); +} + +int32_t gfa_sseq_add(gfa_t *g, const char *sname) +{ + h_s2i_t *h = (h_s2i_t*)g->h_snames; + khint_t k; + int absent; + k = h_s2i_put(h, sname, &absent); + if (absent) { + gfa_sseq_t *ss; + if (g->n_sseq == g->m_sseq) GFA_EXPAND(g->sseq, g->m_sseq); + ss = &g->sseq[g->n_sseq++]; + kh_val(h, k) = g->n_sseq - 1; + kh_key(h, k) = ss->name = gfa_strdup(sname); + ss->min = -1, ss->max = -1, ss->rank = -1; + } + return kh_val(h, k); +} + +int32_t gfa_sseq_get(const gfa_t *g, const char *sname) +{ + h_s2i_t *h = (h_s2i_t*)g->h_snames; + khint_t k; + k = h_s2i_get(h, sname); + return k == kh_end(h)? -1 : kh_val(h, k); +} + +void gfa_sseq_update(gfa_t *g, const gfa_seg_t *s) +{ + gfa_sseq_t *ps; + if (s->snid < 0 || s->snid >= g->n_sseq) return; + ps = &g->sseq[s->snid]; + if (ps->min < 0 || s->soff < ps->min) ps->min = s->soff; + if (ps->max < 0 || s->soff + s->len > ps->max) ps->max = s->soff + s->len; + if (ps->rank < 0) ps->rank = s->rank; + else if (ps->rank != s->rank) { + if (gfa_verbose >= 2) + fprintf(stderr, "[W] stable sequence '%s' associated with different ranks on segment '%s': %d != %d\n", ps->name, s->name, ps->rank, s->rank); + } +} + +int32_t gfa_name2id(const gfa_t *g, const char *name) +{ + h_s2i_t *h = (h_s2i_t*)g->h_names; + khint_t k; + k = h_s2i_get(h, name); + return k == kh_end(h)? 
-1 : kh_val(h, k); +} + +gfa_arc_t *gfa_add_arc1(gfa_t *g, uint32_t v, uint32_t w, int32_t ov, int32_t ow, int64_t link_id, int comp) +{ + gfa_arc_t *a; + if (g->m_arc == g->n_arc) { + uint64_t old_m = g->m_arc; + g->m_arc = g->m_arc? g->m_arc<<1 : 16; + g->arc = (gfa_arc_t*)realloc(g->arc, g->m_arc * sizeof(gfa_arc_t)); + memset(&g->arc[old_m], 0, (g->m_arc - old_m) * sizeof(gfa_arc_t)); + g->link_aux = (gfa_aux_t*)realloc(g->link_aux, g->m_arc * sizeof(gfa_aux_t)); + memset(&g->link_aux[old_m], 0, (g->m_arc - old_m) * sizeof(gfa_aux_t)); + } + a = &g->arc[g->n_arc++]; + a->v_lv = (uint64_t)v << 32; + a->w = w, a->ov = ov, a->ow = ow, a->rank = -1; + a->link_id = link_id >= 0? link_id : g->n_arc - 1; + if (link_id >= 0) a->rank = g->arc[link_id].rank; // TODO: this is not always correct! + a->del = a->strong = 0; + a->comp = comp; + return a; +} + +int gfa_arc_is_sorted(const gfa_t *g) +{ + uint64_t e; + for (e = 1; e < g->n_arc; ++e) + if (g->arc[e-1].v_lv > g->arc[e].v_lv) + break; + return (e == g->n_arc); +} + +void gfa_arc_sort(gfa_t *g) +{ + radix_sort_arc(g->arc, g->arc + g->n_arc); +} + +uint64_t *gfa_arc_index_core(size_t max_seq, size_t n, const gfa_arc_t *a) +{ + size_t i, last; + uint64_t *idx; + idx = (uint64_t*)calloc(max_seq * 2, 8); + for (i = 1, last = 0; i <= n; ++i) + if (i == n || gfa_arc_head(a[i-1]) != gfa_arc_head(a[i])) + idx[gfa_arc_head(a[i-1])] = (uint64_t)last<<32 | (i - last), last = i; + return idx; +} + +void gfa_arc_index(gfa_t *g) +{ + if (g->idx) free(g->idx); + g->idx = gfa_arc_index_core(g->n_seg, g->n_arc, g->arc); +} + +/******************** + * Fix graph issues * + ********************/ + +uint32_t gfa_fix_no_seg(gfa_t *g) +{ + uint32_t i, n_err = 0; + for (i = 0; i < g->n_seg; ++i) { + gfa_seg_t *s = &g->seg[i]; + if (s->len == 0) { + ++n_err, s->del = 1; + if (gfa_verbose >= 2) + fprintf(stderr, "[W] segment '%s' is used on an L-line but not defined on an S-line\n", s->name); + } + } + return n_err; +} + +void 
/*
 * Make the arc set symmetric: every arc v->w that is neither deleted nor
 * already marked as a complement must have a complement arc w^1 -> v^1 with
 * swapped overlap lengths.
 *
 * When a matching arc already exists it is marked as the complement and given
 * the same link_id. Otherwise a new complement arc is appended with
 * gfa_add_arc1() and inherits the rank of the original arc. If any arc was
 * appended, the arc array is re-sorted and re-indexed before returning so
 * that the index covers the new arcs.
 *
 * Returns the number of errors, which is always 0 in this implementation.
 */
uint32_t gfa_fix_symm_add(gfa_t *g)
{
	uint32_t n_err = 0, v, n_vtx = gfa_n_vtx(g);
	uint64_t n_arc0 = g->n_arc; // arc count on entry, used to detect appended arcs
	int i;
	for (v = 0; v < n_vtx; ++v) {
		int nv = gfa_arc_n(g, v);
		gfa_arc_t *av = gfa_arc_a(g, v);
		for (i = 0; i < nv; ++i) {
			int j, nw;
			gfa_arc_t *aw, *avi = &av[i];
			if (avi->del || avi->comp) continue;
			nw = gfa_arc_n(g, avi->w^1);
			aw = gfa_arc_a(g, avi->w^1);
			for (j = 0; j < nw; ++j) {
				gfa_arc_t *awj = &aw[j];
				if (awj->del || awj->comp) continue;
				if (awj->w == (v^1) && awj->ov == avi->ow && awj->ow == avi->ov) { // complement found
					awj->comp = 1;
					awj->link_id = avi->link_id;
					break;
				}
			}
			if (j == nw) { // no complement: append one
				gfa_arc_t *arc_new;
				arc_new = gfa_add_arc1(g, avi->w^1, v^1, avi->ow, avi->ov, avi->link_id, 1);
				// g->arc may have been reallocated; always re-derive the pointer rather
				// than comparing against the possibly freed old address
				av = gfa_arc_a(g, v);
				arc_new->rank = av[i].rank;
			}
		}
	}
	// The vertex count never changes here, so it cannot signal that arcs were added;
	// compare the arc count instead.
	if (n_arc0 < g->n_arc) {
		gfa_arc_sort(g);
		gfa_arc_index(g);
	}
	return n_err;
}
max_nv : nv; + } + if (max_nv == 1 || max_nv < 0) return 0; + GFA_MALLOC(buf, max_nv); + for (v = 0; v < n_vtx; ++v) { + int32_t i, s, nv = gfa_arc_n(g, v); + const gfa_arc_t *av = gfa_arc_a(g, v); + for (i = 0; i < nv; ++i) buf[i] = av[i].w; + radix_sort_gfa64(buf, buf + nv); + for (s = 0, i = 1; i <= nv; ++i) + if (i == nv || buf[i] != buf[s]) + n_multi += i - s - 1, s = i; + } + free(buf); + return n_multi; +} + +uint32_t gfa_fix_multi(gfa_t *g) +{ + uint32_t v, n_vtx = gfa_n_vtx(g), n_rm = 0; + int32_t max_nv = -1; + uint64_t *buf; // actually, uint32_t is enough + for (v = 0; v < n_vtx; ++v) { + int32_t nv = gfa_arc_n(g, v); + max_nv = max_nv > nv? max_nv : nv; + } + if (max_nv == 1) return 0; + GFA_MALLOC(buf, max_nv); + for (v = 0; v < n_vtx; ++v) { + int32_t i, j, s, nv = gfa_arc_n(g, v), nb; + gfa_arc_t *av = gfa_arc_a(g, v); + for (i = j = 0; i < nv; ++i) + if (!av[i].del) buf[j++] = (uint64_t)av[i].w<<32 | i; + nb = j; + if (nb < 1) continue; + radix_sort_gfa64(buf, buf + nb); + for (s = 0, i = 1; i <= nb; ++i) { + if (i == nv || buf[i]>>32 != buf[s]>>32) { + if (i - s > 1) { + int32_t k = (int32_t)buf[s], min_rank = av[k].rank; // prefer longest overlap + for (j = s + 1; j < i; ++j) { // rank has higher priority + int32_t t = (int32_t)buf[j]; + if (av[t].rank >= 0 && av[t].rank < min_rank) + min_rank = av[t].rank, k = t; + } + if (av[k].w == (v^1)) { // a weird loop + if (gfa_verbose >= 2) + fprintf(stderr, "[W::%s] can't fix multiple edges due to '>v -- seg[v>>1].name); + } else { + int32_t nw = gfa_arc_n(g, av[k].w^1), n_wdel; + gfa_arc_t *aw = gfa_arc_a(g, av[k].w^1); + uint64_t link_id = av[k].link_id; + n_rm += i - s - 1; + for (j = s + 1; j < i; ++j) + av[(int32_t)buf[j]].del = 1; + for (j = 0, n_wdel = 0; j < nw; ++j) + if (aw[j].w == (v^1) && aw[j].link_id != link_id) + aw[j].del = 1, ++n_wdel; + assert(n_wdel == i - s - 1); + } + } + s = i; + } + } + } + free(buf); + if (n_rm > 0) { + if (gfa_verbose >= 2) + fprintf(stderr, "[W::%s] removed %d 
multiple link(s)\n", __func__, n_rm); + gfa_arc_rm(g); + gfa_arc_index(g); + } + return n_rm; +} + +void gfa_finalize(gfa_t *g) +{ + gfa_fix_no_seg(g); + gfa_arc_sort(g); + gfa_arc_index(g); + gfa_fix_semi_arc(g); + gfa_fix_symm_add(g); + gfa_fix_arc_len(g); + gfa_cleanup(g); +} + +/******************** + * Tag manipulation * + ********************/ + +static inline int gfa_aux_type2size(int x) +{ + if (x == 'C' || x == 'c' || x == 'A') return 1; + else if (x == 'S' || x == 's') return 2; + else if (x == 'I' || x == 'i' || x == 'f') return 4; + else return 0; +} + +#define __skip_tag(s) do { \ + int type = *(s); \ + ++(s); \ + if (type == 'Z') { while (*(s)) ++(s); ++(s); } \ + else if (type == 'B') (s) += 5 + gfa_aux_type2size(*(s)) * (*(int32_t*)((s)+1)); \ + else (s) += gfa_aux_type2size(type); \ + } while(0) + +uint8_t *gfa_aux_get(int l_data, const uint8_t *data, const char tag[2]) +{ + const uint8_t *s = data; + int y = tag[0]<<8 | tag[1]; + while (s < data + l_data) { + int x = (int)s[0]<<8 | s[1]; + s += 2; + if (x == y) return (uint8_t*)s; + __skip_tag(s); + } + return 0; +} + +// s MUST BE returned by gfa_aux_get() +int gfa_aux_del(int l_data, uint8_t *data, uint8_t *s) +{ + uint8_t *p; + p = s - 2; + __skip_tag(s); + memmove(p, s, l_data - (s - data)); + return l_data - (s - p); +} + +void gfa_aux_update_f(gfa_aux_t *a, const char tag[2], float x) +{ + uint8_t *p = 0; + if (a->l_aux > 0) + p = gfa_aux_get(a->l_aux, a->aux, "cv"); + if (p) { + memcpy(p + 1, &x, 4); + } else { + kstring_t str; + str.l = a->l_aux, str.m = a->m_aux, str.s = (char*)a->aux; + ks_resize(&str, str.l + 7); + kputsn_(tag, 2, &str); + kputc_('f', &str); + kputsn_(&x, 4, &str); + a->l_aux = str.l, a->m_aux = str.m, a->aux = (uint8_t*)str.s; + } +} + +void gfa_aux_update_cv(gfa_t *g, const char *tag, const double *cov_seg, const double *cov_link) +{ + int64_t i; + if (cov_seg) + for (i = 0; i < g->n_seg; ++i) + gfa_aux_update_f(&g->seg[i].aux, tag, cov_seg[i]); + if (cov_link) + for 
/*
 * Nucleotide complement lookup, indexed by the byte value of a base.
 *
 * Upper- and lower-case letters are mapped to their complement while keeping
 * the case: A<->T, C<->G, and the ambiguity codes B<->V, D<->H, K<->M, R<->Y;
 * U maps to A. Letters without a distinct complement in the table (E, F, I, J,
 * L, N, O, P, Q, S, W, X, Z) and every non-letter byte map to themselves.
 */
unsigned char gfa_comp_table[256] = {
	0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
	16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
	32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
	48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
	64, 'T', 'V', 'G', 'H', 'E', 'F', 'C', 'D', 'I', 'J', 'M', 'L', 'K', 'N', 'O',
	'P', 'Q', 'Y', 'S', 'A', 'A', 'B', 'W', 'X', 'R', 'Z', 91, 92, 93, 94, 95,
	96, 't', 'v', 'g', 'h', 'e', 'f', 'c', 'd', 'i', 'j', 'm', 'l', 'k', 'n', 'o',
	'p', 'q', 'y', 's', 'a', 'a', 'b', 'w', 'x', 'r', 'z', 123, 124, 125, 126, 127,
	128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
	144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
	160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
	176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
	192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
	208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
	224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
	240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255
};
= gfa_arc_n(g, v); + av = gfa_arc_a(g, v); + for (i = 0; i < nv; ++i) { + uint32_t w = av[i].w; + gfa_seg_t *t = &g->seg[w>>1]; + if (t->rank == 0 && t->snid == s->snid && (v&1) == (w&1)) { + if (((v&1) == 0 && s->soff + s->len == t->soff) || ((v&1) == 1 && t->soff + t->len == s->soff)) + break; + } + } + if (nv > 0 && i == nv) fprintf(stderr, "X\t%c%s\t%d\t%s\t%d\n", "><"[v&1], s->name, i, g->sseq[s->snid].name, s->soff); + assert(nv == 0 || i < nv); + if (i > 0 && i < nv) b = av[i], av[i] = av[0], av[0] = b; + } +} + +void gfa_sub_print(FILE *fp, const gfa_t *g, const gfa_sub_t *sub) +{ + int32_t i, j; + for (i = 0; i < sub->n_v; ++i) { + gfa_subv_t *p = &sub->v[i]; + fprintf(fp, "[%d]\t%d\t%c%s\t%d\t%d", i, p->v, "><"[p->v&1], g->seg[p->v>>1].name, p->d, p->n); + if (p->n > 0) { + fputc('\t', fp); + for (j = 0; j < p->n; ++j) { + if (j) fputc(',', fp); + fprintf(fp, "%d", (uint32_t)(sub->a[p->off + j]>>32)); + } + } + fputc('\n', fp); + } +} + +/**************** + * Tarjan's SCC * + ****************/ + +typedef struct { + uint32_t index, low:31, stack:1; + uint32_t i; // index in gfa_sub_t::v[]; a temporary field + uint32_t start; // starting vertex +} gfa_scinfo_t; + +struct gfa_scbuf_s { + uint32_t index; + gfa_scinfo_t *a; // node information + kvec_t(uint32_t) ts; // Tarjan's stack + kvec_t(uint64_t) ds; // DFS stack +}; + +gfa_scbuf_t *gfa_scbuf_init(const gfa_t *g) +{ + uint32_t v, n_vtx = gfa_n_vtx(g); + gfa_scbuf_t *b; + GFA_CALLOC(b, 1); + GFA_CALLOC(b->a, n_vtx); + for (v = 0; v < n_vtx; ++v) + b->a[v].index = b->a[v].start = (uint32_t)-1; + return b; +} + +void gfa_scbuf_destroy(gfa_scbuf_t *b) +{ + free(b->a); free(b->ts.a); free(b->ds.a); free(b); +} + +gfa_sub_t *gfa_scc1(void *km0, const gfa_t *g, gfa_scbuf_t *b, uint32_t v0) +{ + gfa_sub_t *sub; + uint32_t k, off, m_v = 0; + + KCALLOC(km0, sub, 1); + sub->km = km0; + + kv_push(uint64_t, b->ds, (uint64_t)v0<<32); + while (b->ds.n > 0) { + uint64_t x = kv_pop(b->ds); + uint32_t i = (uint32_t)x, v = 
x>>32, nv; + if (i == 0) { // i is the number of outgoing edges already visited + b->a[v].low = b->a[v].index = b->index++; + b->a[v].stack = 1; + kv_push(uint32_t, b->ts, v); + } + nv = gfa_arc_n(g, v); + if (i == nv) { // done with v + if (b->a[v].low == b->a[v].index) { + int32_t i, j = b->ts.n - 1; + while (b->ts.a[j] != v) --j; + for (i = b->ts.n - 1; i >= j; --i) { + uint32_t w = b->ts.a[i]; + gfa_subv_t *p; + //fprintf(stderr, "V\t%c%s\t%d\t%c%s\t%d\t%d\n", "><"[v&1], g->seg[v>>1].name, i, "><"[w&1], g->seg[w>>1].name, b->a[w^1].stack, b->a[w].index); + if (sub->n_v == m_v) KEXPAND(sub->km, sub->v, m_v); + p = &sub->v[sub->n_v++]; + p->v = w; + b->a[w].stack = 0; + } + b->ts.n = j; + } + if (b->ds.n > 0) { // if the DFS stack is not empty, update the top element + uint32_t w = v; + v = b->ds.a[b->ds.n - 1] >> 32; + b->a[v].low = b->a[v].low < b->a[w].low? b->a[v].low : b->a[w].low; + } + } else { // process v's neighbor av[i].w + gfa_arc_t *av = gfa_arc_a(g, v); + uint32_t w = av[i].w; + kv_push(uint64_t, b->ds, (uint64_t)v<<32 | (i+1)); // update the old top of the stack + if (b->a[w].index == (uint32_t)-1 && b->a[w^1].stack == 0) + kv_push(uint64_t, b->ds, (uint64_t)w<<32); + else if (b->a[w].stack) + b->a[v].low = b->a[v].low < b->a[w].index? 
b->a[v].low : b->a[w].index; + } + } + + // reverse the vertex array + for (k = 0; k < sub->n_v>>1; ++k) { + gfa_subv_t x; + x = sub->v[k], sub->v[k] = sub->v[sub->n_v - k - 1], sub->v[sub->n_v - k - 1] = x; + } + + // fill other fields in sub + for (k = 0; k < sub->n_v; ++k) + b->a[sub->v[k].v].start = v0, b->a[sub->v[k].v].i = k; + for (k = 0, off = 0; k < sub->n_v; ++k) { // precompute the length of gfa_sub_t::a[] + uint32_t v = sub->v[k].v; + int32_t i, nv = gfa_arc_n(g, v); + gfa_arc_t *av = gfa_arc_a(g, v); + for (i = 0; i < nv; ++i) + if (b->a[av[i].w].start == v0) + ++off; + } + sub->n_a = off; + KCALLOC(sub->km, sub->a, sub->n_a); + for (k = 0, off = 0; k < sub->n_v; ++k) { + uint32_t o0, v = sub->v[k].v; + int32_t i, nv = gfa_arc_n(g, v); + gfa_arc_t *av = gfa_arc_a(g, v); + for (i = 0, o0 = off; i < nv; ++i) + if (b->a[av[i].w].start == v0) + sub->a[off++] = (uint64_t)b->a[av[i].w].i << 32 | (&av[i] - g->arc); + sub->v[k].d = 0; + sub->v[k].off = o0; + sub->v[k].n = off - o0; + if (o0 < off) { + radix_sort_gfa64(&sub->a[o0], &sub->a[off]); + if (sub->a[o0]>>32 <= k) sub->is_dag = 0; + } + } + return sub; +} + +void gfa_scc_all(const gfa_t *g) +{ + uint32_t v, n_vtx = gfa_n_vtx(g); + gfa_scbuf_t *b; + b = gfa_scbuf_init(g); + for (v = 0; v < n_vtx; ++v) + if (b->a[v].index == (uint32_t)-1 && b->a[v^1].index == (uint32_t)-1) { + gfa_sub_t *sub; + sub = gfa_scc1(0, g, b, v); + gfa_sub_print(stderr, g, sub); + gfa_sub_destroy(sub); + } + gfa_scbuf_destroy(b); +} + +void gfa_sub_destroy(gfa_sub_t *sub) +{ + void *km; + if (sub == 0) return; + km = sub->km; + kfree(km, sub->v); kfree(km, sub->a); kfree(km, sub); +} + +/****************** + * Bubble calling * + ******************/ + +typedef struct { + int32_t ld, sd, rd; + int32_t lp, sp; + float lf, sf, rf; +} bb_aux_t; + +static void bb_write_seq(const gfa_t *g, int32_t n, const uint32_t *v, int32_t l_seq, char *seq) +{ + int32_t k, l; + for (k = n - 1, l = 0; k >= 0; --k) { + const gfa_seg_t *s = 
&g->seg[v[k]>>1]; + if (v[k]&1) { + int32_t p; + for (p = s->len - 1; p >= 0; --p) + seq[l++] = gfa_comp_table[(uint8_t)s->seq[p]]; + } else { + memcpy(&seq[l], s->seq, s->len); + l += s->len; + } + } + assert(l == l_seq); + seq[l] = 0; +} + +static int32_t bb_n_paths(const gfa_t *g, const gfa_sub_t *sub, int32_t js, int32_t je) +{ + int32_t j, k; + int64_t *cnt, c; + GFA_CALLOC(cnt, je - js + 1); + cnt[0] = 1; + for (j = js; j < je; ++j) { + const gfa_subv_t *t = &sub->v[j]; + for (k = 0; k < t->n; ++k) { + uint64_t a = sub->a[t->off + k]; + int32_t jv = (int32_t)(a>>32); + if (jv <= j || jv > je) continue; + if (cnt[jv - js] + cnt[j - js] > INT32_MAX) + cnt[jv - js] = INT32_MAX; + else cnt[jv - js] += cnt[j - js]; + } + } + c = cnt[je - js]; + free(cnt); + return c < INT32_MAX? c : INT32_MAX; +} + +gfa_bubble_t *gfa_bubble(const gfa_t *g, int32_t *n_bb_) +{ + uint32_t i, *vs, *vmin, *vtmp = 0; + int32_t n_bb = 0, m_bb = 0, m_vtmp = 0; + gfa_bubble_t *bb = 0; + gfa_scbuf_t *scbuf; + + GFA_MALLOC(vs, g->n_sseq); + GFA_MALLOC(vmin, g->n_sseq); + for (i = 0; i < g->n_sseq; ++i) + vs[i] = (uint32_t)-1, vmin[i] = UINT32_MAX; + for (i = 0; i < g->n_seg; ++i) { + const gfa_seg_t *s = &g->seg[i]; + if (s->rank != 0 || s->snid < 0) continue; + if ((uint32_t)s->soff < vmin[s->snid]) + vmin[s->snid] = s->soff, vs[s->snid] = i<<1; + } + free(vmin); + + scbuf = gfa_scbuf_init(g); + for (i = 0; i < g->n_sseq; ++i) { + gfa_sub_t *sub; + int32_t j, jst, max_a, max_soff; + bb_aux_t *ba; + + if (vs[i] == (uint32_t)-1) continue; + #if 0 + sub = gfa_sub_from(0, g, vs[i], 0); + #else + sub = gfa_scc1(0, g, scbuf, vs[i]); + #endif + //gfa_sub_print(stderr, g, sub); + GFA_CALLOC(ba, sub->n_v); + for (j = 0; j < sub->n_v; ++j) + ba[j].sd = INT32_MAX, ba[j].lp = ba[j].sp = -1; + ba[0].sd = 0; + for (j = 0; j < sub->n_v; ++j) { + gfa_subv_t *t = &sub->v[j]; + int32_t k; + for (k = 0; k < t->n; ++k) { + uint64_t a = sub->a[t->off + k]; + int32_t jv = (int32_t)(a>>32); + int32_t l = 
(int32_t)g->arc[(uint32_t)a].v_lv; + if (jv <= j) continue; // skip loop or cycle + if (ba[jv].sd >= ba[j].sd + l) + ba[jv].sd = ba[j].sd + l, ba[jv].sp = j; + if (ba[jv].ld < ba[j].ld + l) + ba[jv].ld = ba[j].ld + l, ba[jv].lp = j; + } + } + for (j = 0, jst = 0, max_a = max_soff = -1; j < sub->n_v; ++j) { + gfa_subv_t *t = &sub->v[j]; + int32_t k; + if (j == max_a && g->seg[t->v>>1].soff > max_soff) { + const gfa_seg_t *sst = &g->seg[sub->v[jst].v>>1]; + const gfa_seg_t *sen = &g->seg[t->v>>1]; + if (sst->snid == i && sen->snid == i) { + int32_t n, l; + uint32_t *v; + gfa_bubble_t *b; + + // basic information + if (n_bb == m_bb) GFA_EXPAND(bb, m_bb); + b = &bb[n_bb++]; + b->snid = i; + b->vs = sub->v[jst].v; + b->ve = t->v; + b->ss = sst->soff + sst->len; + b->se = sen->soff; + b->len_min = ba[j].sd - ba[jst].sd - sst->len; + b->len_max = ba[j].ld - ba[jst].ld - sst->len; + b->n_paths = bb_n_paths(g, sub, jst, j); + //fprintf(stderr, "X\t%s[%d]\tvs=%c%s\tve=%c%s\tlen_min=%d\n", g->sseq[i].name, i, "><"[b->vs&1], g->seg[b->vs>>1].name, "><"[b->ve&1], g->seg[b->ve>>1].name, b->len_min); + assert(b->len_min >= 0); + assert(b->len_max >= 0 && b->len_max >= b->len_min); + b->n_seg = j - jst + 1; + l = (b->len_min + 1) + (b->len_max + 1); + l = (l + 3) / 4 + b->n_seg; + GFA_CALLOC(b->v, l); + b->seq_min = (char*)(b->v + b->n_seg); + b->seq_max = b->seq_min + b->len_min + 1; + for (k = jst; k <= j; ++k) + b->v[k - jst] = sub->v[k].v; + + // test bubble involving both strands (mostly inversions) + if (b->n_seg > m_vtmp) { + m_vtmp = b->n_seg; + kroundup32(m_vtmp); + GFA_REALLOC(vtmp, m_vtmp); + } + for (k = 0; k < b->n_seg; ++k) vtmp[k] = b->v[k]>>1; + radix_sort_gfa32(vtmp, vtmp + b->n_seg); + for (k = 1; k < b->n_seg; ++k) + if (vtmp[k] == vtmp[k-1]) break; + b->is_bidir = (k < b->n_seg); + + // generate sequences and cf_min/cf_max + GFA_MALLOC(v, j - jst); + k = j, n = 0; + while (k > jst) { + if (k < j) v[n++] = sub->v[k].v; + k = ba[k].sp; + } + bb_write_seq(g, n, v, 
b->len_min, b->seq_min); + k = j, n = 0; + while (k > jst) { + if (k < j) v[n++] = sub->v[k].v; + k = ba[k].lp; + } + bb_write_seq(g, n, v, b->len_max, b->seq_max); + free(v); + } // ~if(sst->snid==i&&sen->snid==i) + max_a = max_soff = -1, jst = j; + } // ~if(j==max_a) + for (k = 0; k < t->n; ++k) + if ((int32_t)(sub->a[t->off + k]>>32) > max_a) + max_a = sub->a[t->off + k]>>32; + if (g->seg[t->v>>1].snid == i && g->seg[t->v>>1].soff > max_soff) + max_soff = g->seg[t->v>>1].soff; + } + free(ba); + gfa_sub_destroy(sub); + } + free(vtmp); + gfa_scbuf_destroy(scbuf); + free(vs); + *n_bb_ = n_bb; + return bb; +} diff --git a/gfa-ed.c b/gfa-ed.c new file mode 100644 index 0000000..d31b808 --- /dev/null +++ b/gfa-ed.c @@ -0,0 +1,617 @@ +#include +#include +#include +#include "gfa-priv.h" +#include "kalloc.h" +#include "ksort.h" +#include "khashl.h" // make it compatible with kalloc +#include "kdq.h" +#include "kvec-km.h" + +int gfa_ed_dbg = 0; + +/*************** + * Preparation * + ***************/ + +void gfa_edopt_init(gfa_edopt_t *opt) +{ + memset(opt, 0, sizeof(gfa_edopt_t)); + opt->bw_dyn = opt->max_lag = opt->s_term = -1; + opt->max_chk = 1000; +} + +gfa_edseq_t *gfa_edseq_init(const gfa_t *g) +{ + uint32_t i, n_vtx = gfa_n_vtx(g); + gfa_edseq_t *es; + GFA_MALLOC(es, n_vtx); + for (i = 0; i < g->n_seg; ++i) { + const gfa_seg_t *s = &g->seg[i]; + char *t; + int32_t j; + GFA_MALLOC(t, s->len + 1); + for (j = 0; j < s->len; ++j) + t[s->len - j - 1] = gfa_comp_table[(uint8_t)s->seq[j]]; + t[s->len] = 0; + es[i<<1].seq = (char*)s->seq; + es[i<<1|1].seq = t; + es[i<<1].len = es[i<<1|1].len = s->len; + } + return es; +} + +void gfa_edseq_destroy(int32_t n_seg, gfa_edseq_t *es) +{ + int32_t i; + for (i = 0; i < n_seg; ++i) + free((char*)es[i<<1|1].seq); + free(es); +} + +/***************** + * Edit distance * + *****************/ + +#define GWF_DIAG_SHIFT 0x40000000 + +static inline uint64_t gwf_gen_vd(uint32_t v, int32_t d) +{ + return (uint64_t)v<<32 | (GWF_DIAG_SHIFT + 
d); +} + +/* + * Diagonal interval + */ +typedef struct { + uint64_t vd0, vd1; +} gwf_intv_t; + +typedef kvec_t(gwf_intv_t) gwf_intv_v; + +#define intvd_key(x) ((x).vd0) +KRADIX_SORT_INIT(gwf_intv, gwf_intv_t, intvd_key, 8) + +static int gwf_intv_is_sorted(int32_t n_a, const gwf_intv_t *a) +{ + int32_t i; + for (i = 1; i < n_a; ++i) + if (a[i-1].vd0 > a[i].vd0) break; + return (i == n_a); +} + +// merge overlapping intervals; input must be sorted +static size_t gwf_intv_merge_adj(size_t n, gwf_intv_t *a) +{ + size_t i, k; + uint64_t st, en; + if (n == 0) return 0; + st = a[0].vd0, en = a[0].vd1; + for (i = 1, k = 0; i < n; ++i) { + if (a[i].vd0 > en) { + a[k].vd0 = st, a[k++].vd1 = en; + st = a[i].vd0, en = a[i].vd1; + } else en = en > a[i].vd1? en : a[i].vd1; + } + a[k].vd0 = st, a[k++].vd1 = en; + return k; +} + +// merge two sorted interval lists +static size_t gwf_intv_merge2(gwf_intv_t *a, size_t n_b, const gwf_intv_t *b, size_t n_c, const gwf_intv_t *c) +{ + size_t i = 0, j = 0, k = 0; + while (i < n_b && j < n_c) { + if (b[i].vd0 <= c[j].vd0) + a[k++] = b[i++]; + else a[k++] = c[j++]; + } + while (i < n_b) a[k++] = b[i++]; + while (j < n_c) a[k++] = c[j++]; + return gwf_intv_merge_adj(k, a); +} + +/* + * Diagonal + */ +typedef struct { // a diagonal + uint64_t vd; // higher 32 bits: vertex ID; lower 32 bits: diagonal+0x4000000 + int32_t k; + int32_t len; + uint32_t xo; // higher 31 bits: anti diagonal; lower 1 bit: out-of-order or not + int32_t t; +} gwf_diag_t; + +typedef kvec_t(gwf_diag_t) gwf_diag_v; + +#define ed_key(x) ((x).vd) +KRADIX_SORT_INIT(gwf_ed, gwf_diag_t, ed_key, 8) + +KDQ_INIT(gwf_diag_t) + +// push (v,d,k) to the end of the queue +static inline void gwf_diag_push(void *km, gwf_diag_v *a, uint32_t v, int32_t d, int32_t k, uint32_t x, uint32_t ooo, int32_t t) +{ + gwf_diag_t *p; + kv_pushp(gwf_diag_t, km, *a, &p); + p->vd = gwf_gen_vd(v, d), p->k = k, p->xo = x<<1|ooo, p->t = t; +} + +// determine the wavefront on diagonal (v,d) +static inline 
int32_t gwf_diag_update(gwf_diag_t *p, uint32_t v, int32_t d, int32_t k, uint32_t x, uint32_t ooo, int32_t t) +{ + uint64_t vd = gwf_gen_vd(v, d); + if (p->vd == vd) { + p->xo = p->k > k? p->xo : x<<1|ooo; + p->t = p->k > k? p->t : t; + p->k = p->k > k? p->k : k; + return 0; + } + return 1; +} + +static int gwf_diag_is_sorted(int32_t n_a, const gwf_diag_t *a) +{ + int32_t i; + for (i = 1; i < n_a; ++i) + if (a[i-1].vd > a[i].vd) break; + return (i == n_a); +} + +// sort a[]. This uses the gwf_diag_t::ooo field to speed up sorting. +static void gwf_diag_sort(int32_t n_a, gwf_diag_t *a, void *km, gwf_diag_v *ooo) +{ + int32_t i, j, k, n_b, n_c; + gwf_diag_t *b, *c; + + kv_resize(gwf_diag_t, km, *ooo, n_a); + for (i = 0, n_c = 0; i < n_a; ++i) + if (a[i].xo&1) ++n_c; + n_b = n_a - n_c; + b = ooo->a, c = b + n_b; + for (i = j = k = 0; i < n_a; ++i) { + if (a[i].xo&1) c[k++] = a[i]; + else b[j++] = a[i]; + } + radix_sort_gwf_ed(c, c + n_c); + for (k = 0; k < n_c; ++k) c[k].xo &= 0xfffffffeU; + + i = j = k = 0; + while (i < n_b && j < n_c) { + if (b[i].vd <= c[j].vd) + a[k++] = b[i++]; + else a[k++] = c[j++]; + } + while (i < n_b) a[k++] = b[i++]; + while (j < n_c) a[k++] = c[j++]; +} + +// remove diagonals not on the wavefront +static int32_t gwf_diag_dedup(int32_t n_a, gwf_diag_t *a, void *km, gwf_diag_v *ooo) +{ + int32_t i, n, st; + if (!gwf_diag_is_sorted(n_a, a)) + gwf_diag_sort(n_a, a, km, ooo); + for (i = 1, st = 0, n = 0; i <= n_a; ++i) { + if (i == n_a || a[i].vd != a[st].vd) { + int32_t j, max_j = st; + if (st + 1 < i) + for (j = st + 1; j < i; ++j) // choose the far end (i.e. 
the wavefront) + if (a[max_j].k < a[j].k) max_j = j; + a[n++] = a[max_j]; + st = i; + } + } + return n; +} + +// use forbidden bands to remove diagonals not on the wavefront +static int32_t gwf_mixed_dedup(int32_t n_a, gwf_diag_t *a, int32_t n_b, gwf_intv_t *b) +{ + int32_t i = 0, j = 0, k = 0; + while (i < n_a && j < n_b) { + if (a[i].vd >= b[j].vd0 && a[i].vd < b[j].vd1) ++i; + else if (a[i].vd >= b[j].vd1) ++j; + else a[k++] = a[i++]; + } + while (i < n_a) a[k++] = a[i++]; + return k; +} + +/* + * Traceback stack + */ +KHASHL_MAP_INIT(KH_LOCAL, gwf_map64_t, gwf_map64, uint64_t, int32_t, kh_hash_uint64, kh_eq_generic) + +typedef struct { + int32_t v; + int32_t pre; +} gwf_trace_t; + +typedef kvec_t(gwf_trace_t) gwf_trace_v; + +static int32_t gwf_trace_push(void *km, gwf_trace_v *a, int32_t v, int32_t pre, gwf_map64_t *h) +{ + uint64_t key = (uint64_t)v << 32 | (uint32_t)pre; + khint_t k; + int absent; + k = gwf_map64_put(h, key, &absent); + if (absent) { + gwf_trace_t *p; + kv_pushp(gwf_trace_t, km, *a, &p); + p->v = v, p->pre = pre; + kh_val(h, k) = a->n - 1; + return a->n - 1; + } + return kh_val(h, k); +} + +/* + * Core GWFA routine + */ +KHASHL_INIT(KH_LOCAL, gwf_set64_t, gwf_set64, uint64_t, kh_hash_dummy, kh_eq_generic) + +typedef struct { + void *km; + gwf_set64_t *ha; // hash table for adjacency + gwf_map64_t *ht; // hash table for traceback + gwf_intv_v intv; + gwf_intv_v tmp, swap; + gwf_diag_v ooo; + gwf_trace_v t; +} gwf_edbuf_t; + +// remove diagonals not on the wavefront +static int32_t gwf_dedup(gwf_edbuf_t *buf, int32_t n_a, gwf_diag_t *a) +{ + if (buf->intv.n + buf->tmp.n > 0) { + if (!gwf_intv_is_sorted(buf->tmp.n, buf->tmp.a)) + radix_sort_gwf_intv(buf->tmp.a, buf->tmp.a + buf->tmp.n); + kv_copy(gwf_intv_t, buf->km, buf->swap, buf->intv); + kv_resize(gwf_intv_t, buf->km, buf->intv, buf->intv.n + buf->tmp.n); + buf->intv.n = gwf_intv_merge2(buf->intv.a, buf->swap.n, buf->swap.a, buf->tmp.n, buf->tmp.a); + } + n_a = gwf_diag_dedup(n_a, a, 
// reach the wavefront
/*
 * Extend an exact match along diagonal $d.
 *
 * $k is the last matched position on the target $ts (of length $vl); the
 * matching query position is $k + $d in $qs (of length $ql). Starting from the
 * next position, target and query are compared until the first mismatch or
 * until either sequence ends. Returns the new last matched target position
 * ($k itself if nothing could be extended).
 *
 * Eight bytes are compared at a time. The words are loaded with memcpy() so
 * that unaligned addresses are fine, and the index of the first differing
 * byte is derived with the 64-bit builtins, taking the byte order into account.
 */
static inline int32_t gwf_extend1(int32_t d, int32_t k, int32_t vl, const char *ts, int32_t ql, const char *qs)
{
	int32_t max_k = (ql - d < vl? ql - d : vl) - 1;
	const char *ts_ = ts + 1, *qs_ = qs + d + 1; // ts_[k], qs_[k]: first unmatched pair
	uint64_t cmp = 0;
	while (k + 7 < max_k) { // a whole 8-byte word is still within both sequences
		uint64_t x, y;
		memcpy(&x, ts_ + k, sizeof x);
		memcpy(&y, qs_ + k, sizeof y);
		cmp = x ^ y;
		if (cmp != 0) break;
		k += 8;
	}
	if (cmp) { // mismatch inside the last word: skip the bytes that still agree
#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
		k += __builtin_clzll(cmp) >> 3;
#else
		k += __builtin_ctzll(cmp) >> 3;
#endif
	} else { // fewer than 8 bytes left: finish byte by byte
		while (k < max_k && ts_[k] == qs_[k])
			++k;
	}
	return k;
}
+static void gwf_ed_extend_batch(void *km, const gfa_t *g, const gfa_edseq_t *es, int32_t ql, const char *q, int32_t n, gwf_diag_t *a, gwf_diag_v *B, + kdq_t(gwf_diag_t) *A, gwf_intv_v *tmp_intv, gfa_edrst_t *r) +{ + int32_t j, m; + int32_t v = a->vd>>32; + int32_t vl = es[v].len; + const char *ts = es[v].seq; + gwf_diag_t *b; + + // wfa_extend + for (j = 0; j < n; ++j) { + int32_t k; + k = gwf_extend1((int32_t)a[j].vd - GWF_DIAG_SHIFT, a[j].k, vl, ts, ql, q); + a[j].len = k - a[j].k; + a[j].xo += a[j].len << 2; + a[j].k = k; + } + + // wfa_next + kv_resize(gwf_diag_t, km, *B, B->n + n + 2); + b = &B->a[B->n]; + b[0].vd = a[0].vd - 1; + b[0].xo = a[0].xo + 2; // 2 == 1<<1 + b[0].k = a[0].k + 1; + b[0].t = a[0].t; + b[1].vd = a[0].vd; + b[1].xo = n == 1 || a[0].k > a[1].k? a[0].xo + 4 : a[1].xo + 2; + b[1].t = n == 1 || a[0].k > a[1].k? a[0].t : a[1].t; + b[1].k = (n == 1 || a[0].k > a[1].k? a[0].k : a[1].k) + 1; + for (j = 1; j < n - 1; ++j) { + uint32_t x = a[j-1].xo + 2; + int32_t k = a[j-1].k, t = a[j-1].t; + x = k > a[j].k + 1? x : a[j].xo + 4; + t = k > a[j].k + 1? t : a[j].t; + k = k > a[j].k + 1? k : a[j].k + 1; + x = k > a[j+1].k + 1? x : a[j+1].xo + 2; + t = k > a[j+1].k + 1? t : a[j+1].t; + k = k > a[j+1].k + 1? k : a[j+1].k + 1; + b[j+1].vd = a[j].vd, b[j+1].k = k, b[j+1].xo = x, b[j+1].t = t; + } + if (n >= 2) { + b[n].vd = a[n-1].vd; + b[n].xo = a[n-2].k > a[n-1].k + 1? a[n-2].xo + 2 : a[n-1].xo + 4; + b[n].t = a[n-2].k > a[n-1].k + 1? a[n-2].t : a[n-1].t; + b[n].k = a[n-2].k > a[n-1].k + 1? a[n-2].k : a[n-1].k + 1; + } + b[n+1].vd = a[n-1].vd + 1; + b[n+1].xo = a[n-1].xo + 2; + b[n+1].t = a[n-1].t; + b[n+1].k = a[n-1].k; + + // drop out-of-bound cells + //if (a[n-1].k == vl - 1) b[n+1].k = vl; // insertion to the end of a vertex is handled elsewhere. 
FIXME: this line leads to wrong result for MHC-57 and MHC-HG002.2 + for (j = 0; j < n; ++j) { + gwf_diag_t *p = &a[j]; + if (p->k == vl - 1 || (int32_t)p->vd - GWF_DIAG_SHIFT + p->k == ql - 1) + p->xo |= 1, *kdq_pushp(gwf_diag_t, A) = *p; + } + for (j = 0, m = 0; j < n + 2; ++j) { + gwf_diag_t *p = &b[j]; + int32_t d = (int32_t)p->vd - GWF_DIAG_SHIFT; + if (d + p->k < ql && p->k < vl) { + b[m++] = *p; + } else if (p->k == vl) { + gwf_intv_t *q; + kv_pushp(gwf_intv_t, km, *tmp_intv, &q); + q->vd0 = gwf_gen_vd(v, d), q->vd1 = q->vd0 + 1; + } + } + B->n += m; +} + +// wfa_extend and wfa_next combined +static gwf_diag_t *gwf_ed_extend(gwf_edbuf_t *buf, const gfa_edopt_t *opt, const gfa_t *g, const gfa_edseq_t *es, int32_t s, int32_t ql, const char *q, + uint32_t v1, int32_t off1, int32_t *end_tb, int32_t *n_a_, gwf_diag_t *a, gfa_edrst_t *r) +{ + int32_t i, x, n = *n_a_, do_dedup = 1; + kdq_t(gwf_diag_t) *A; + gwf_diag_v B = {0,0,0}; + gwf_diag_t *b; + + r->end_v = (uint32_t)-1; + r->end_off = *end_tb = -1; + buf->tmp.n = 0; + gwf_set64_clear(buf->ha); // hash table $h to avoid visiting a vertex twice + for (i = 0, x = 1; i < 32; ++i, x <<= 1) + if (x >= n) break; + if (i < 4) i = 4; + A = kdq_init2(gwf_diag_t, buf->km, i); // $A is a queue + kv_resize(gwf_diag_t, buf->km, B, n * 2); +#if 0 // unoptimized version without calling gwf_ed_extend_batch() at all. The final result will be the same. + A->count = n; + memcpy(A->a, a, n * sizeof(*a)); +#else // optimized for long vertices. 
+ for (x = 0, i = 1; i <= n; ++i) { + if (i == n || a[i].vd != a[i-1].vd + 1) { + gwf_ed_extend_batch(buf->km, g, es, ql, q, i - x, &a[x], &B, A, &buf->tmp, r); + x = i; + } + } + if (kdq_size(A) == 0) do_dedup = 0; +#endif + kfree(buf->km, a); // $a is not used as it has been copied to $A + + while (kdq_size(A)) { + gwf_diag_t t; + uint32_t v, x0; + int32_t ooo, d, k, i, vl; + + t = *kdq_shift(gwf_diag_t, A); + ooo = t.xo&1, v = t.vd >> 32; // vertex + d = (int32_t)t.vd - GWF_DIAG_SHIFT; // diagonal + k = t.k; // wavefront position on the vertex + vl = es[v].len; // $vl is the vertex length + k = gwf_extend1(d, k, vl, es[v].seq, ql, q); + i = k + d; // query position + x0 = (t.xo >> 1) + ((k - t.k) << 1); // current anti diagonal + + if (k + 1 < vl && i + 1 < ql) { // the most common case: the wavefront is in the middle + int32_t push1 = 1, push2 = 1; + if (B.n >= 2) push1 = gwf_diag_update(&B.a[B.n - 2], v, d-1, k+1, x0 + 1, ooo, t.t); + if (B.n >= 1) push2 = gwf_diag_update(&B.a[B.n - 1], v, d, k+1, x0 + 2, ooo, t.t); + if (push1) gwf_diag_push(buf->km, &B, v, d-1, k+1, x0 + 1, 1, t.t); + if (push2 || push1) gwf_diag_push(buf->km, &B, v, d, k+1, x0 + 2, 1, t.t); + gwf_diag_push(buf->km, &B, v, d+1, k, x0 + 1, ooo, t.t); + } else if (i + 1 < ql) { // k + 1 == g->len[v]; reaching the end of the vertex but not the end of query + int32_t nv = gfa_arc_n(g, v), j, n_ext = 0, tw = -1; + gfa_arc_t *av = gfa_arc_a(g, v); + gwf_intv_t *p; + kv_pushp(gwf_intv_t, buf->km, buf->tmp, &p); + p->vd0 = gwf_gen_vd(v, d), p->vd1 = p->vd0 + 1; + if (opt->traceback) tw = gwf_trace_push(buf->km, &buf->t, v, t.t, buf->ht); + for (j = 0; j < nv; ++j) { // traverse $v's neighbors + uint32_t w = av[j].w; // $w is next to $v + int32_t ol = av[j].ow; + int absent; + gwf_set64_put(buf->ha, (uint64_t)w<<32 | (i + 1), &absent); // test if ($w,$i) has been visited + if (q[i + 1] == es[w].seq[ol]) { // can be extended to the next vertex without a mismatch + ++n_ext; + if (absent) { + gwf_diag_t 
*p; + p = kdq_pushp(gwf_diag_t, A); + p->vd = gwf_gen_vd(w, i + 1 - ol), p->k = ol, p->xo = (x0+2)<<1 | 1, p->t = tw; + } + } else if (absent) { + gwf_diag_push(buf->km, &B, w, i - ol, ol, x0 + 1, 1, tw); + gwf_diag_push(buf->km, &B, w, i + 1 - ol, ol, x0 + 2, 1, tw); + } + } + if (nv == 0 || n_ext != nv) // add an insertion to the target; this *might* cause a duplicate in corner cases + gwf_diag_push(buf->km, &B, v, d+1, k, x0 + 1, 1, t.t); + } else if (v1 == (uint32_t)-1 || (v == v1 && k == off1)) { // i + 1 == ql + r->end_v = v, r->end_off = k, r->wlen = x0 - i - 1, *end_tb = t.t, *n_a_ = 0; + kdq_destroy(gwf_diag_t, A); + kfree(buf->km, B.a); + return 0; + } else if (k + 1 < vl) { // i + 1 == ql; reaching the end of the query but not the end of the vertex + gwf_diag_push(buf->km, &B, v, d-1, k+1, x0 + 1, ooo, t.t); // add an deletion; this *might* case a duplicate in corner cases + } else if (v != v1) { // i + 1 == ql && k + 1 == g->len[v]; not reaching the last vertex $v1 + int32_t nv = gfa_arc_n(g, v), j, tw = -1; + const gfa_arc_t *av = gfa_arc_a(g, v); + if (opt->traceback) tw = gwf_trace_push(buf->km, &buf->t, v, t.t, buf->ht); + for (j = 0; j < nv; ++j) + gwf_diag_push(buf->km, &B, av[j].w, i - av[j].ow, av[j].ow, x0 + 1, 1, tw); // deleting the first base on the next vertex + } else { // may come here when k>off1 (due to banding); do nothing in this case + } + } + + kdq_destroy(gwf_diag_t, A); + *n_a_ = n = B.n, b = B.a; + + if (do_dedup) *n_a_ = n = gwf_dedup(buf, n, b); + if (opt->max_lag > 0 && n > opt->max_chk && ((s+1)&0xf) == 0) + *n_a_ = n = gwf_prune(n, b, opt->max_lag, opt->bw_dyn); + return b; +} + +static void gwf_traceback(gwf_edbuf_t *buf, int32_t end_v, int32_t end_tb, gfa_edrst_t *path) +{ + int32_t i = end_tb, n = 1; + while (i >= 0 && buf->t.a[i].v >= 0) + ++n, i = buf->t.a[i].pre; + KMALLOC(buf->km, path->v, n); + i = end_tb, n = 0; + path->v[n++] = end_v; + while (i >= 0 && buf->t.a[i].v >= 0) + path->v[n++] = buf->t.a[i].v, i = 
buf->t.a[i].pre; + path->nv = n; + for (i = 0; i < path->nv>>1; ++i) + n = path->v[i], path->v[i] = path->v[path->nv - 1 - i], path->v[path->nv - 1 - i] = n; +} + +static void gwf_ed_print_diag(const gfa_t *g, size_t n, gwf_diag_t *a) // for debugging only +{ + size_t i; + for (i = 0; i < n; ++i) { + int32_t d = (int32_t)a[i].vd - GWF_DIAG_SHIFT; + printf("Z\t%d\t%s\t%d\t%d\t%d\n", d + a[i].k, g->seg[(a[i].vd>>32)>>1].name, d, a[i].k, a[i].xo>>1); + } +} + +static void gwf_ed_print_intv(size_t n, gwf_intv_t *a) // for debugging only +{ + size_t i; + for (i = 0; i < n; ++i) + printf("Z\t%d\t%d\t%d\n", (int32_t)(a[i].vd0>>32), (int32_t)a[i].vd0 - GWF_DIAG_SHIFT, (int32_t)a[i].vd1 - GWF_DIAG_SHIFT); +} + +typedef struct { + const gfa_t *g; + const gfa_edseq_t *es; + const gfa_edopt_t *opt; + int32_t ql; + const char *q; + gwf_edbuf_t buf; + int32_t s, n_a; + gwf_diag_t *a; + int32_t end_tb; +} gfa_edbuf_t; + +void *gfa_ed_init(void *km, const gfa_edopt_t *opt, const gfa_t *g, const gfa_edseq_t *es, int32_t ql, const char *q, uint32_t v0, int32_t off0) +{ + gfa_edbuf_t *z; + KCALLOC(km, z, 1); + z->buf.km = km; + z->opt = opt; + z->g = g, z->es = es; + z->ql = ql, z->q = q; + z->buf.ha = gwf_set64_init2(km); + z->buf.ht = gwf_map64_init2(km); + kv_resize(gwf_trace_t, km, z->buf.t, 16); + KCALLOC(km, z->a, 1); + z->a[0].vd = gwf_gen_vd(v0, -off0), z->a[0].k = off0 - 1, z->a[0].xo = 0; + if (z->opt->traceback) z->a[0].t = gwf_trace_push(km, &z->buf.t, -1, -1, z->buf.ht); + z->n_a = 1; + return z; +} + +void gfa_ed_step(void *z_, uint32_t v1, int32_t off1, int32_t s_term, gfa_edrst_t *r) +{ + gfa_edbuf_t *z = (gfa_edbuf_t*)z_; + const gfa_edopt_t *opt = z->opt; + if (s_term < 0 && z->opt->s_term >= 0) s_term = z->opt->s_term; + r->n_end = 0, r->n_iter = 0; + while (z->n_a > 0) { + z->a = gwf_ed_extend(&z->buf, opt, z->g, z->es, z->s, z->ql, z->q, v1, off1, &z->end_tb, &z->n_a, z->a, r); + r->n_iter += z->n_a; // + z->buf.intv.n; + if (r->end_off >= 0 || z->n_a == 0) 
break; + if (r->n_end > 0) break; + if (s_term >= 0 && z->s >= s_term) break; + if (z->opt->i_term > 0 && r->n_iter > z->opt->i_term) break; + ++z->s; + if (gfa_ed_dbg >= 1) { + printf("[%s] dist=%d, n=%d, n_intv=%ld, n_tb=%ld\n", __func__, z->s, z->n_a, z->buf.intv.n, z->buf.t.n); + if (gfa_ed_dbg == 2) gwf_ed_print_diag(z->g, z->n_a, z->a); + if (gfa_ed_dbg == 3) gwf_ed_print_intv(z->buf.intv.n, z->buf.intv.a); + } + } + if (opt->traceback && r->end_off >= 0) + gwf_traceback(&z->buf, r->end_v, z->end_tb, r); + r->s = r->end_v != (uint32_t)-1? z->s : -1; +} + +void gfa_ed_destroy(void *z_) +{ + gfa_edbuf_t *z = (gfa_edbuf_t*)z_; + void *km = z->buf.km; + kfree(km, z->a); + gwf_set64_destroy(z->buf.ha); + gwf_map64_destroy(z->buf.ht); + kfree(km, z->buf.ooo.a); + kfree(km, z->buf.intv.a); + kfree(km, z->buf.tmp.a); + kfree(km, z->buf.swap.a); + kfree(km, z->buf.t.a); + kfree(km, z); +} + +int32_t gfa_edit_dist(void *km, const gfa_edopt_t *opt, const gfa_t *g, const gfa_edseq_t *es, int32_t ql, const char *q, uint32_t v0, int32_t off0, gfa_edrst_t *rst) +{ + void *z; + z = gfa_ed_init(km, opt, g, es, ql, q, v0, off0); + gfa_ed_step(z, (uint32_t)-1, -1, -1, rst); + gfa_ed_destroy(z); + return rst->s; +} diff --git a/gfa-io.c b/gfa-io.c new file mode 100644 index 0000000..4c19ecd --- /dev/null +++ b/gfa-io.c @@ -0,0 +1,395 @@ +#include +#include +#include +#include +#include +#include "kstring.h" +#include "gfa-priv.h" + +#include "kseq.h" +KSTREAM_INIT(gzFile, gzread, 65536) + +/*********** + * Tag I/O * + ***********/ + +int gfa_aux_parse(char *s, uint8_t **data, int *max) +{ + char *q, *p; + kstring_t str; + if (s == 0) return 0; + str.l = 0, str.m = *max, str.s = (char*)*data; + if (*s == '\t') ++s; + for (p = q = s;; ++p) { + if (*p == 0 || *p == '\t') { + int c = *p; + *p = 0; + if (p - q >= 5 && q[2] == ':' && q[4] == ':' && (q[3] == 'A' || q[3] == 'i' || q[3] == 'f' || q[3] == 'Z' || q[3] == 'B')) { + int type = q[3]; + kputsn_(q, 2, &str); + q += 5; + if 
(type == 'A') { + kputc_('A', &str); + kputc_(*q, &str); + } else if (type == 'i') { + int32_t x; + x = strtol(q, &q, 10); + kputc_(type, &str); kputsn_((char*)&x, 4, &str); + } else if (type == 'f') { + float x; + x = strtod(q, &q); + kputc_('f', &str); kputsn_(&x, 4, &str); + } else if (type == 'Z') { + kputc_('Z', &str); kputsn_(q, p - q + 1, &str); // note that this include the trailing NULL + } else if (type == 'B') { + type = *q++; // q points to the first ',' following the typing byte + if (p - q >= 2 && (type == 'c' || type == 'C' || type == 's' || type == 'S' || type == 'i' || type == 'I' || type != 'f')) { + int32_t n; + char *r; + for (r = q, n = 0; *r; ++r) + if (*r == ',') ++n; + kputc_('B', &str); kputc_(type, &str); kputsn_(&n, 4, &str); + // TODO: to evaluate which is faster: a) aligned array and then memmove(); b) unaligned array; c) kputsn_() + if (type == 'c') while (q + 1 < p) { int8_t x = strtol(q + 1, &q, 0); kputc_(x, &str); } + else if (type == 'C') while (q + 1 < p) { uint8_t x = strtol(q + 1, &q, 0); kputc_(x, &str); } + else if (type == 's') while (q + 1 < p) { int16_t x = strtol(q + 1, &q, 0); kputsn_(&x, 2, &str); } + else if (type == 'S') while (q + 1 < p) { uint16_t x = strtol(q + 1, &q, 0); kputsn_(&x, 2, &str); } + else if (type == 'i') while (q + 1 < p) { int32_t x = strtol(q + 1, &q, 0); kputsn_(&x, 4, &str); } + else if (type == 'I') while (q + 1 < p) { uint32_t x = strtol(q + 1, &q, 0); kputsn_(&x, 4, &str); } + else if (type == 'f') while (q + 1 < p) { float x = strtod(q + 1, &q); kputsn_(&x, 4, &str); } + } + } // should not be here, as we have tested all types + } + q = p + 1; + if (c == 0) break; + } + } + if (str.l > 0 && str.l == str.m) ks_resize(&str, str.l + 1); + if (str.s) str.s[str.l] = 0; + *max = str.m, *data = (uint8_t*)str.s; + return str.l; +} + +int gfa_aux_format(int l_aux, const uint8_t *aux, char **t, int *max) +{ + kstring_t str; + const uint8_t *s = aux; + str.l = 0, str.s = *t, str.m = *max; + while (s < 
aux + l_aux) { + uint8_t type, key[2]; + key[0] = s[0]; key[1] = s[1]; + s += 2; type = *s++; + kputc('\t', &str); kputsn((char*)key, 2, &str); kputc(':', &str); + if (type == 'A') { kputsn("A:", 2, &str); kputc(*s, &str); ++s; } + else if (type == 'i') { kputsn("i:", 2, &str); kputw(*(int32_t*)s, &str); s += 4; } + else if (type == 'f') { ksprintf(&str, "f:%g", *(float*)s); s += 4; } + else if (type == 'Z') { kputc(type, &str); kputc(':', &str); while (*s) kputc(*s++, &str); ++s; } + else if (type == 'B') { + uint8_t sub_type = *(s++); + int32_t i, n; + memcpy(&n, s, 4); + s += 4; // no point to the start of the array + kputsn("B:", 2, &str); kputc(sub_type, &str); // write the typing + for (i = 0; i < n; ++i) { // FIXME: for better performance, put the loop after "if" + kputc(',', &str); + if ('c' == sub_type) { kputw(*(int8_t*)s, &str); ++s; } + else if ('C' == sub_type) { kputw(*(uint8_t*)s, &str); ++s; } + else if ('s' == sub_type) { kputw(*(int16_t*)s, &str); s += 2; } + else if ('S' == sub_type) { kputw(*(uint16_t*)s, &str); s += 2; } + else if ('i' == sub_type) { kputw(*(int32_t*)s, &str); s += 4; } + else if ('I' == sub_type) { kputuw(*(uint32_t*)s, &str); s += 4; } + else if ('f' == sub_type) { ksprintf(&str, "%g", *(float*)s); s += 4; } + } + } + } + *t = str.s, *max = str.m; + return str.l; +} + +/**************** + * Line parsers * + ****************/ + +int gfa_parse_S(gfa_t *g, char *s) +{ + int i, is_ok = 0; + char *p, *q, *seg = 0, *seq = 0, *rest = 0; + uint32_t sid, len = 0; + for (i = 0, p = q = s + 2;; ++p) { + if (*p == 0 || *p == '\t') { + int c = *p; + *p = 0; + if (i == 0) seg = q; + else if (i == 1) { + seq = q[0] == '*'? 0 : gfa_strdup(q); + is_ok = 1, rest = c? p + 1 : 0; + break; + } + ++i, q = p + 1; + if (c == 0) break; + } + } + if (is_ok) { // all mandatory fields read + int l_aux, m_aux = 0, LN = -1; + uint8_t *aux = 0, *s_LN = 0; + gfa_seg_t *s; + l_aux = gfa_aux_parse(rest, &aux, &m_aux); // parse optional tags + s_LN = l_aux? 
gfa_aux_get(l_aux, aux, "LN") : 0; + if (s_LN && s_LN[0] == 'i') { + LN = *(int32_t*)(s_LN + 1); + l_aux = gfa_aux_del(l_aux, aux, s_LN); + } + if (seq == 0) { + if (LN >= 0) len = LN; + } else len = strlen(seq); + if (LN >= 0 && len != LN && gfa_verbose >= 2) + fprintf(stderr, "[W] for segment '%s', LN:i:%d tag is different from sequence length %d\n", seg, LN, len); + sid = gfa_add_seg(g, seg); + s = &g->seg[sid]; + s->len = len, s->seq = seq; + if (l_aux > 0) { + uint8_t *s_SN = 0, *s_SO = 0, *s_SR = 0; + s_SN = gfa_aux_get(l_aux, aux, "SN"); + if (s_SN && *s_SN == 'Z') { // then parse stable tags + s->snid = gfa_sseq_add(g, (char*)(s_SN + 1)), s->soff = 0; + l_aux = gfa_aux_del(l_aux, aux, s_SN); + s_SO = gfa_aux_get(l_aux, aux, "SO"); + if (s_SO && *s_SO == 'i') { + s->soff = *(int32_t*)(s_SO + 1); + l_aux = gfa_aux_del(l_aux, aux, s_SO); + } + } + s_SR = gfa_aux_get(l_aux, aux, "SR"); + if (s_SR && *s_SR == 'i') { + s->rank = *(int32_t*)(s_SR + 1); + if (s->rank > g->max_rank) g->max_rank = s->rank; + l_aux = gfa_aux_del(l_aux, aux, s_SR); + } + gfa_sseq_update(g, s); + } + if (l_aux > 0) + s->aux.m_aux = m_aux, s->aux.l_aux = l_aux, s->aux.aux = aux; + else if (aux) + free(aux); + } else return -1; + return 0; +} + +int gfa_parse_L(gfa_t *g, char *s) +{ + int i, oriv = -1, oriw = -1, is_ok = 0; + char *p, *q, *segv = 0, *segw = 0, *rest = 0; + int32_t ov = INT32_MAX, ow = INT32_MAX; + for (i = 0, p = q = s + 2;; ++p) { + if (*p == 0 || *p == '\t') { + int c = *p; + *p = 0; + if (i == 0) { + segv = q; + } else if (i == 1) { + if (*q != '+' && *q != '-') return -2; + oriv = (*q != '+'); + } else if (i == 2) { + segw = q; + } else if (i == 3) { + if (*q != '+' && *q != '-') return -2; + oriw = (*q != '+'); + } else if (i == 4) { + if (*q == '*') { + ov = ow = 0; + } else if (*q == ':') { + ov = INT32_MAX; + ow = isdigit(*(q+1))? 
strtol(q+1, &q, 10) : INT32_MAX; + } else if (isdigit(*q)) { + char *r; + ov = strtol(q, &r, 10); + if (isupper(*r)) { // CIGAR + ov = ow = 0; + do { + long l; + l = strtol(q, &q, 10); + if (*q == 'M' || *q == 'D' || *q == 'N') ov += l; + if (*q == 'M' || *q == 'I' || *q == 'S') ow += l; + ++q; + } while (isdigit(*q)); + } else if (*r == ':') { // overlap lengths + ow = isdigit(*(r+1))? strtol(r+1, &r, 10) : INT32_MAX; + } else break; + } else break; + is_ok = 1, rest = c? p + 1 : 0; + break; + } + ++i, q = p + 1; + if (c == 0) break; + } + } + if (i == 4 && is_ok == 0) ov = ow = 0, is_ok = 1; // no overlap field + if (is_ok) { + uint32_t v, w; + int l_aux, m_aux = 0; + uint8_t *aux = 0; + gfa_arc_t *arc; + v = gfa_add_seg(g, segv) << 1 | oriv; + w = gfa_add_seg(g, segw) << 1 | oriw; + arc = gfa_add_arc1(g, v, w, ov, ow, -1, 0); + l_aux = gfa_aux_parse(rest, &aux, &m_aux); // parse optional tags + if (l_aux) { + gfa_aux_t *a = &g->link_aux[arc->link_id]; + uint8_t *s_L1, *s_L2, *s_SR; + a->l_aux = l_aux, a->m_aux = m_aux, a->aux = aux; + s_SR = gfa_aux_get(a->l_aux, a->aux, "SR"); + if (s_SR && s_SR[0] == 'i') { + arc->rank = *(int32_t*)(s_SR+1); + a->l_aux = gfa_aux_del(a->l_aux, a->aux, s_SR); + } + s_L1 = gfa_aux_get(a->l_aux, a->aux, "L1"); + if (s_L1) { + if (ov != INT32_MAX && s_L1[0] == 'i') + g->seg[v>>1].len = g->seg[v>>1].len > ov + *(int32_t*)(s_L1+1)? g->seg[v>>1].len : ov + *(int32_t*)(s_L1+1); + a->l_aux = gfa_aux_del(a->l_aux, a->aux, s_L1); + } + s_L2 = gfa_aux_get(a->l_aux, a->aux, "L2"); + if (s_L2) { + if (ow != INT32_MAX && s_L2[0] == 'i') + g->seg[w>>1].len = g->seg[w>>1].len > ow + *(int32_t*)(s_L2+1)? 
g->seg[w>>1].len : ow + *(int32_t*)(s_L2+1); + a->l_aux = gfa_aux_del(a->l_aux, a->aux, s_L2); + } + if (a->l_aux == 0) { + free(a->aux); + a->aux = 0, a->m_aux = 0; + } + } + } else return -1; + return 0; +} + +static gfa_seg_t *gfa_parse_fa_hdr(gfa_t *g, char *s) +{ + int32_t i; + char buf[16]; + gfa_seg_t *seg; + for (i = 0; s[i]; ++i) + if (isspace(s[i])) break; + s[i] = 0; + sprintf(buf, "s%d", g->n_seg + 1); + i = gfa_add_seg(g, buf); + seg = &g->seg[i]; + seg->snid = gfa_sseq_add(g, s + 1); + seg->soff = seg->rank = 0; + return seg; +} + +static void gfa_update_fa_seq(gfa_t *g, gfa_seg_t *seg, int32_t l_seq, const char *seq) +{ + if (seg == 0) return; + seg->seq = gfa_strdup(seq); + seg->len = l_seq; + gfa_sseq_update(g, seg); +} + +/**************** + * User-end I/O * + ****************/ + +gfa_t *gfa_read(const char *fn) +{ + gzFile fp; + gfa_t *g; + kstring_t s = {0,0,0}, fa_seq = {0,0,0}; + kstream_t *ks; + int dret, is_fa = 0; + gfa_seg_t *fa_seg = 0; + uint64_t lineno = 0; + + fp = fn && strcmp(fn, "-")? 
gzopen(fn, "r") : gzdopen(0, "r"); + if (fp == 0) return 0; + ks = ks_init(fp); + g = gfa_init(); + while (ks_getuntil(ks, KS_SEP_LINE, &s, &dret) >= 0) { + int ret = 0; + ++lineno; + if (s.l > 0 && s.s[0] == '>') { // FASTA header + is_fa = 1; + if (fa_seg) gfa_update_fa_seq(g, fa_seg, fa_seq.l, fa_seq.s); + fa_seg = gfa_parse_fa_hdr(g, s.s); + fa_seq.l = 0; + } else if (is_fa) { // FASTA mode + if (s.l >= 3 && s.s[1] == '\t') { // likely a GFA line + gfa_update_fa_seq(g, fa_seg, fa_seq.l, fa_seq.s); // finalize fa_seg + fa_seg = 0; + is_fa = 0; + } else kputsn(s.s, s.l, &fa_seq); // likely a FASTA sequence line + } + if (is_fa) continue; + if (s.l < 3 || s.s[1] != '\t') continue; // empty line + if (s.s[0] == 'S') ret = gfa_parse_S(g, s.s); + else if (s.s[0] == 'L') ret = gfa_parse_L(g, s.s); + if (ret < 0 && gfa_verbose >= 1) + fprintf(stderr, "[E] invalid %c-line at line %ld (error code %d)\n", s.s[0], (long)lineno, ret); + } + if (is_fa && fa_seg) gfa_update_fa_seq(g, fa_seg, fa_seq.l, fa_seq.s); + free(fa_seq.s); + free(s.s); + gfa_finalize(g); + ks_destroy(ks); + gzclose(fp); + return g; +} + +void gfa_print(const gfa_t *g, FILE *fp, int flag) +{ + uint32_t i; + uint64_t k; + for (i = 0; i < g->n_seg; ++i) { + const gfa_seg_t *s = &g->seg[i]; + if (s->del) continue; + fprintf(fp, "S\t%s\t", s->name); + if (s->seq && !(flag & GFA_O_NO_SEQ)) fputs(s->seq, fp); + else fputc('*', fp); + fprintf(fp, "\tLN:i:%d", s->len); + if (s->snid >= 0 && s->soff >= 0) + fprintf(fp, "\tSN:Z:%s\tSO:i:%d", g->sseq[s->snid].name, s->soff); + if (s->rank >= 0) + fprintf(fp, "\tSR:i:%d", s->rank); + if (s->utg && s->utg->n) fprintf(fp, "\tRC:i:%d\tlc:i:%d", s->utg->n, s->utg->len_comp); + if (s->aux.l_aux > 0) { + char *t = 0; + int max = 0; + gfa_aux_format(s->aux.l_aux, s->aux.aux, &t, &max); + fputs(t, fp); + free(t); + } + fputc('\n', fp); + if (s->utg && s->utg->n) { + uint32_t j, l; + for (j = l = 0; j < s->utg->n; ++j) { + const gfa_utg_t *u = s->utg; + fprintf(fp, 
"A\t%s\t%d\t%c\t%s\t%d\t%d\n", s->name, l, "+-"[u->a[j]>>32&1], u->name[j], (int32_t)(u->r[j]>>32), (int32_t)u->r[j]); + l += (uint32_t)u->a[j]; + } + } + } + for (k = 0; k < g->n_arc; ++k) { + const gfa_arc_t *a = &g->arc[k]; + const gfa_aux_t *aux = a->link_id < g->n_arc? &g->link_aux[a->link_id] : 0; + if (a->del || a->comp) continue; + fprintf(fp, "L\t%s\t%c\t%s\t%c", g->seg[a->v_lv>>33].name, "+-"[a->v_lv>>32&1], g->seg[a->w>>1].name, "+-"[a->w&1]); + if (!(flag & GFA_O_OV_EXT)) { + fprintf(fp, "\t%dM", a->ov < a->ow? a->ov : a->ow); + } else { + if (a->ov == a->ow) fprintf(fp, "\t%dM", a->ov); + else fprintf(fp, "\t%d:%d", a->ov, a->ow); + } + if (a->rank >= 0) fprintf(fp, "\tSR:i:%d", a->rank); + fprintf(fp, "\tL1:i:%d", gfa_arc_len(*a)); + fprintf(fp, "\tL2:i:%d", gfa_arc_lw(g, *a)); + if (aux && aux->l_aux) { + char *t = 0; + int max = 0; + gfa_aux_format(aux->l_aux, aux->aux, &t, &max); + if (t) fputs(t, fp); + free(t); + } + fputc('\n', fp); + } +} diff --git a/gfa-priv.h b/gfa-priv.h new file mode 100644 index 0000000..2159dce --- /dev/null +++ b/gfa-priv.h @@ -0,0 +1,154 @@ +#ifndef __GFA_PRIV_H__ +#define __GFA_PRIV_H__ + +#include +#include "gfa.h" + +#define GFA_MALLOC(ptr, len) ((ptr) = (__typeof__(ptr))malloc((len) * sizeof(*(ptr)))) +#define GFA_CALLOC(ptr, len) ((ptr) = (__typeof__(ptr))calloc((len), sizeof(*(ptr)))) +#define GFA_REALLOC(ptr, len) ((ptr) = (__typeof__(ptr))realloc((ptr), (len) * sizeof(*(ptr)))) +#define GFA_BZERO(ptr, len) memset((ptr), 0, (len) * sizeof(*(ptr))) +#define GFA_EXPAND(a, m) do { \ + (m) = (m)? 
(m) + ((m)>>1) : 16; \ + GFA_REALLOC((a), (m)); \ + } while (0) + +typedef struct { uint64_t x, y; } gfa128_t; + +// linearized subgraphs + +typedef struct { + uint32_t v, d; + int32_t off, n; +} gfa_subv_t; + +typedef struct { + int32_t n_v, n_a, is_dag; + gfa_subv_t *v; + uint64_t *a; // high 32 bits: point to the neighbor; low 32 bit: arc index in the graph + void *km; +} gfa_sub_t; + +typedef struct { + int32_t snid, ss, se; + uint32_t vs, ve; + int32_t is_bidir, n_seg, len_max, len_min; + uint32_t *v, n_paths; + char *seq_max, *seq_min; // seq_max and seq_min point to v[] +} gfa_bubble_t; + +struct gfa_scbuf_s; +typedef struct gfa_scbuf_s gfa_scbuf_t; + +#ifdef __cplusplus +extern "C" { +#endif + +char *gfa_strdup(const char *src); +char *gfa_strndup(const char *src, size_t n); +void radix_sort_gfa64(uint64_t *st, uint64_t *en); + +// add/delete one segment/arc/stable sequence +int32_t gfa_add_seg(gfa_t *g, const char *name); +gfa_arc_t *gfa_add_arc1(gfa_t *g, uint32_t v, uint32_t w, int32_t ov, int32_t ow, int64_t link_id, int comp); +int32_t gfa_sseq_get(const gfa_t *g, const char *sname); +int32_t gfa_sseq_add(gfa_t *g, const char *sname); +void gfa_sseq_update(gfa_t *g, const gfa_seg_t *s); + +// whole graph operations +void gfa_arc_sort(gfa_t *g); +void gfa_arc_index(gfa_t *g); +uint32_t gfa_fix_symm_add(gfa_t *g); +void gfa_fix_symm_del(gfa_t *g); // delete multiple edges and restore skew-symmetry +void gfa_arc_rm(gfa_t *g); +void gfa_cleanup(gfa_t *g); // permanently delete arcs marked as deleted, sort and then index +void gfa_finalize(gfa_t *g); +int32_t gfa_check_multi(const gfa_t *g); +uint32_t gfa_fix_multi(gfa_t *g); + +int gfa_arc_del_multi_risky(gfa_t *g); +int gfa_arc_del_asymm_risky(gfa_t *g); + +// edit distance +typedef struct { + int32_t traceback; + int32_t bw_dyn, max_lag, max_chk; + int32_t s_term; + int64_t i_term; +} gfa_edopt_t; + +typedef struct { + int32_t s; + uint32_t end_v; + int32_t end_off; + int32_t wlen; // length of walk + 
int32_t n_end; + int32_t nv; + int64_t n_iter; + int32_t *v; +} gfa_edrst_t; + +void gfa_edopt_init(gfa_edopt_t *opt); +void *gfa_ed_init(void *km, const gfa_edopt_t *opt, const gfa_t *g, const gfa_edseq_t *es, int32_t ql, const char *q, uint32_t v0, int32_t off0); +void gfa_ed_step(void *z_, uint32_t v1, int32_t off1, int32_t s_term, gfa_edrst_t *r); +void gfa_ed_destroy(void *z_); + +int32_t gfa_edit_dist(void *km, const gfa_edopt_t *opt, const gfa_t *g, const gfa_edseq_t *es, int32_t ql, const char *q, uint32_t v0, int32_t off0, gfa_edrst_t *rst); + +// assembly related routines +int gfa_arc_del_trans(gfa_t *g, int fuzz); // transitive reduction +int gfa_arc_del_weak(gfa_t *g); +int gfa_arc_pair_strong(gfa_t *g); +int gfa_arc_del_short(gfa_t *g, int min_ovlp_len, float drop_ratio); // delete short arcs +int gfa_drop_tip(gfa_t *g, int tip_cnt, int tip_len); // cut tips +int gfa_drop_internal(gfa_t *g, int max_ext); +int gfa_cut_z(gfa_t *g, int32_t min_dist, int32_t max_dist); +int gfa_topocut(gfa_t *g, float drop_ratio, int32_t tip_cnt, int32_t tip_len); +int gfa_bub_simple(gfa_t *g, int min_side, int max_side); +int gfa_pop_bubble(gfa_t *g, int radius, int max_del, int protect_tip); // bubble popping +gfa_t *gfa_ug_gen(const gfa_t *g); +void gfa_scc_all(const gfa_t *g); + +// subset, modifying the graph +void gfa_sub(gfa_t *g, int n, char *const* seg, int step); +char **gfa_query_by_reg(const gfa_t *g, int32_t n_bb, const gfa_bubble_t *bb, const char *reg, int *n_seg); + +// subset, without modifying the graph +gfa_sub_t *gfa_sub_from(void *km0, const gfa_t *g, uint32_t v0, int32_t max_dist); +void gfa_sub_destroy(gfa_sub_t *sub); +void gfa_sub_print(FILE *fp, const gfa_t *g, const gfa_sub_t *sub); + +gfa_scbuf_t *gfa_scbuf_init(const gfa_t *g); +gfa_sub_t *gfa_scc1(void *km0, const gfa_t *g, gfa_scbuf_t *b, uint32_t v0); +void gfa_scbuf_destroy(gfa_scbuf_t *b); + +// graph augmentation +int gfa_ins_adj(const gfa_t *g, int min_len, gfa_ins_t *ins, const char 
*seq); +int32_t gfa_ins_filter(const gfa_t *g, int32_t n_ins, gfa_ins_t *ins); +void gfa_augment(gfa_t *g, int32_t n_ins, const gfa_ins_t *ins, int32_t n_ctg, const char *const* name, const char *const* seq); + +gfa_sfa_t *gfa_gfa2sfa(const gfa_t *g, int32_t *n_sfa_, int32_t write_seq); + +void gfa_sort_ref_arc(gfa_t *g); +gfa_bubble_t *gfa_bubble(const gfa_t *g, int32_t *n_); // FIXME: doesn't work with translocation + +void gfa_gt_simple_print(const gfa_t *g, float min_dc, int32_t is_path); // FIXME: doesn't work with translocations + +void gfa_aux_update_cv(gfa_t *g, const char *tag, const double *cov_seg, const double *cov_link); + +void gfa_sql_write(FILE *fp, const gfa_t *g, int write_seq); + +static inline int64_t gfa_find_arc(const gfa_t *g, uint32_t v, uint32_t w) +{ + uint32_t i, nv = gfa_arc_n(g, v), nw = 0, k = (uint32_t)-1; + gfa_arc_t *av = gfa_arc_a(g, v); + for (i = 0; i < nv; ++i) + if (av[i].w == w) ++nw, k = i; + return nw == 1? (int64_t)(&av[k] - g->arc) : nw == 0? -1 : -2; +} + +#ifdef __cplusplus +} +#endif + +#endif // ~__GFA_PRIV_H__ diff --git a/gfa.h b/gfa.h new file mode 100644 index 0000000..02bc6af --- /dev/null +++ b/gfa.h @@ -0,0 +1,166 @@ +#ifndef __GFA_H__ +#define __GFA_H__ + +#include +#include + +#define GFA_VERSION "0.5-r247-dirty" + +#define GFA_O_OV_EXT 0x1 +#define GFA_O_NO_SEQ 0x2 + +/* + A segment is a sequence. A vertex is one side of a segment. In the code, + segment_id is an integer, and vertex_id=segment_id<<1|orientation. The + convention is to use variable u, v or w for a vertex, not for a segment. An + arc is a directed edge between two vertices in the graph. Each arc has a + complement arc. A link represents an arc and its complement. 
The following + diagram shows an arc v->w, and the lengths used in the gfa_arc_t struct: + + |<--- lv --->|<-- ov -->| + v: ------------------------> + ||overlap||| + w: --------------------------> + |<-- ow -->|<---- lw ---->| + + The graph topology is solely represented by an array of gfa_arc_t objects + (see gfa_t::arc[]), where both an arc and its complement are present. The + array is sorted by gfa_arc_t::v_lv and indexed by gfa_t::idx[] most of time. + gfa_arc_a(g, v), of size gfa_arc_n(g, v), gives the array of arcs that leaves + a vertex v in the graph g. +*/ + +typedef struct { + uint64_t v_lv; // higher 32 bits: vertex_id; lower 32 bits: lv; packed together for sorting + uint32_t w; + int32_t rank; + int32_t ov, ow; + uint64_t link_id:61, strong:1, del:1, comp:1; // link_id: a pair of dual arcs are supposed to have the same link_id +} gfa_arc_t; + +#define gfa_arc_head(a) ((uint32_t)((a).v_lv>>32)) +#define gfa_arc_tail(a) ((a).w) +#define gfa_arc_len(a) ((uint32_t)(a).v_lv) // different from the original string graph +#define gfa_arc_lw(g, a) ((g)->seg[(a).w>>1].len - (a).ow) + +#define gfa_arc_n(g, v) ((uint32_t)(g)->idx[(v)]) +#define gfa_arc_a(g, v) (&(g)->arc[(g)->idx[(v)]>>32]) + +typedef struct { + uint32_t m_aux, l_aux; + uint8_t *aux; +} gfa_aux_t; + +typedef struct { + uint32_t start, end; // start: starting vertex in the string graph; end: ending vertex + uint32_t len_comp, dummy; // len_comp: the length of the complement unitig + uint32_t m, n; // number of reads + uint64_t *a; // list of reads + uint64_t *r; // start and end on each read + char **name; +} gfa_utg_t; + +typedef struct { + int32_t len; + uint32_t del:16, circ:16; + int32_t snid; // stable name ID + int32_t soff; // stable start position + int32_t rank; // stable rank + char *name, *seq; + gfa_utg_t *utg; + gfa_aux_t aux; +} gfa_seg_t; + +typedef struct { + int32_t len, snid, soff, rank; + uint64_t end[2]; + char *seq; +} gfa_sfa_t; + +typedef struct { + char *name; + int32_t 
min, max, rank; +} gfa_sseq_t; + +#define gfa_n_vtx(g) ((g)->n_seg << 1) + +typedef struct { + // segments + uint32_t m_seg, n_seg, max_rank; + gfa_seg_t *seg; + void *h_names; + // persistent names + uint32_t m_sseq, n_sseq; + gfa_sseq_t *sseq; + void *h_snames; + // links + uint64_t m_arc, n_arc; + gfa_arc_t *arc; + gfa_aux_t *link_aux; + uint64_t *idx; +} gfa_t; + +typedef struct { + const char *seq; + int32_t len; +} gfa_edseq_t; + +// graph augmentation + +typedef struct { + uint32_t v[2]; + int32_t voff[2]; + int32_t coff[2], ctg; +} gfa_ins_t; + +extern int gfa_verbose; +extern unsigned char gfa_comp_table[256]; + +#ifdef __cplusplus +extern "C" { +#endif + +gfa_t *gfa_init(void); +void gfa_destroy(gfa_t *g); +gfa_t *gfa_read(const char *fn); +void gfa_print(const gfa_t *g, FILE *fp, int M_only); + +gfa_edseq_t *gfa_edseq_init(const gfa_t *g); +void gfa_edseq_destroy(int32_t n_seg, gfa_edseq_t *es); + +int32_t gfa_name2id(const gfa_t *g, const char *name); +uint8_t *gfa_aux_get(int l_data, const uint8_t *data, const char tag[2]); +int gfa_aux_del(int l_data, uint8_t *data, uint8_t *s); + +#ifdef __cplusplus +} +#endif + +#ifndef kroundup32 +#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) +#endif + +static inline void gfa_arc_del(gfa_t *g, uint32_t v, uint32_t w, int del) +{ + uint32_t i, nv = gfa_arc_n(g, v); + gfa_arc_t *av = gfa_arc_a(g, v); + for (i = 0; i < nv; ++i) + if (av[i].w == w) av[i].del = !!del; +} + +static inline void gfa_seg_del(gfa_t *g, uint32_t s) +{ + uint32_t k; + g->seg[s].del = 1; + for (k = 0; k < 2; ++k) { + uint32_t i, v = s<<1 | k; + uint32_t nv = gfa_arc_n(g, v); + gfa_arc_t *av = gfa_arc_a(g, v); + for (i = 0; i < nv; ++i) { + av[i].del = 1; + gfa_arc_del(g, av[i].w^1, v^1, 1); + } + } +} + +#endif diff --git a/ggen.c b/ggen.c new file mode 100644 index 0000000..520b6dd --- /dev/null +++ b/ggen.c @@ -0,0 +1,182 @@ +#include +#include +#include +#include +#include "kthread.h" 
+#include "kalloc.h" +#include "sys.h" +#include "bseq.h" +#include "ggen.h" +#include "mgpriv.h" +#include "gfa-priv.h" + +typedef struct { + int n_seq; + mg_bseq1_t *seq; + mg_gchains_t **gcs; +} maprst_t; + +typedef struct { + const mg_mapopt_t *opt; + const mg_idx_t *gi; + mg_tbuf_t **buf; + maprst_t *r; +} step_t; + +static void worker_for(void *_data, long i, int tid) // kt_for() callback +{ + step_t *s = (step_t*)_data; + if (mg_dbg_flag & MG_DBG_QNAME) + fprintf(stderr, "QR\t%s\t%d\t%d\n", s->r->seq[i].name, tid, s->r->seq[i].l_seq); + if ((s->opt->flag & MG_M_SKIP_GCHECK) == 0 && mg_verbose >= 2) { + if (gfa_sseq_get(s->gi->g, s->r->seq[i].name) >= 0) + fprintf(stderr, "[W::%s] stable sequence \"%s\" already present in the graph. This will lead to inconsistent rGFA.\n", + __func__, s->r->seq[i].name); + } + s->r->gcs[i] = mg_map(s->gi, s->r->seq[i].l_seq, s->r->seq[i].seq, s->buf[tid], s->opt, s->r->seq[i].name); +} + +static maprst_t *ggen_map(const mg_idx_t *gi, const mg_mapopt_t *opt, const char *fn, int n_threads) +{ + mg_bseq_file_t *fp; + maprst_t *r; + step_t s; + int i; + + fp = mg_bseq_open(fn); + if (fp == 0) return 0; + + KCALLOC(0, r, 1); + r->seq = mg_bseq_read(fp, 1ULL<<62, 0, 0, 0, &r->n_seq); + mg_bseq_close(fp); + if (mg_verbose >= 3) + fprintf(stderr, "[M::%s::%.3f*%.2f] loaded file \"%s\"\n", __func__, + realtime() - mg_realtime0, cputime() / (realtime() - mg_realtime0), fn); + for (i = 0; i < r->n_seq; ++i) { + r->seq[i].rid = i; + mg_toupper(r->seq[i].l_seq, r->seq[i].seq); + } + KCALLOC(0, r->gcs, r->n_seq); + + s.gi = gi, s.opt = opt, s.r = r; + KCALLOC(0, s.buf, n_threads); + for (i = 0; i < n_threads; ++i) s.buf[i] = mg_tbuf_init(); + kt_for(n_threads, worker_for, &s, r->n_seq); + if (mg_verbose >= 3) + fprintf(stderr, "[M::%s::%.3f*%.2f] mapped %d sequence(s) to the graph\n", __func__, + realtime() - mg_realtime0, cputime() / (realtime() - mg_realtime0), r->n_seq); + for (i = 0; i < n_threads; ++i) mg_tbuf_destroy(s.buf[i]); + 
free(s.buf); + return r; +} + +static void mg_free_maprst(maprst_t *r) +{ + int i; + for (i = 0; i < r->n_seq; ++i) { + mg_gchain_free(r->gcs[i]); + free(r->seq[i].seq); free(r->seq[i].name); + } + free(r->gcs); free(r->seq); + free(r); +} + +int mg_ggen_aug(gfa_t *g, int32_t n_fn, const char **fn, const mg_idxopt_t *ipt, const mg_mapopt_t *opt0, const mg_ggopt_t *go, int n_threads) +{ + int i; + mg_mapopt_t opt = *opt0; + if (g == 0) return -1; + for (i = 0; i < n_fn; ++i) { + mg_idx_t *gi; + maprst_t *r; + if ((gi = mg_index(g, ipt, n_threads, &opt)) == 0) return -1; + r = ggen_map(gi, &opt, fn[i], n_threads); + if (opt0->flag & MG_M_CIGAR) + mg_ggsimple_cigar(0, go, g, r->n_seq, r->seq, r->gcs); + else + mg_ggsimple(0, go, g, r->n_seq, r->seq, r->gcs); + mg_free_maprst(r); + mg_idx_destroy(gi); + } + return 0; +} + +int mg_ggen_cov(gfa_t *g, int32_t n_fn, const char **fn, const mg_idxopt_t *ipt, const mg_mapopt_t *opt0, const mg_ggopt_t *go, int n_threads) +{ + int32_t i; + mg_mapopt_t opt = *opt0; + mg_idx_t *gi; + double *cov_seg, *cov_link; + int64_t j; + if ((gi = mg_index(g, ipt, n_threads, &opt)) == 0) return -1; + KCALLOC(0, cov_seg, g->n_seg); + KCALLOC(0, cov_link, g->n_arc); + for (i = 0; i < n_fn; ++i) { + maprst_t *r; + r = ggen_map(gi, &opt, fn[i], n_threads); + mg_cov_asm(g, r->n_seq, r->gcs, go->min_mapq, go->min_map_len, cov_seg, cov_link); + mg_free_maprst(r); + } + mg_idx_destroy(gi); + for (j = 0; j < g->n_seg; ++j) cov_seg[j] /= n_fn; + for (j = 0; j < g->n_arc; ++j) cov_link[j] /= n_fn; + gfa_aux_update_cv(g, "cf", cov_seg, cov_link); + free(cov_seg); free(cov_link); + return 0; +} + +int mg_ggen_call(gfa_t *g, const char *fn, const mg_idxopt_t *ipt, const mg_mapopt_t *opt0, const mg_ggopt_t *go, int n_threads) +{ + mg_mapopt_t opt = *opt0; + mg_idx_t *gi; + maprst_t *r; + if ((gi = mg_index(g, ipt, n_threads, &opt)) == 0) return -1; + r = ggen_map(gi, &opt, fn, n_threads); + mg_call_asm(g, r->n_seq, r->seq, r->gcs, go->min_mapq, 
go->min_map_len); + mg_free_maprst(r); + mg_idx_destroy(gi); + return 0; +} + +int mg_ggen(gfa_t *g, int32_t n_fn, const char **fn, const mg_idxopt_t *ipt, const mg_mapopt_t *opt, const mg_ggopt_t *go, int n_threads) +{ + if (go->flag & MG_G_CALL) return mg_ggen_call(g, fn[0], ipt, opt, go, n_threads); + else if (go->flag & MG_G_CAL_COV) return mg_ggen_cov(g, n_fn, fn, ipt, opt, go, n_threads); + else return mg_ggen_aug(g, n_fn, fn, ipt, opt, go, n_threads); +} + +int32_t mg_path2seq(void *km, const gfa_t *g, const mg_gchains_t *gcs, int32_t ls, int32_t le, int32_t voff[2], char **seq_, int32_t *cap_) // NB: [ls,le] is a CLOSED interval +{ + extern unsigned char gfa_comp_table[256]; + int32_t i, k, l = 0, cap = *cap_; + char *seq = *seq_; + assert(0 <= ls && ls <= le && le < gcs->n_lc); + for (k = ls; k <= le; ++k) { + uint32_t v = gcs->lc[k].v, len = g->seg[v>>1].len; + int32_t st = 0, en = len, tmp; + if (k == ls) st = voff[0]; + if (k == le) en = voff[1]; + assert(0 <= st && st <= en && en <= len); + if (en - st + l + 1 > cap) { + cap = en - st + l + 1; + kroundup32(cap); + KREALLOC(km, seq, cap); + } + if (v&1) { + uint8_t *ss = (uint8_t*)g->seg[v>>1].seq; + tmp = st, st = len - en, en = len - tmp; + for (i = en - 1; i >= st; --i) + seq[l++] = gfa_comp_table[ss[i]]; + } else { + memcpy(&seq[l], &g->seg[v>>1].seq[st], en - st); + l += en - st; + } + } + if (l == 0 && cap == 0) { + cap = 8; + KREALLOC(km, seq, cap); + } + seq[l] = 0; + *seq_ = seq, *cap_ = cap; + return l; +} diff --git a/ggen.h b/ggen.h new file mode 100644 index 0000000..57ebebf --- /dev/null +++ b/ggen.h @@ -0,0 +1,21 @@ +#ifndef MG_GGEN_H +#define MG_GGEN_H + +#include "minigraph.h" +#include "bseq.h" + +#ifdef __cplusplus +extern "C" { +#endif + +int32_t mg_path2seq(void *km, const gfa_t *g, const mg_gchains_t *gcs, int32_t ls, int32_t le, int32_t voff[2], char **seq_, int32_t *cap_); +void mg_ggsimple(void *km, const mg_ggopt_t *opt, gfa_t *g, int32_t n_seq, const mg_bseq1_t *seq, 
mg_gchains_t *const* gcs); +void mg_ggsimple_cigar(void *km, const mg_ggopt_t *opt, gfa_t *g, int32_t n_seq, const mg_bseq1_t *seq, mg_gchains_t *const* gcs); + +void mg_call_asm(const gfa_t *g, int32_t n_seq, const mg_bseq1_t *seq, mg_gchains_t *const *gcs, int32_t min_mapq, int32_t min_blen); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/ggsimple.c b/ggsimple.c new file mode 100644 index 0000000..66d82cf --- /dev/null +++ b/ggsimple.c @@ -0,0 +1,570 @@ +#include +#include "mgpriv.h" +#include "gfa-priv.h" +#include "kalloc.h" +#include "bseq.h" +#include "algo.h" +#include "sys.h" +#include "ggen.h" +#include "kvec-km.h" + +int32_t mg_gc_index(void *km, int min_mapq, int min_map_len, int min_depth_len, const gfa_t *g, int32_t n_seq, mg_gchains_t *const* gcs, + double *a_dens, int32_t **soff_, int32_t **qoff_, mg_intv_t **sintv_, mg_intv_t **qintv_) +{ + int32_t t, i, j, max_acnt, *scnt, *soff, *qcnt, *qoff; + int64_t sum_acnt, sum_alen; + mg_intv_t *sintv, *qintv; + + // count the number of intervals on each segment + KCALLOC(km, scnt, g->n_seg); + KCALLOC(km, qcnt, n_seq); + for (t = 0, max_acnt = 0; t < n_seq; ++t) { + const mg_gchains_t *gt = gcs[t]; + for (i = 0; i < gt->n_gc; ++i) { + const mg_gchain_t *gc = >->gc[i]; + if (gc->id != gc->parent) continue; + if (gc->blen < min_depth_len || gc->mapq < min_mapq) continue; + if (gc->n_anchor > max_acnt) max_acnt = gc->n_anchor; + ++qcnt[t]; + for (j = 0; j < gc->cnt; ++j) + ++scnt[gt->lc[gc->off + j].v>>1]; + } + } + if (max_acnt == 0) { // no gchain + kfree(km, scnt); kfree(km, qcnt); + return 0; + } + + // compute soff[] and qoff[] + KMALLOC(km, soff, g->n_seg + 1); + KMALLOC(km, qoff, n_seq + 1); + for (soff[0] = 0, i = 1; i <= g->n_seg; ++i) + soff[i] = soff[i - 1] + scnt[i - 1]; + for (qoff[0] = 0, i = 1; i <= n_seq; ++i) + qoff[i] = qoff[i - 1] + qcnt[i - 1]; + + // populate the interval list + memset(scnt, 0, 4 * g->n_seg); + memset(qcnt, 0, 4 * n_seq); + KMALLOC(km, sintv, soff[g->n_seg]); + 
KMALLOC(km, qintv, qoff[n_seq]); + sum_acnt = sum_alen = 0; + for (t = 0; t < n_seq; ++t) { + const mg_gchains_t *gt = gcs[t]; + for (i = 0; i < gt->n_gc; ++i) { + const mg_gchain_t *gc = >->gc[i]; + mg_intv_t *p; + if (gc->id != gc->parent) continue; + if (gc->blen < min_depth_len || gc->mapq < min_mapq) continue; + p = &qintv[qoff[t] + qcnt[t]]; + ++qcnt[t]; + p->st = gc->qs, p->en = gc->qe, p->rev = 0, p->far = -1, p->i = -1; + for (j = 0; j < gc->cnt; ++j) { + const mg_llchain_t *lc = >->lc[gc->off + j]; + int32_t rs, re, tmp; + if (lc->cnt > 0) { // compute start and end on the forward strand on the segment + const mg128_t *qs = >->a[lc->off]; + const mg128_t *qe = >->a[lc->off + lc->cnt - 1]; + int32_t rs0 = (int32_t)qs->x + 1 - (int32_t)(qs->y>>32&0xff); + int32_t re0 = (int32_t)qe->x; + assert(rs0 >= 0 && re0 > rs0 && re0 < g->seg[lc->v>>1].len); + sum_alen += re0 - rs0, sum_acnt += (qe->x>>32) - (qs->x>>32) + 1; + rs = 0, re = g->seg[lc->v>>1].len; + if (j == 0) rs = gc->p? gc->p->ss : rs0; + if (j == gc->cnt - 1) re = gc->p? 
gc->p->ee : re0; + if (lc->v&1) // swap rs and re + tmp = rs, rs = g->seg[lc->v>>1].len - re, re = g->seg[lc->v>>1].len - tmp; + } else rs = 0, re = g->seg[lc->v>>1].len; + p = &sintv[soff[lc->v>>1] + scnt[lc->v>>1]]; + ++scnt[lc->v>>1]; + p->st = rs, p->en = re, p->rev = lc->v&1, p->far = -1, p->i = -1; + } + } + } + *a_dens = (double)sum_acnt / sum_alen; + + // sort and index intervals + for (i = 0; i < g->n_seg; ++i) { + assert(soff[i+1] - soff[i] == scnt[i]); + mg_intv_index(soff[i+1] - soff[i], &sintv[soff[i]]); + } + kfree(km, scnt); + for (i = 0; i < n_seq; ++i) { + assert(qoff[i+1] - qoff[i] == qcnt[i]); + mg_intv_index(qoff[i+1] - qoff[i], &qintv[qoff[i]]); + } + kfree(km, qcnt); + + *sintv_ = sintv, *qintv_ = qintv; + *soff_ = soff, *qoff_ = qoff; + return max_acnt; +} + +/********************** + * Graph augmentation * + **********************/ + +void mg_ggsimple(void *km, const mg_ggopt_t *opt, gfa_t *g, int32_t n_seq, const mg_bseq1_t *seq, mg_gchains_t *const* gcs) +{ + int32_t t, i, j, *soff, *qoff, max_acnt, *sc, m_ovlp = 0, *ovlp = 0, n_ins, m_ins, n_inv; + int32_t l_pseq, m_pseq; + uint64_t *meta; + mg_intv_t *sintv, *qintv; + double a_dens; + gfa_ins_t *ins; + char *pseq; + + max_acnt = mg_gc_index(km, opt->min_mapq, opt->min_map_len, opt->min_depth_len, g, n_seq, gcs, &a_dens, &soff, &qoff, &sintv, &qintv); + if (max_acnt == 0) return; + + // extract poorly regions + m_pseq = l_pseq = 0, pseq = 0; + m_ins = n_ins = 0, ins = 0; + n_inv = 0; + KMALLOC(km, sc, max_acnt); + KMALLOC(km, meta, max_acnt); + for (t = 0; t < n_seq; ++t) { + const mg_gchains_t *gt = gcs[t]; + for (i = 0; i < gt->n_gc; ++i) { + const mg_gchain_t *gc = >->gc[i]; + int32_t off_a, off_l, n_ss, far_q; + mg_msseg_t *ss; + if (gc->id != gc->parent) continue; + if (gc->blen < opt->min_map_len || gc->mapq < opt->min_mapq) continue; + assert(gc->cnt > 0); + + // fill sc[]. This part achieves a similar goal to the one in mg_gchain_extra(). 
It makes more assumptions, but is logically simpler. + off_l = gc->off; + off_a = gt->lc[off_l].off + 1; + far_q = 0; + for (j = 1; j < gc->n_anchor; ++j, ++off_a) { + const mg128_t *q = >->a[off_a - 1], *p = >->a[off_a]; + const mg_llchain_t *lc = >->lc[off_l]; + int32_t s, ed = -1, off_l0 = off_l, pd, qd = (int32_t)p->y - (int32_t)q->y, c = (int32_t)(p->x>>32) - (int32_t)(q->x>>32) - 1; + if ((int32_t)q->y > far_q) far_q = (int32_t)q->y; // far_q keeps the rightmost query position seen so far + if (off_a == lc->off + lc->cnt) { // we are at the end of the current lchain + pd = g->seg[lc->v>>1].len - (int32_t)q->x - 1; + for (++off_l; off_l < gc->off + gc->cnt && gt->lc[off_l].cnt == 0; ++off_l) + pd += g->seg[gt->lc[off_l].v>>1].len; + assert(off_l < gc->off + gc->cnt); + if (gt->lc[off_l].ed >= 0) ed = gt->lc[off_l].ed; + pd += (int32_t)p->x + 1; + } else pd = (int32_t)p->x - (int32_t)q->x; + if ((opt->flag&MG_G_NO_QOVLP) && (int32_t)p->y < far_q) s = 1; // query overlap + else if (pd == qd && c == 0) s = -opt->match_pen; + else if (ed >= 0) { + int32_t min_d = pd < qd? pd : qd; + double t = 1. / (1.01 - opt->ggs_max_iden); + if (t > 10.) t = 10.; + s = (int32_t)(ed * t - min_d); + } else if (pd > qd) { + double x = qd * a_dens; + x = x > c? x : c; + s = (int32_t)(x + (pd - qd) * a_dens + .499); + } else { + s = (int32_t)(qd * a_dens + .499); + s = s > c? s : c; + } + sc[j - 1] = s; + meta[j-1] = (uint64_t)pd<<32 | off_l0; + } + + // get regions to insert + ss = mg_mss_all(0, gc->n_anchor - 1, sc, 10, 0, &n_ss); + off_a = gt->lc[gc->off].off; + for (j = 0; j < n_ss; ++j) { + const mg128_t *p, *q; + int32_t st, en, ls, le, span, pd, k, n_ovlp, min_len, is_inv = 0; + gfa_ins_t I; + + // find the initial positions + min_len = opt->ggs_min_end_cnt > 0? 
opt->ggs_min_end_cnt : 0; + if (min_len < ss[j].sc * opt->ggs_min_end_frac) min_len = ss[j].sc * opt->ggs_min_end_frac; + if (ss[j].st <= min_len || ss[j].en >= gc->n_anchor - 1 - min_len) continue; // too close to ends + st = ss[j].st, en = ss[j].en; + q = >->a[off_a + st]; + p = >->a[off_a + en]; + span = p->y>>32&0xff; + I.ctg = t; + ls = (int32_t)meta[st], le = (int32_t)meta[en]; // first and last lchain; CLOSED interval + assert(ls <= le); + I.v[0] = gt->lc[ls].v; + I.v[1] = gt->lc[le].v; + I.voff[0] = (int32_t)q->x + 1 - span; + I.voff[1] = (int32_t)p->x + 1; + I.coff[0] = (int32_t)q->y + 1 - span; + I.coff[1] = (int32_t)p->y + 1; + assert(I.voff[0] <= g->seg[I.v[0]>>1].len); + assert(I.voff[1] <= g->seg[I.v[1]>>1].len); + for (k = st, pd = span; k < en; ++k) + pd += meta[k]>>32; + + if (I.coff[0] > I.coff[1]) { + if (mg_verbose >= 2 && pd + (I.coff[0] - I.coff[1]) >= opt->min_var_len) + fprintf(stderr, "[W::%s] query overlap on gchain %d: [%c%s:%d,%c%s:%d|%d] <=> %s:[%d,%d|%d]\n", __func__, t, "><"[I.v[0]&1], g->seg[I.v[0]>>1].name, I.voff[0], "><"[I.v[1]&1], g->seg[I.v[1]>>1].name, I.voff[1], pd, seq[t].name, I.coff[0], I.coff[1], I.coff[1] - I.coff[0]); + continue; // such overlap can't be properly resolved + } + pd -= gfa_ins_adj(g, opt->ggs_shrink_pen, &I, seq[t].seq); + + min_len = pd > I.coff[1] - I.coff[0]? 
pd : I.coff[1] - I.coff[0]; + if (I.coff[0] <= min_len || I.coff[1] >= seq[t].l_seq - min_len) continue; // test if the event is close to ends again + + // filtering + if (I.coff[1] - I.coff[0] < opt->min_var_len && pd < opt->min_var_len) + continue; + for (k = I.coff[0]; k < I.coff[1]; ++k) { // test ambiguous bases + int c = seq[t].seq[k]; + if (c == 'n' || c == 'N') break; + } + if (k != I.coff[1]) continue; // no ambiguous bases on the insert + n_ovlp = mg_intv_overlap(km, qoff[t+1] - qoff[t], &qintv[qoff[t]], I.coff[0], I.coff[1], &ovlp, &m_ovlp); // test overlapping on the query + if (n_ovlp == 0) fprintf(stderr, "[W::%s] query interval %s:%d-%d is not covered\n", __func__, seq[t].name, I.coff[0], I.coff[1]); + if (n_ovlp != 1) continue; + for (k = ls; k <= le; ++k) { // find other mappings overlapping with the insert on the graph + uint32_t v = gt->lc[k].v, len = g->seg[v>>1].len; + int32_t s = 0, e = len, tmp; + if (k == ls) s = (int32_t)gt->a[off_a+st].x + 1 - (int32_t)(gt->a[off_a+st].y>>32&0xff); + if (k == le) e = (int32_t)gt->a[off_a+en].x + 1; + if (v&1) tmp = s, s = len - e, e = len - tmp; + n_ovlp = mg_intv_overlap(km, soff[(v>>1)+1] - soff[v>>1], &sintv[soff[v>>1]], s, e, &ovlp, &m_ovlp); + if (n_ovlp == 0) fprintf(stderr, "[W::%s] graph interval %s:%d-%d is not covered by %s:%d-%d\n", __func__, g->seg[v>>1].name, s, e, seq[t].name, I.coff[0], I.coff[1]); // this should be an assert() + if (n_ovlp != 1) break; + } + if (k <= le) continue; + if (pd - (I.coff[1] - I.coff[0]) < opt->min_var_len && (I.coff[1] - I.coff[0]) - pd < opt->min_var_len) { // if length difference > min_var_len, just insert + int32_t qd = I.coff[1] - I.coff[0], mlen, blen, score; + l_pseq = mg_path2seq(km, g, gt, ls, le, I.voff, &pseq, &m_pseq); + score = mg_wfa_cmp(km, l_pseq, pseq, qd, &seq[t].seq[I.coff[0]], 5000, &mlen, &blen); + if (score > 0) { + if (mlen > blen * opt->ggs_max_iden) continue; // make sure k-mer identity is small enough + if (blen - mlen < opt->min_var_len 
* opt->ggs_max_iden) continue; + } else if (!(opt->flag & MG_G_NO_INV)) { + mg_revcomp_seq(l_pseq, pseq); + score = mg_wfa_cmp(km, l_pseq, pseq, qd, &seq[t].seq[I.coff[0]], 5000, &mlen, &blen); + if (score > 0 && mlen > blen * opt->ggs_min_inv_iden) is_inv = 1; + } + } + if (mg_dbg_flag & MG_DBG_INSERT) { + int32_t mlen, blen, score, qd = I.coff[1] - I.coff[0]; + l_pseq = mg_path2seq(km, g, gt, ls, le, I.voff, &pseq, &m_pseq); + fprintf(stderr, "IN\t[%c%s:%d,%c%s:%d|%d] <=> %s:[%d,%d|%d] inv:%d\n", "><"[I.v[0]&1], g->seg[I.v[0]>>1].name, I.voff[0], "><"[I.v[1]&1], g->seg[I.v[1]>>1].name, I.voff[1], pd, seq[t].name, I.coff[0], I.coff[1], I.coff[1] - I.coff[0], is_inv); + fprintf(stderr, "IP\t%s\nIQ\t", pseq); + fwrite(&seq[t].seq[I.coff[0]], 1, qd, stderr); + if (pd - qd < opt->min_var_len && qd - pd < opt->min_var_len) { + score = mg_wfa_cmp(km, l_pseq, pseq, qd, &seq[t].seq[I.coff[0]], 5000, &mlen, &blen); + } else score = -1, mlen = 0, blen = pd > qd? pd : qd; + fprintf(stderr, "\nIS\t%d==%d\tnwcmp:%d\tmlen:%d\tblen:%d\n", pd, l_pseq, score, mlen, blen); + } + if (is_inv) { // turn one inversion to two events + gfa_ins_t I_inv[2]; + I_inv[0].ctg = I_inv[1].ctg = I.ctg; + // the first event + I_inv[0].coff[0] = I_inv[0].coff[1] = I.coff[0]; + I_inv[0].v[0] = I.v[0]; + I_inv[0].voff[0] = I.voff[0]; + I_inv[0].v[1] = I.v[1]^1; + I_inv[0].voff[1] = g->seg[I.v[1]>>1].len - I.voff[1]; + // the second event + I_inv[1].coff[0] = I_inv[1].coff[1] = I.coff[1]; + I_inv[1].v[0] = I.v[0]^1; + I_inv[1].voff[0] = g->seg[I.v[0]>>1].len - I.voff[0]; + I_inv[1].v[1] = I.v[1]; + I_inv[1].voff[1] = I.voff[1]; + // insert + if (n_ins == m_ins) KEXPAND(km, ins, m_ins); + ins[n_ins++] = I_inv[0]; + if (n_ins == m_ins) KEXPAND(km, ins, m_ins); + ins[n_ins++] = I_inv[1]; + ++n_inv; + } else { + if (n_ins == m_ins) KEXPAND(km, ins, m_ins); + ins[n_ins++] = I; + } + } + kfree(0, ss); + } + } + kfree(km, pseq); + kfree(km, ovlp); + kfree(km, sc); + kfree(km, meta); + kfree(km, soff); 
kfree(km, qoff); + kfree(km, sintv); kfree(km, qintv); + + if (n_ins > 0) { + char **names, **seqs; + KMALLOC(km, names, n_seq); + KMALLOC(km, seqs, n_seq); + for (i = 0; i < n_seq; ++i) + names[i] = seq[i].name, seqs[i] = seq[i].seq; + n_ins = gfa_ins_filter(g, n_ins, ins); + gfa_augment(g, n_ins, ins, n_seq, (const char*const*)names, (const char*const*)seqs); + kfree(km, ins); + kfree(km, names); + kfree(km, seqs); + } + if (mg_verbose >= 3) + fprintf(stderr, "[M::%s::%.3f*%.2f] inserted %d events, including %d inversions\n", __func__, + realtime() - mg_realtime0, cputime() / (realtime() - mg_realtime0), n_ins, n_inv); +} + +/********************** + * Graph augmentation * + **********************/ + +typedef struct { + int32_t lc, vo, qo, po, len, op, sc; +} ed_intv_t; + +static int32_t gg_count_intv(const gfa_t *g, const mg_gchains_t *gt, int32_t i) +{ + const mg_gchain_t *gc = >->gc[i]; + int32_t j, l = gc->off, x = gc->ps, n = 0; + assert(gc->p); + for (j = 0; j < gc->p->n_cigar; ++j) { + int32_t op = gc->p->cigar[j]&0xf, len = gc->p->cigar[j]>>4, rl = len; + assert(op == 1 || op == 2 || op == 7 || op == 8); + if (op == 2 || op == 7 || op == 8) { + while (x + rl > g->seg[gt->lc[l].v>>1].len) { + rl -= g->seg[gt->lc[l].v>>1].len - x; + ++n, ++l, x = 0; + } + x += rl; + } + ++n; + } + return n; +} + +static void gg_write_intv(const gfa_t *g, const mg_gchains_t *gt, int32_t i, ed_intv_t *intv) +{ + const mg_gchain_t *gc = >->gc[i]; + int32_t j, l = gc->off, pl = 0, x = gc->ps, y = gc->qs, n = 0; + ed_intv_t *p; + assert(gc->p); + for (j = 0; j < gc->p->n_cigar; ++j) { + int32_t op = gc->p->cigar[j]&0xf, len = gc->p->cigar[j]>>4, rl = len; + if (op == 2 || op == 7 || op == 8) { + while (x + rl > g->seg[gt->lc[l].v>>1].len) { + p = &intv[n++]; + p->lc = l, p->vo = x, p->qo = y, p->po = pl, p->len = g->seg[gt->lc[l].v>>1].len - x, p->op = op; + if (op == 7 || op == 8) y += p->len; + rl -= p->len, pl += p->len, ++l, x = 0; + } + } + p = &intv[n++]; + p->lc = l, 
p->vo = x, p->qo = y, p->po = pl, p->len = rl, p->op = op; + if (op == 7 || op == 8) x += rl, y += rl, pl += rl; + else if (op == 1) y += rl; + else if (op == 2) x += rl, pl += rl; + } + assert(y == gc->qe && pl == gc->pe - gc->ps); +} + +static void gg_score_intv(int32_t n_intv, ed_intv_t *intv) +{ + int32_t j; + for (j = 0; j < n_intv; ++j) { + int32_t s; + if (intv[j].op == 7) + s = intv[j].len >= 10? -intv[j].len : 0; + else s = intv[j].len; + intv[j].sc = s; + } +} + +static void gg_merge_seg(const ed_intv_t *intv, int32_t n_ss, mg_msseg_t *ss) +{ + int32_t j0, j; + for (j0 = 0, j = 1; j < n_ss; ++j) { + mg_msseg_t *s0 = &ss[j0], *s1 = &ss[j]; + int32_t i, mid = 0; + for (i = s0->en + 1; i < s1->st; ++i) + mid += intv[i].sc; + //fprintf(stderr, "XX\t%d\t%d\t%d\t%d\t%d\t%d\n", j, s0->sc, mid, s1->sc, s0->en+1, s1->st); + if (-mid < s0->sc * 0.2 && -mid < s1->sc * 0.2) { // FIXME: mid is sometimes 0 + s0->en = s1->en, s0->sc += s1->sc + mid; + s1->st = s1->en, s1->sc = 0; + } else j0 = j; + } +} + +void mg_ggsimple_cigar(void *km, const mg_ggopt_t *opt, gfa_t *g, int32_t n_seq, const mg_bseq1_t *seq, mg_gchains_t *const* gcs) +{ + int32_t t, i, *soff, *qoff, max_acnt, m_ovlp = 0, *ovlp = 0, n_ins = 0, m_ins, n_inv; + int32_t l_pseq, m_pseq; + mg_intv_t *sintv, *qintv; + double a_dens; + gfa_ins_t *ins; + char *pseq; + + max_acnt = mg_gc_index(km, opt->min_mapq, opt->min_map_len, opt->min_depth_len, g, n_seq, gcs, &a_dens, &soff, &qoff, &sintv, &qintv); + if (max_acnt == 0) return; + + // extract poorly regions + m_pseq = l_pseq = 0, pseq = 0; + m_ins = n_ins = 0, ins = 0; + n_inv = 0; + for (t = 0; t < n_seq; ++t) { + const mg_gchains_t *gt = gcs[t]; + for (i = 0; i < gt->n_gc; ++i) { + const mg_gchain_t *gc = >->gc[i]; + int32_t j, n_ss, n_intv, *sc; + ed_intv_t *intv; + mg_msseg_t *ss; + if (gc->id != gc->parent) continue; + if (gc->p == 0 || gc->blen < opt->min_map_len || gc->mapq < opt->min_mapq) continue; + assert(gc->cnt > 0); + + n_intv = gg_count_intv(g, 
gt, i); + KCALLOC(km, intv, n_intv); + gg_write_intv(g, gt, i, intv); + gg_score_intv(n_intv, intv); + KCALLOC(km, sc, n_intv); + for (j = 0; j < n_intv; ++j) sc[j] = intv[j].sc; + ss = mg_mss_all(0, n_intv, sc, opt->min_var_len, 2 * opt->min_var_len, &n_ss); + gg_merge_seg(intv, n_ss, ss); + + // get regions to insert + for (j = 0; j < n_ss; ++j) { + int32_t st, en, pd, k, n_ovlp, min_len, is_inv = 0, ls, le; + gfa_ins_t I; + ed_intv_t *is, *ie; + + // find the initial positions + st = ss[j].st, en = ss[j].en; // this is a CLOSED interval + if (st == en) continue; + is = &intv[st], ie = &intv[en - 1]; + assert(is->op != 7 && ie->op != 7); + + ls = is->lc, le = ie->lc; + I.ctg = t; + I.v[0] = gt->lc[ls].v; + I.v[1] = gt->lc[le].v; + I.voff[0] = is->vo; + I.voff[1] = ie->vo + (ie->op != 1? ie->len : 0); + I.coff[0] = is->qo; + I.coff[1] = ie->qo + (ie->op != 2? ie->len : 0); + assert(I.voff[0] <= g->seg[I.v[0]>>1].len); + assert(I.voff[1] <= g->seg[I.v[1]>>1].len); + + if (I.voff[0] == 0) { // if an insert starts at pos 0, make it start at the end of the previous vertex in the chain + assert(ls - 1 >= gc->off); + I.v[0] = gt->lc[--ls].v; + I.voff[0] = g->seg[I.v[0]>>1].len; + } + if (I.voff[1] == g->seg[I.v[1]>>1].len) { // if an insert ends at the end of the vertex, make it end at the beginning of the next vertex + assert(le + 1 < gc->off + gc->cnt); + I.v[1] = gt->lc[++le].v; + I.voff[1] = 0; + } + + pd = ie->po + (ie->op != 1? ie->len : 0) - is->po; + pd -= gfa_ins_adj(g, opt->ggs_shrink_pen, &I, seq[t].seq); + + min_len = pd > I.coff[1] - I.coff[0]? 
pd : I.coff[1] - I.coff[0]; + if (I.coff[0] <= min_len || I.coff[1] >= seq[t].l_seq - min_len) continue; // test if the event is close to ends again + + // filtering + if (I.coff[1] - I.coff[0] < opt->min_var_len && pd < opt->min_var_len) + continue; + for (k = I.coff[0]; k < I.coff[1]; ++k) { // test ambiguous bases + int c = seq[t].seq[k]; + if (c == 'n' || c == 'N') break; + } + if (k != I.coff[1]) continue; // no ambiguous bases on the insert + n_ovlp = mg_intv_overlap(km, qoff[t+1] - qoff[t], &qintv[qoff[t]], I.coff[0], I.coff[1], &ovlp, &m_ovlp); // test overlapping on the query + if (n_ovlp == 0) fprintf(stderr, "[W::%s] query interval %s:%d-%d is not covered\n", __func__, seq[t].name, I.coff[0], I.coff[1]); + if (n_ovlp != 1) continue; + for (k = is->lc; k <= ie->lc; ++k) { // find other mappings overlapping with the insert on the graph + uint32_t v = gt->lc[k].v, len = g->seg[v>>1].len; + int32_t s = 0, e = len, tmp; + if (k == is->lc) s = is->vo; + if (k == ie->lc) e = ie->vo + (ie->op != 1? 
ie->len : 0); + if (v&1) tmp = s, s = len - e, e = len - tmp; + if (s == e) { + if (s == 0) ++e; + else --s; + } + n_ovlp = mg_intv_overlap(km, soff[(v>>1)+1] - soff[v>>1], &sintv[soff[v>>1]], s, e, &ovlp, &m_ovlp); + if (n_ovlp == 0) fprintf(stderr, "[W::%s] graph interval %c%s:%d-%d is not covered by %s:%d-%d\n", __func__, "><"[v&1], g->seg[v>>1].name, s, e, seq[t].name, I.coff[0], I.coff[1]); // this should be an assert() + if (n_ovlp != 1) break; + } + if (k <= ie->lc) continue; + if (pd - (I.coff[1] - I.coff[0]) < opt->min_var_len && (I.coff[1] - I.coff[0]) - pd < opt->min_var_len) { // if length difference > min_var_len, just insert + int32_t qd = I.coff[1] - I.coff[0], mlen, blen, score = 0; + l_pseq = mg_path2seq(km, g, gt, ls, le, I.voff, &pseq, &m_pseq); + score = mg_wfa_cmp(km, l_pseq, pseq, qd, &seq[t].seq[I.coff[0]], 5000, &mlen, &blen); + if (score > 0) { + if (mlen > blen * opt->ggs_max_iden) continue; // make sure k-mer identity is small enough + if (blen - mlen < opt->min_var_len * opt->ggs_max_iden) continue; + } else if (!(opt->flag & MG_G_NO_INV)) { + mg_revcomp_seq(l_pseq, pseq); + score = mg_wfa_cmp(km, l_pseq, pseq, qd, &seq[t].seq[I.coff[0]], 5000, &mlen, &blen); + if (score > 0 && mlen > blen * opt->ggs_min_inv_iden) is_inv = 1; + } + } + if (mg_dbg_flag & MG_DBG_INSERT) { + int32_t mlen, blen, score, qd = I.coff[1] - I.coff[0]; + l_pseq = mg_path2seq(km, g, gt, ls, le, I.voff, &pseq, &m_pseq); + fprintf(stderr, "IN\t[%c%s:%d,%c%s:%d|%d] <=> %s:[%d,%d|%d] inv:%d\n", "><"[I.v[0]&1], g->seg[I.v[0]>>1].name, I.voff[0], "><"[I.v[1]&1], g->seg[I.v[1]>>1].name, I.voff[1], pd, seq[t].name, I.coff[0], I.coff[1], I.coff[1] - I.coff[0], is_inv); + fprintf(stderr, "IP\t%s\nIQ\t", pseq); + fwrite(&seq[t].seq[I.coff[0]], 1, qd, stderr); + if (pd - qd < opt->min_var_len && qd - pd < opt->min_var_len) { + score = mg_wfa_cmp(km, l_pseq, pseq, qd, &seq[t].seq[I.coff[0]], 5000, &mlen, &blen); + } else score = -1, mlen = 0, blen = pd > qd? 
pd : qd; + fprintf(stderr, "\nIS\t%d==%d\tnwcmp:%d\tmlen:%d\tblen:%d\n", pd, l_pseq, score, mlen, blen); + //if (I.voff[0] == 2305301) { for (k = st; k < en; ++k) fprintf(stderr, "%d%c", intv[k].len, "MIDNSHP=XB"[intv[k].op]); fprintf(stderr, "\n"); } + } + if (is_inv) { // turn one inversion to two events + gfa_ins_t I_inv[2]; + I_inv[0].ctg = I_inv[1].ctg = I.ctg; + // the first event + I_inv[0].coff[0] = I_inv[0].coff[1] = I.coff[0]; + I_inv[0].v[0] = I.v[0]; + I_inv[0].voff[0] = I.voff[0]; + I_inv[0].v[1] = I.v[1]^1; + I_inv[0].voff[1] = g->seg[I.v[1]>>1].len - I.voff[1]; + // the second event + I_inv[1].coff[0] = I_inv[1].coff[1] = I.coff[1]; + I_inv[1].v[0] = I.v[0]^1; + I_inv[1].voff[0] = g->seg[I.v[0]>>1].len - I.voff[0]; + I_inv[1].v[1] = I.v[1]; + I_inv[1].voff[1] = I.voff[1]; + // insert + if (n_ins == m_ins) KEXPAND(km, ins, m_ins); + ins[n_ins++] = I_inv[0]; + if (n_ins == m_ins) KEXPAND(km, ins, m_ins); + ins[n_ins++] = I_inv[1]; + ++n_inv; + } else { + if (n_ins == m_ins) KEXPAND(km, ins, m_ins); + ins[n_ins++] = I; + } + } + kfree(0, ss); // this is allocated from malloc() inside mg_mss_all() + kfree(km, intv); + kfree(km, sc); + } + } + kfree(km, pseq); + kfree(km, ovlp); + kfree(km, soff); kfree(km, qoff); + kfree(km, sintv); kfree(km, qintv); + + if (n_ins > 0) { + char **names, **seqs; + KMALLOC(km, names, n_seq); + KMALLOC(km, seqs, n_seq); + for (i = 0; i < n_seq; ++i) + names[i] = seq[i].name, seqs[i] = seq[i].seq; + n_ins = gfa_ins_filter(g, n_ins, ins); + gfa_augment(g, n_ins, ins, n_seq, (const char*const*)names, (const char*const*)seqs); + kfree(km, ins); + kfree(km, names); + kfree(km, seqs); + } + if (mg_verbose >= 3) + fprintf(stderr, "[M::%s::%.3f*%.2f] inserted %d events, including %d inversions\n", __func__, + realtime() - mg_realtime0, cputime() / (realtime() - mg_realtime0), n_ins, n_inv); +} diff --git a/gmap.c b/gmap.c new file mode 100644 index 0000000..91623f5 --- /dev/null +++ b/gmap.c @@ -0,0 +1,211 @@ +#include +#include 
+#include "kthread.h" +#include "kalloc.h" +#include "bseq.h" +#include "sys.h" +#include "mgpriv.h" +#include "gfa-priv.h" + +typedef struct { + int64_t mini_batch_size; + int n_processed, n_threads, n_fp; + const mg_mapopt_t *opt; + mg_bseq_file_t **fp; + const mg_idx_t *gi; + kstring_t str; + double *c_seg, *c_link; +} pipeline_t; + +typedef struct { + const pipeline_t *p; + int n_seq, n_frag; + mg_bseq1_t *seq; + int *seg_off, *n_seg; + mg_gchains_t **gcs; + mg_tbuf_t **buf; +} step_t; + +static void worker_for(void *_data, long i, int tid) // kt_for() callback +{ + step_t *s = (step_t*)_data; + int qlens[MG_MAX_SEG], j, off = s->seg_off[i], pe_ori = s->p->opt->pe_ori; + const char *qseqs[MG_MAX_SEG]; + mg_tbuf_t *b = s->buf[tid]; + assert(s->n_seg[i] <= MG_MAX_SEG); + if (mg_dbg_flag & MG_DBG_QNAME) + fprintf(stderr, "QR\t%s\t%d\t%d\n", s->seq[off].name, tid, s->seq[off].l_seq); + for (j = 0; j < s->n_seg[i]; ++j) { + if (s->n_seg[i] == 2 && ((j == 0 && (pe_ori>>1&1)) || (j == 1 && (pe_ori&1)))) + mg_revcomp_bseq(&s->seq[off + j]); + qlens[j] = s->seq[off + j].l_seq; + qseqs[j] = s->seq[off + j].seq; + } + if (s->p->opt->flag & MG_M_INDEPEND_SEG) { + for (j = 0; j < s->n_seg[i]; ++j) + mg_map_frag(s->p->gi, 1, &qlens[j], &qseqs[j], &s->gcs[off+j], b, s->p->opt, s->seq[off+j].name); + } else { + mg_map_frag(s->p->gi, s->n_seg[i], qlens, qseqs, &s->gcs[off], b, s->p->opt, s->seq[off].name); + } +#if 0 // for paired-end reads + for (j = 0; j < s->n_seg[i]; ++j) // flip the query strand and coordinate to the original read strand + if (s->n_seg[i] == 2 && ((j == 0 && (pe_ori>>1&1)) || (j == 1 && (pe_ori&1)))) { + int k, t; + mg_revcomp_bseq(&s->seq[off + j]); + for (k = 0; k < s->n_reg[off + j]; ++k) { + mg_lchain_t *r = &s->reg[off + j][k]; + t = r->qs; + r->qs = qlens[j] - r->qe; + r->qe = qlens[j] - t; + r->v ^= 1; + } + } +#endif +} + +static void *worker_pipeline(void *shared, int step, void *in) +{ + int i, j, k; + pipeline_t *p = (pipeline_t*)shared; + if 
(step == 0) { // step 0: read sequences + int with_qual = !(p->opt->flag & MG_M_NO_QUAL); + int with_comment = !!(p->opt->flag & MG_M_COPY_COMMENT); + int frag_mode = (p->n_fp > 1 || !!(p->opt->flag & MG_M_FRAG_MODE)); + step_t *s; + s = (step_t*)calloc(1, sizeof(step_t)); + if (p->n_fp > 1) s->seq = mg_bseq_read_frag(p->n_fp, p->fp, p->mini_batch_size, with_qual, with_comment, &s->n_seq); + else s->seq = mg_bseq_read(p->fp[0], p->mini_batch_size, with_qual, with_comment, frag_mode, &s->n_seq); + if (s->seq) { + s->p = p; + for (i = 0; i < s->n_seq; ++i) + mg_toupper(s->seq[i].l_seq, s->seq[i].seq); + for (i = 0; i < s->n_seq; ++i) + s->seq[i].rid = p->n_processed++; + s->buf = (mg_tbuf_t**)calloc(p->n_threads, sizeof(mg_tbuf_t*)); + for (i = 0; i < p->n_threads; ++i) + s->buf[i] = mg_tbuf_init(); + s->seg_off = (int*)calloc(2 * s->n_seq, sizeof(int)); + s->n_seg = s->seg_off + s->n_seq; // n_seg, rep_len and frag_gap are allocated together with seg_off + KCALLOC(0, s->gcs, s->n_seq); + for (i = 1, j = 0; i <= s->n_seq; ++i) + if (i == s->n_seq || !frag_mode || !mg_qname_same(s->seq[i-1].name, s->seq[i].name)) { + s->n_seg[s->n_frag] = i - j; + s->seg_off[s->n_frag++] = j; + j = i; + } + return s; + } else free(s); + } else if (step == 1) { // step 1: map + kt_for(p->n_threads, worker_for, in, ((step_t*)in)->n_frag); + return in; + } else if (step == 2) { // step 2: output + void *km = 0; + step_t *s = (step_t*)in; + for (i = 0; i < p->n_threads; ++i) mg_tbuf_destroy(s->buf[i]); + free(s->buf); + if (!(mg_dbg_flag & MG_DBG_NO_KALLOC)) km = km_init(); + for (k = 0; k < s->n_frag; ++k) { + int seg_st = s->seg_off[k], seg_en = s->seg_off[k] + s->n_seg[k]; + if ((p->opt->flag & MG_M_FRAG_MODE) && (p->opt->flag & MG_M_FRAG_MERGE)) { + mg_bseq1_t *t = &s->seq[seg_st]; + int32_t *qlens; + KMALLOC(km, qlens, seg_en - seg_st); // TODO: if this is an issue (quite unlikely), preallocate + for (i = seg_st; i < seg_en; ++i) + qlens[i - seg_st] = s->seq[i].l_seq; + if 
(p->opt->flag & MG_M_CAL_COV) + mg_cov_map(p->gi->g, s->gcs[seg_st], p->opt->min_cov_mapq, p->opt->min_cov_blen, p->c_seg, p->c_link, t->name); + else mg_write_gaf(&p->str, p->gi->g, s->gcs[seg_st], seg_en - seg_st, qlens, t->name, p->opt->flag, km); + kfree(km, qlens); + if (p->str.l) mg_err_fputs(p->str.s, stdout); + } else { + for (i = seg_st; i < seg_en; ++i) { + mg_bseq1_t *t = &s->seq[i]; + if (p->opt->flag & MG_M_CAL_COV) + mg_cov_map(p->gi->g, s->gcs[i], p->opt->min_cov_mapq, p->opt->min_cov_blen, p->c_seg, p->c_link, t->name); + else mg_write_gaf(&p->str, p->gi->g, s->gcs[i], 1, &t->l_seq, t->name, p->opt->flag, km); + if (p->str.l) mg_err_fputs(p->str.s, stdout); + } + } + for (i = seg_st; i < seg_en; ++i) { + mg_gchain_free(s->gcs[i]); + free(s->seq[i].seq); free(s->seq[i].name); + if (s->seq[i].qual) free(s->seq[i].qual); + if (s->seq[i].comment) free(s->seq[i].comment); + } + } + free(s->gcs); free(s->seg_off); free(s->seq); // n_seg, rep_len and frag_gap were allocated with seg_off; no memory leak here + if (km) km_destroy(km); + if (mg_verbose >= 3) + fprintf(stderr, "[M::%s::%.3f*%.2f] mapped %d sequences\n", __func__, realtime() - mg_realtime0, cputime() / (realtime() - mg_realtime0), s->n_seq); + free(s); + } + return 0; +} + +static mg_bseq_file_t **open_bseqs(int n, const char **fn) +{ + mg_bseq_file_t **fp; + int i, j; + fp = (mg_bseq_file_t**)calloc(n, sizeof(mg_bseq_file_t*)); + for (i = 0; i < n; ++i) { + if ((fp[i] = mg_bseq_open(fn[i])) == 0) { + if (mg_verbose >= 1) + fprintf(stderr, "ERROR: failed to open file '%s'\n", fn[i]); + for (j = 0; j < i; ++j) + mg_bseq_close(fp[j]); + free(fp); + return 0; + } + } + return fp; +} + +int mg_map_file_frag(const mg_idx_t *idx, int n_segs, const char **fn, const mg_mapopt_t *opt, int n_threads, double *c_seg, double *c_link) +{ + int i, pl_threads; + pipeline_t pl; + if (n_segs < 1) return -1; + memset(&pl, 0, sizeof(pipeline_t)); + pl.n_fp = n_segs; + pl.fp = open_bseqs(pl.n_fp, fn); + if (pl.fp 
== 0) return -1; + pl.opt = opt, pl.gi = idx; + pl.n_threads = n_threads > 1? n_threads : 1; + pl.mini_batch_size = opt->mini_batch_size; + pl.c_seg = c_seg, pl.c_link = c_link; + pl_threads = n_threads == 1? 1 : (opt->flag&MG_M_2_IO_THREADS)? 3 : 2; + kt_pipeline(pl_threads, worker_pipeline, &pl, 3); + + free(pl.str.s); + for (i = 0; i < pl.n_fp; ++i) + mg_bseq_close(pl.fp[i]); + free(pl.fp); + return 0; +} + +int mg_map_files(gfa_t *g, int n_fn, const char **fn, const mg_idxopt_t *ipt, const mg_mapopt_t *opt0, int n_threads) +{ + mg_mapopt_t opt = *opt0; + mg_idx_t *gi; + int i, ret = 0; + double *cov_seg = 0, *cov_link = 0; + if ((gi = mg_index(g, ipt, n_threads, &opt)) == 0) return -1; + if (opt.flag & MG_M_CAL_COV) { + KCALLOC(0, cov_seg, g->n_seg); + KCALLOC(0, cov_link, g->n_arc); + } + if (opt.flag & MG_M_FRAG_MODE) { + ret = mg_map_file_frag(gi, n_fn, fn, &opt, n_threads, cov_seg, cov_link); + } else { + for (i = 0; i < n_fn; ++i) { + ret = mg_map_file_frag(gi, 1, &fn[i], &opt, n_threads, cov_seg, cov_link); + if (ret != 0) break; + } + } + if (opt.flag & MG_M_CAL_COV) { + gfa_aux_update_cv(g, "dc", cov_seg, cov_link); + free(cov_seg); free(cov_link); + } + mg_idx_destroy(gi); + return ret; +} diff --git a/index.c b/index.c new file mode 100644 index 0000000..e94f58f --- /dev/null +++ b/index.c @@ -0,0 +1,230 @@ +#include +#include "mgpriv.h" +#include "khashl.h" +#include "kthread.h" +#include "kvec-km.h" +#include "sys.h" + +#define idx_hash(a) ((a)>>1) +#define idx_eq(a, b) ((a)>>1 == (b)>>1) +KHASHL_MAP_INIT(KH_LOCAL, idxhash_t, mg_hidx, uint64_t, uint64_t, idx_hash, idx_eq) + +typedef struct mg_idx_bucket_s { + mg128_v a; // (minimizer, position) array + int32_t n; // size of the _p_ array + uint64_t *p; // position array for minimizers appearing >1 times + void *h; // hash table indexing _p_ and minimizers appearing once +} mg_idx_bucket_t; + +mg_idx_t *mg_idx_init(int k, int w, int b) +{ + mg_idx_t *gi; + if (k*2 < b) b = k * 2; + if (w < 1) w = 1; 
+ KCALLOC(0, gi, 1); + gi->w = w, gi->k = k, gi->b = b; + KCALLOC(0, gi->B, 1<B) { + for (i = 0; i < 1U<b; ++i) { + free(gi->B[i].p); + free(gi->B[i].a.a); + mg_hidx_destroy((idxhash_t*)gi->B[i].h); + } + free(gi->B); + } + gfa_edseq_destroy(gi->n_seg, gi->es); + free(gi); +} + +/**************** + * Index access * + ****************/ + +const uint64_t *mg_idx_hget(const void *h_, const uint64_t *q, int suflen, uint64_t minier, int *n) +{ + khint_t k; + const idxhash_t *h = (const idxhash_t*)h_; + *n = 0; + if (h == 0) return 0; + k = mg_hidx_get(h, minier>>suflen<<1); + if (k == kh_end(h)) return 0; + if (kh_key(h, k)&1) { // special casing when there is only one k-mer + *n = 1; + return &kh_val(h, k); + } else { + *n = (uint32_t)kh_val(h, k); + return &q[kh_val(h, k)>>32]; + } +} + +const uint64_t *mg_idx_get(const mg_idx_t *gi, uint64_t minier, int *n) +{ + int mask = (1<b) - 1; + mg_idx_bucket_t *b = &gi->B[minier&mask]; + return mg_idx_hget(b->h, b->p, gi->b, minier, n); +} + +void mg_idx_cal_quantile(const mg_idx_t *gi, int32_t m, float f[], int32_t q[]) +{ + int32_t i; + uint64_t n = 0; + khint_t *a, k; + for (i = 0; i < 1<b; ++i) + if (gi->B[i].h) n += kh_size((idxhash_t*)gi->B[i].h); + a = (uint32_t*)malloc(n * 4); + for (i = 0, n = 0; i < 1<b; ++i) { + idxhash_t *h = (idxhash_t*)gi->B[i].h; + if (h == 0) continue; + for (k = 0; k < kh_end(h); ++k) { + if (!kh_exist(h, k)) continue; + a[n++] = kh_key(h, k)&1? 
1 : (uint32_t)kh_val(h, k); + } + } + for (i = 0; i < m; ++i) + q[i] = ks_ksmall_uint32_t(n, a, (size_t)((1.0 - (double)f[i]) * n)); + free(a); +} + +/*************** + * Index build * + ***************/ + +static void mg_idx_add(mg_idx_t *gi, int n, const mg128_t *a) +{ + int i, mask = (1<b) - 1; + for (i = 0; i < n; ++i) { + mg128_v *p = &gi->B[a[i].x>>8&mask].a; + kv_push(mg128_t, 0, *p, a[i]); + } +} + +void mg_idx_hfree(void *h_) +{ + idxhash_t *h = (idxhash_t*)h_; + if (h == 0) return; + mg_hidx_destroy(h); +} + +void *mg_idx_a2h(void *km, int32_t n_a, mg128_t *a, int suflen, uint64_t **q_, int32_t *n_) +{ + int32_t N, n, n_keys; + int32_t j, start_a, start_q; + idxhash_t *h; + uint64_t *q; + + *q_ = 0, *n_ = 0; + if (n_a == 0) return 0; + + // sort by minimizer + radix_sort_128x(a, a + n_a); + + // count and preallocate + for (j = 1, n = 1, n_keys = 0, N = 0; j <= n_a; ++j) { + if (j == n_a || a[j].x>>8 != a[j-1].x>>8) { + ++n_keys; + if (n > 1) N += n; + n = 1; + } else ++n; + } + h = mg_hidx_init2(km); + mg_hidx_resize(h, n_keys); + KCALLOC(km, q, N); + *q_ = q, *n_ = N; + + // create the hash table + for (j = 1, n = 1, start_a = start_q = 0; j <= n_a; ++j) { + if (j == n_a || a[j].x>>8 != a[j-1].x>>8) { + khint_t itr; + int absent; + mg128_t *p = &a[j-1]; + itr = mg_hidx_put(h, p->x>>8>>suflen<<1, &absent); + assert(absent && j == start_a + n); + if (n == 1) { + kh_key(h, itr) |= 1; + kh_val(h, itr) = p->y; + } else { + int k; + for (k = 0; k < n; ++k) + q[start_q + k] = a[start_a + k].y; + radix_sort_gfa64(&q[start_q], &q[start_q + n]); // sort by position; needed as in-place radix_sort_128x() is not stable + kh_val(h, itr) = (uint64_t)start_q<<32 | n; + start_q += n; + } + start_a = j, n = 1; + } else ++n; + } + assert(N == start_q); + return h; +} + +static void worker_post(void *g, long i, int tid) +{ + mg_idx_t *gi = (mg_idx_t*)g; + mg_idx_bucket_t *b = &gi->B[i]; + if (b->a.n == 0) return; + b->h = (idxhash_t*)mg_idx_a2h(0, b->a.n, b->a.a, gi->b, 
&b->p, &b->n); + kfree(0, b->a.a); + b->a.n = b->a.m = 0, b->a.a = 0; +} + +int mg_gfa_overlap(const gfa_t *g) +{ + int64_t i; + for (i = 0; i < g->n_arc; ++i) // non-zero overlap + if (g->arc[i].ov != 0 || g->arc[i].ow != 0) + return 1; + return 0; +} + +mg_idx_t *mg_index_core(gfa_t *g, int k, int w, int b, int n_threads) +{ + mg_idx_t *gi; + mg128_v a = {0,0,0}; + int i; + + if (mg_gfa_overlap(g)) { + if (mg_verbose >= 1) + fprintf(stderr, "[E::%s] minigraph doesn't work with graphs containing overlapping segments\n", __func__); + return 0; + } + gi = mg_idx_init(k, w, b); + gi->g = g; + + for (i = 0; i < g->n_seg; ++i) { + gfa_seg_t *s = &g->seg[i]; + a.n = 0; + mg_sketch(0, s->seq, s->len, w, k, i, &a); // TODO: this can be parallelized + mg_idx_add(gi, a.n, a.a); + } + free(a.a); + kt_for(n_threads, worker_post, gi, 1<b); + return gi; +} + +mg_idx_t *mg_index(gfa_t *g, const mg_idxopt_t *io, int n_threads, mg_mapopt_t *mo) +{ + int32_t i, j; + mg_idx_t *gi; + for (i = 0; i < g->n_seg; ++i) { // uppercase + gfa_seg_t *s = &g->seg[i]; + for (j = 0; j < s->len; ++j) + if (s->seq[j] >= 'a' && s->seq[j] <= 'z') + s->seq[j] -= 32; + } + gi = mg_index_core(g, io->k, io->w, io->bucket_bits, n_threads); + if (gi == 0) return 0; + gi->es = gfa_edseq_init(gi->g); + gi->n_seg = g->n_seg; + if (mg_verbose >= 3) + fprintf(stderr, "[M::%s::%.3f*%.2f] indexed the graph\n", __func__, + realtime() - mg_realtime0, cputime() / (realtime() - mg_realtime0)); + if (mo) mg_opt_update(gi, mo, 0); + return gi; +} diff --git a/kalloc.c b/kalloc.c new file mode 100644 index 0000000..f5de41a --- /dev/null +++ b/kalloc.c @@ -0,0 +1,224 @@ +#include +#include +#include +#include "kalloc.h" + +/* In kalloc, a *core* is a large chunk of contiguous memory. Each core is + * associated with a master header, which keeps the size of the current core + * and the pointer to next core. 
Kalloc allocates small *blocks* of memory from + * the cores and organizes free memory blocks in a circular single-linked list. + * + * In the following diagram, "@" stands for the header of a free block (of type + * header_t), "#" for the header of an allocated block (of type size_t), "-" + * for free memory, and "+" for allocated memory. + * + * master This region is core 1. master This region is core 2. + * | | + * *@-------#++++++#++++++++++++@-------- *@----------#++++++++++++#+++++++@------------ + * | | | | + * p=p->ptr->ptr->ptr->ptr p->ptr p->ptr->ptr p->ptr->ptr->ptr + */ +typedef struct header_t { + size_t size; + struct header_t *ptr; +} header_t; + +typedef struct { + void *par; + size_t min_core_size; + header_t base, *loop_head, *core_head; /* base is a zero-sized block always kept in the loop */ +} kmem_t; + +static void panic(const char *s) +{ + fprintf(stderr, "%s\n", s); + abort(); +} + +void *km_init2(void *km_par, size_t min_core_size) +{ + kmem_t *km; + km = (kmem_t*)kcalloc(km_par, 1, sizeof(kmem_t)); + km->par = km_par; + if (km_par) km->min_core_size = min_core_size > 0? min_core_size : ((kmem_t*)km_par)->min_core_size - 2; + else km->min_core_size = min_core_size > 0? 
min_core_size : 0x80000; + return (void*)km; +} + +void *km_init(void) { return km_init2(0, 0); } + +void km_destroy(void *_km) +{ + kmem_t *km = (kmem_t*)_km; + void *km_par; + header_t *p, *q; + if (km == NULL) return; + km_par = km->par; + for (p = km->core_head; p != NULL;) { + q = p->ptr; + kfree(km_par, p); + p = q; + } + kfree(km_par, km); +} + +static header_t *morecore(kmem_t *km, size_t nu) +{ + header_t *q; + size_t bytes, *p; + nu = (nu + 1 + (km->min_core_size - 1)) / km->min_core_size * km->min_core_size; /* the first +1 for core header */ + bytes = nu * sizeof(header_t); + q = (header_t*)kmalloc(km->par, bytes); + if (!q) panic("[morecore] insufficient memory"); + q->ptr = km->core_head, q->size = nu, km->core_head = q; + p = (size_t*)(q + 1); + *p = nu - 1; /* the size of the free block; -1 because the first unit is used for the core header */ + kfree(km, p + 1); /* initialize the new "core"; NB: the core header is not looped. */ + return km->loop_head; +} + +void kfree(void *_km, void *ap) /* kfree() also adds a new core to the circular list */ +{ + header_t *p, *q; + kmem_t *km = (kmem_t*)_km; + + if (!ap) return; + if (km == NULL) { + free(ap); + return; + } + p = (header_t*)((size_t*)ap - 1); + p->size = *((size_t*)ap - 1); + /* Find the pointer that points to the block to be freed. 
The following loop can stop on two conditions: + * + * a) "p>q && pptr": @------#++++++++#+++++++@------- @---------------#+++++++@------- + * (can also be in | | | -> | | + * two cores) q p q->ptr q q->ptr + * + * @-------- #+++++++++@-------- @-------- @------------------ + * | | | -> | | + * q p q->ptr q q->ptr + * + * b) "q>=q->ptr && (p>q || pptr)": @-------#+++++ @--------#+++++++ @-------#+++++ @---------------- + * | | | -> | | + * q->ptr q p q->ptr q + * + * #+++++++@----- #++++++++@------- @------------- #++++++++@------- + * | | | -> | | + * p q->ptr q q->ptr q + */ + for (q = km->loop_head; !(p > q && p < q->ptr); q = q->ptr) + if (q >= q->ptr && (p > q || p < q->ptr)) break; + if (p + p->size == q->ptr) { /* two adjacent blocks, merge p and q->ptr (the 2nd and 4th cases) */ + p->size += q->ptr->size; + p->ptr = q->ptr->ptr; + } else if (p + p->size > q->ptr && q->ptr >= p) { + panic("[kfree] The end of the allocated block enters a free block."); + } else p->ptr = q->ptr; /* backup q->ptr */ + + if (q + q->size == p) { /* two adjacent blocks, merge q and p (the other two cases) */ + q->size += p->size; + q->ptr = p->ptr; + km->loop_head = q; + } else if (q + q->size > p && p >= q) { + panic("[kfree] The end of a free block enters the allocated block."); + } else km->loop_head = p, q->ptr = p; /* in two cores, cannot be merged; create a new block in the list */ +} + +void *kmalloc(void *_km, size_t n_bytes) +{ + kmem_t *km = (kmem_t*)_km; + size_t n_units; + header_t *p, *q; + + if (n_bytes == 0) return 0; + if (km == NULL) return malloc(n_bytes); + n_units = (n_bytes + sizeof(size_t) + sizeof(header_t) - 1) / sizeof(header_t); /* header+n_bytes requires at least this number of units */ + + if (!(q = km->loop_head)) /* the first time when kmalloc() is called, intialize it */ + q = km->loop_head = km->base.ptr = &km->base; + for (p = q->ptr;; q = p, p = p->ptr) { /* search for a suitable block */ + if (p->size >= n_units) { /* p->size if the size of 
current block. This line means the current block is large enough. */ + if (p->size == n_units) q->ptr = p->ptr; /* no need to split the block */ + else { /* split the block. NB: memory is allocated at the end of the block! */ + p->size -= n_units; /* reduce the size of the free block */ + p += p->size; /* p points to the allocated block */ + *(size_t*)p = n_units; /* set the size */ + } + km->loop_head = q; /* set the end of chain */ + return (size_t*)p + 1; + } + if (p == km->loop_head) { /* then ask for more "cores" */ + if ((p = morecore(km, n_units)) == 0) return 0; + } + } +} + +void *kcalloc(void *_km, size_t count, size_t size) +{ + kmem_t *km = (kmem_t*)_km; + void *p; + if (size == 0 || count == 0) return 0; + if (km == NULL) return calloc(count, size); + p = kmalloc(km, count * size); + memset(p, 0, count * size); + return p; +} + +void *krealloc(void *_km, void *ap, size_t n_bytes) // TODO: this can be made more efficient in principle +{ + kmem_t *km = (kmem_t*)_km; + size_t cap, *p, *q; + + if (n_bytes == 0) { + kfree(km, ap); return 0; + } + if (km == NULL) return realloc(ap, n_bytes); + if (ap == NULL) return kmalloc(km, n_bytes); + p = (size_t*)ap - 1; + cap = (*p) * sizeof(header_t) - sizeof(size_t); + if (cap >= n_bytes) return ap; /* TODO: this prevents shrinking */ + q = (size_t*)kmalloc(km, n_bytes); + memcpy(q, ap, cap); + kfree(km, ap); + return q; +} + +void *krelocate(void *km, void *ap, size_t n_bytes) +{ + void *p; + if (km == 0 || ap == 0) return ap; + p = kmalloc(km, n_bytes); + memcpy(p, ap, n_bytes); + kfree(km, ap); + return p; +} + +void km_stat(const void *_km, km_stat_t *s) +{ + kmem_t *km = (kmem_t*)_km; + header_t *p; + memset(s, 0, sizeof(km_stat_t)); + if (km == NULL || km->loop_head == NULL) return; + for (p = km->loop_head;; p = p->ptr) { + s->available += p->size * sizeof(header_t); + if (p->size != 0) ++s->n_blocks; /* &kmem_t::base is always one of the cores. It is zero-sized. 
*/ + if (p->ptr > p && p + p->size > p->ptr) + panic("[km_stat] The end of a free block enters another free block."); + if (p->ptr == km->loop_head) break; + } + for (p = km->core_head; p != NULL; p = p->ptr) { + size_t size = p->size * sizeof(header_t); + ++s->n_cores; + s->capacity += size; + s->largest = s->largest > size? s->largest : size; + } +} + +void km_stat_print(const void *km) +{ + km_stat_t st; + km_stat(km, &st); + fprintf(stderr, "[km_stat] cap=%ld, avail=%ld, largest=%ld, n_core=%ld, n_block=%ld\n", + st.capacity, st.available, st.largest, st.n_blocks, st.n_cores); +} diff --git a/kalloc.h b/kalloc.h new file mode 100644 index 0000000..8cbfbd9 --- /dev/null +++ b/kalloc.h @@ -0,0 +1,82 @@ +#ifndef _KALLOC_H_ +#define _KALLOC_H_ + +#include /* for size_t */ + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + size_t capacity, available, n_blocks, n_cores, largest; +} km_stat_t; + +void *kmalloc(void *km, size_t size); +void *krealloc(void *km, void *ptr, size_t size); +void *krelocate(void *km, void *ap, size_t n_bytes); +void *kcalloc(void *km, size_t count, size_t size); +void kfree(void *km, void *ptr); + +void *km_init(void); +void *km_init2(void *km_par, size_t min_core_size); +void km_destroy(void *km); +void km_stat(const void *_km, km_stat_t *s); +void km_stat_print(const void *km); + +#ifdef __cplusplus +} +#endif + +#define Kmalloc(km, type, cnt) ((type*)kmalloc((km), (cnt) * sizeof(type))) +#define Kcalloc(km, type, cnt) ((type*)kcalloc((km), (cnt), sizeof(type))) +#define Krealloc(km, type, ptr, cnt) ((type*)krealloc((km), (ptr), (cnt) * sizeof(type))) + +#define KMALLOC(km, ptr, len) ((ptr) = (__typeof__(ptr))kmalloc((km), (len) * sizeof(*(ptr)))) +#define KCALLOC(km, ptr, len) ((ptr) = (__typeof__(ptr))kcalloc((km), (len), sizeof(*(ptr)))) +#define KREALLOC(km, ptr, len) ((ptr) = (__typeof__(ptr))krealloc((km), (ptr), (len) * sizeof(*(ptr)))) + +#define KEXPAND(km, a, m) do { \ + (m) = (m) >= 4? 
(m) + ((m)>>1) : 16; \ + KREALLOC((km), (a), (m)); \ + } while (0) + +#ifndef klib_unused +#if (defined __clang__ && __clang_major__ >= 3) || (defined __GNUC__ && __GNUC__ >= 3) +#define klib_unused __attribute__ ((__unused__)) +#else +#define klib_unused +#endif +#endif /* klib_unused */ + +#define KALLOC_POOL_INIT2(SCOPE, name, kmptype_t) \ + typedef struct { \ + size_t cnt, n, max; \ + kmptype_t **buf; \ + void *km; \ + } kmp_##name##_t; \ + SCOPE kmp_##name##_t *kmp_init_##name(void *km) { \ + kmp_##name##_t *mp; \ + KCALLOC(km, mp, 1); \ + mp->km = km; \ + return mp; \ + } \ + SCOPE void kmp_destroy_##name(kmp_##name##_t *mp) { \ + size_t k; \ + for (k = 0; k < mp->n; ++k) kfree(mp->km, mp->buf[k]); \ + kfree(mp->km, mp->buf); kfree(mp->km, mp); \ + } \ + SCOPE kmptype_t *kmp_alloc_##name(kmp_##name##_t *mp) { \ + ++mp->cnt; \ + if (mp->n == 0) return (kmptype_t*)kcalloc(mp->km, 1, sizeof(kmptype_t)); \ + return mp->buf[--mp->n]; \ + } \ + SCOPE void kmp_free_##name(kmp_##name##_t *mp, kmptype_t *p) { \ + --mp->cnt; \ + if (mp->n == mp->max) KEXPAND(mp->km, mp->buf, mp->max); \ + mp->buf[mp->n++] = p; \ + } + +#define KALLOC_POOL_INIT(name, kmptype_t) \ + KALLOC_POOL_INIT2(static inline klib_unused, name, kmptype_t) + +#endif diff --git a/kavl.h b/kavl.h new file mode 100644 index 0000000..e0a8e1b --- /dev/null +++ b/kavl.h @@ -0,0 +1,414 @@ +/* The MIT License + + Copyright (c) 2018 by Attractive Chaos + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of 
the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. +*/ + +/* An example: + +#include +#include +#include +#include "kavl.h" + +struct my_node { + char key; + KAVL_HEAD(struct my_node) head; +}; +#define my_cmp(p, q) (((q)->key < (p)->key) - ((p)->key < (q)->key)) +KAVL_INIT(my, struct my_node, head, my_cmp) + +int main(void) { + const char *str = "MNOLKQOPHIA"; // from wiki, except a duplicate + struct my_node *root = 0; + int i, l = strlen(str); + for (i = 0; i < l; ++i) { // insert in the input order + struct my_node *q, *p = malloc(sizeof(*p)); + p->key = str[i]; + q = kavl_insert(my, &root, p, 0); + if (p != q) free(p); // if already present, free + } + kavl_itr_t(my) itr; + kavl_itr_first(my, root, &itr); // place at first + do { // traverse + const struct my_node *p = kavl_at(&itr); + putchar(p->key); + free((void*)p); // free node + } while (kavl_itr_next(my, &itr)); + putchar('\n'); + return 0; +} +*/ + +#ifndef KAVL_H +#define KAVL_H + +#ifdef __STRICT_ANSI__ +#define inline __inline__ +#endif + +#define KAVL_MAX_DEPTH 64 + +#define kavl_size(head, p) ((p)? (p)->head.size : 0) +#define kavl_size_child(head, q, i) ((q)->head.p[(i)]? 
(q)->head.p[(i)]->head.size : 0) + +#define KAVL_HEAD(__type) \ + struct { \ + __type *p[2]; \ + signed char balance; /* balance factor */ \ + unsigned size; /* #elements in subtree */ \ + } + +#define __KAVL_FIND(suf, __scope, __type, __head, __cmp) \ + __scope __type *kavl_find_##suf(const __type *root, const __type *x, unsigned *cnt_) { \ + const __type *p = root; \ + unsigned cnt = 0; \ + while (p != 0) { \ + int cmp; \ + cmp = __cmp(x, p); \ + if (cmp >= 0) cnt += kavl_size_child(__head, p, 0) + 1; \ + if (cmp < 0) p = p->__head.p[0]; \ + else if (cmp > 0) p = p->__head.p[1]; \ + else break; \ + } \ + if (cnt_) *cnt_ = cnt; \ + return (__type*)p; \ + } \ + __scope __type *kavl_interval_##suf(const __type *root, const __type *x, __type **lower, __type **upper) { \ + const __type *p = root, *l = 0, *u = 0; \ + while (p != 0) { \ + int cmp; \ + cmp = __cmp(x, p); \ + if (cmp < 0) u = p, p = p->__head.p[0]; \ + else if (cmp > 0) l = p, p = p->__head.p[1]; \ + else { l = u = p; break; } \ + } \ + if (lower) *lower = (__type*)l; \ + if (upper) *upper = (__type*)u; \ + return (__type*)p; \ + } + +#define __KAVL_ROTATE(suf, __type, __head) \ + /* one rotation: (a,(b,c)q)p => ((a,b)p,c)q */ \ + static inline __type *kavl_rotate1_##suf(__type *p, int dir) { /* dir=0 to left; dir=1 to right */ \ + int opp = 1 - dir; /* opposite direction */ \ + __type *q = p->__head.p[opp]; \ + unsigned size_p = p->__head.size; \ + p->__head.size -= q->__head.size - kavl_size_child(__head, q, dir); \ + q->__head.size = size_p; \ + p->__head.p[opp] = q->__head.p[dir]; \ + q->__head.p[dir] = p; \ + return q; \ + } \ + /* two consecutive rotations: (a,((b,c)r,d)q)p => ((a,b)p,(c,d)q)r */ \ + static inline __type *kavl_rotate2_##suf(__type *p, int dir) { \ + int b1, opp = 1 - dir; \ + __type *q = p->__head.p[opp], *r = q->__head.p[dir]; \ + unsigned size_x_dir = kavl_size_child(__head, r, dir); \ + r->__head.size = p->__head.size; \ + p->__head.size -= q->__head.size - size_x_dir; \ + 
q->__head.size -= size_x_dir + 1; \ + p->__head.p[opp] = r->__head.p[dir]; \ + r->__head.p[dir] = p; \ + q->__head.p[dir] = r->__head.p[opp]; \ + r->__head.p[opp] = q; \ + b1 = dir == 0? +1 : -1; \ + if (r->__head.balance == b1) q->__head.balance = 0, p->__head.balance = -b1; \ + else if (r->__head.balance == 0) q->__head.balance = p->__head.balance = 0; \ + else q->__head.balance = b1, p->__head.balance = 0; \ + r->__head.balance = 0; \ + return r; \ + } + +#define __KAVL_INSERT(suf, __scope, __type, __head, __cmp) \ + __scope __type *kavl_insert_##suf(__type **root_, __type *x, unsigned *cnt_) { \ + unsigned char stack[KAVL_MAX_DEPTH]; \ + __type *path[KAVL_MAX_DEPTH]; \ + __type *bp, *bq; \ + __type *p, *q, *r = 0; /* _r_ is potentially the new root */ \ + int i, which = 0, top, b1, path_len; \ + unsigned cnt = 0; \ + bp = *root_, bq = 0; \ + /* find the insertion location */ \ + for (p = bp, q = bq, top = path_len = 0; p; q = p, p = p->__head.p[which]) { \ + int cmp; \ + cmp = __cmp(x, p); \ + if (cmp >= 0) cnt += kavl_size_child(__head, p, 0) + 1; \ + if (cmp == 0) { \ + if (cnt_) *cnt_ = cnt; \ + return p; \ + } \ + if (p->__head.balance != 0) \ + bq = q, bp = p, top = 0; \ + stack[top++] = which = (cmp > 0); \ + path[path_len++] = p; \ + } \ + if (cnt_) *cnt_ = cnt; \ + x->__head.balance = 0, x->__head.size = 1, x->__head.p[0] = x->__head.p[1] = 0; \ + if (q == 0) *root_ = x; \ + else q->__head.p[which] = x; \ + if (bp == 0) return x; \ + for (i = 0; i < path_len; ++i) ++path[i]->__head.size; \ + for (p = bp, top = 0; p != x; p = p->__head.p[stack[top]], ++top) /* update balance factors */ \ + if (stack[top] == 0) --p->__head.balance; \ + else ++p->__head.balance; \ + if (bp->__head.balance > -2 && bp->__head.balance < 2) return x; /* no re-balance needed */ \ + /* re-balance */ \ + which = (bp->__head.balance < 0); \ + b1 = which == 0? 
+1 : -1; \ + q = bp->__head.p[1 - which]; \ + if (q->__head.balance == b1) { \ + r = kavl_rotate1_##suf(bp, which); \ + q->__head.balance = bp->__head.balance = 0; \ + } else r = kavl_rotate2_##suf(bp, which); \ + if (bq == 0) *root_ = r; \ + else bq->__head.p[bp != bq->__head.p[0]] = r; \ + return x; \ + } + +#define __KAVL_ERASE(suf, __scope, __type, __head, __cmp) \ + __scope __type *kavl_erase_##suf(__type **root_, const __type *x, unsigned *cnt_) { \ + __type *p, *path[KAVL_MAX_DEPTH], fake; \ + unsigned char dir[KAVL_MAX_DEPTH]; \ + int i, d = 0, cmp; \ + unsigned cnt = 0; \ + fake.__head.p[0] = *root_, fake.__head.p[1] = 0; \ + if (cnt_) *cnt_ = 0; \ + if (x) { \ + for (cmp = -1, p = &fake; cmp; cmp = __cmp(x, p)) { \ + int which = (cmp > 0); \ + if (cmp > 0) cnt += kavl_size_child(__head, p, 0) + 1; \ + dir[d] = which; \ + path[d++] = p; \ + p = p->__head.p[which]; \ + if (p == 0) { \ + if (cnt_) *cnt_ = 0; \ + return 0; \ + } \ + } \ + cnt += kavl_size_child(__head, p, 0) + 1; /* because p==x is not counted */ \ + } else { \ + for (p = &fake, cnt = 1; p; p = p->__head.p[0]) \ + dir[d] = 0, path[d++] = p; \ + p = path[--d]; \ + } \ + if (cnt_) *cnt_ = cnt; \ + for (i = 1; i < d; ++i) --path[i]->__head.size; \ + if (p->__head.p[1] == 0) { /* ((1,.)2,3)4 => (1,3)4; p=2 */ \ + path[d-1]->__head.p[dir[d-1]] = p->__head.p[0]; \ + } else { \ + __type *q = p->__head.p[1]; \ + if (q->__head.p[0] == 0) { /* ((1,2)3,4)5 => ((1)2,4)5; p=3 */ \ + q->__head.p[0] = p->__head.p[0]; \ + q->__head.balance = p->__head.balance; \ + path[d-1]->__head.p[dir[d-1]] = q; \ + path[d] = q, dir[d++] = 1; \ + q->__head.size = p->__head.size - 1; \ + } else { /* ((1,((.,2)3,4)5)6,7)8 => ((1,(2,4)5)3,7)8; p=6 */ \ + __type *r; \ + int e = d++; /* backup _d_ */\ + for (;;) { \ + dir[d] = 0; \ + path[d++] = q; \ + r = q->__head.p[0]; \ + if (r->__head.p[0] == 0) break; \ + q = r; \ + } \ + r->__head.p[0] = p->__head.p[0]; \ + q->__head.p[0] = r->__head.p[1]; \ + r->__head.p[1] = 
p->__head.p[1]; \ + r->__head.balance = p->__head.balance; \ + path[e-1]->__head.p[dir[e-1]] = r; \ + path[e] = r, dir[e] = 1; \ + for (i = e + 1; i < d; ++i) --path[i]->__head.size; \ + r->__head.size = p->__head.size - 1; \ + } \ + } \ + while (--d > 0) { \ + __type *q = path[d]; \ + int which, other, b1 = 1, b2 = 2; \ + which = dir[d], other = 1 - which; \ + if (which) b1 = -b1, b2 = -b2; \ + q->__head.balance += b1; \ + if (q->__head.balance == b1) break; \ + else if (q->__head.balance == b2) { \ + __type *r = q->__head.p[other]; \ + if (r->__head.balance == -b1) { \ + path[d-1]->__head.p[dir[d-1]] = kavl_rotate2_##suf(q, which); \ + } else { \ + path[d-1]->__head.p[dir[d-1]] = kavl_rotate1_##suf(q, which); \ + if (r->__head.balance == 0) { \ + r->__head.balance = -b1; \ + q->__head.balance = b1; \ + break; \ + } else r->__head.balance = q->__head.balance = 0; \ + } \ + } \ + } \ + *root_ = fake.__head.p[0]; \ + return p; \ + } + +#define kavl_free(__type, __head, __root, __free) do { \ + __type *_p, *_q; \ + for (_p = __root; _p; _p = _q) { \ + if (_p->__head.p[0] == 0) { \ + _q = _p->__head.p[1]; \ + __free(_p); \ + } else { \ + _q = _p->__head.p[0]; \ + _p->__head.p[0] = _q->__head.p[1]; \ + _q->__head.p[1] = _p; \ + } \ + } \ + } while (0) + +#define __KAVL_ITR(suf, __scope, __type, __head, __cmp) \ + struct kavl_itr_##suf { \ + const __type *stack[KAVL_MAX_DEPTH], **top; \ + }; \ + __scope void kavl_itr_first_##suf(const __type *root, struct kavl_itr_##suf *itr) { \ + const __type *p; \ + for (itr->top = itr->stack - 1, p = root; p; p = p->__head.p[0]) \ + *++itr->top = p; \ + } \ + __scope int kavl_itr_find_##suf(const __type *root, const __type *x, struct kavl_itr_##suf *itr) { \ + const __type *p = root; \ + itr->top = itr->stack - 1; \ + while (p != 0) { \ + int cmp; \ + *++itr->top = p; \ + cmp = __cmp(x, p); \ + if (cmp < 0) p = p->__head.p[0]; \ + else if (cmp > 0) p = p->__head.p[1]; \ + else break; \ + } \ + return p? 
1 : 0; \ + } \ + __scope int kavl_itr_next_bidir_##suf(struct kavl_itr_##suf *itr, int dir) { \ + const __type *p; \ + if (itr->top < itr->stack) return 0; \ + dir = !!dir; \ + p = (*itr->top)->__head.p[dir]; \ + if (p) { /* go down */ \ + for (; p; p = p->__head.p[!dir]) \ + *++itr->top = p; \ + return 1; \ + } else { /* go up */ \ + do { \ + p = *itr->top--; \ + } while (itr->top >= itr->stack && p == (*itr->top)->__head.p[dir]); \ + return itr->top < itr->stack? 0 : 1; \ + } \ + } \ + +/** + * Insert a node to the tree + * + * @param suf name suffix used in KAVL_INIT() + * @param proot pointer to the root of the tree (in/out: root may change) + * @param x node to insert (in) + * @param cnt number of nodes smaller than or equal to _x_; can be NULL (out) + * + * @return _x_ if not present in the tree, or the node equal to x. + */ +#define kavl_insert(suf, proot, x, cnt) kavl_insert_##suf(proot, x, cnt) + +/** + * Find a node in the tree + * + * @param suf name suffix used in KAVL_INIT() + * @param root root of the tree + * @param x node value to find (in) + * @param cnt number of nodes smaller than or equal to _x_; can be NULL (out) + * + * @return node equal to _x_ if present, or NULL if absent + */ +#define kavl_find(suf, root, x, cnt) kavl_find_##suf(root, x, cnt) +#define kavl_interval(suf, root, x, lower, upper) kavl_interval_##suf(root, x, lower, upper) + +/** + * Delete a node from the tree + * + * @param suf name suffix used in KAVL_INIT() + * @param proot pointer to the root of the tree (in/out: root may change) + * @param x node value to delete; if NULL, delete the first node (in) + * + * @return node removed from the tree if present, or NULL if absent + */ +#define kavl_erase(suf, proot, x, cnt) kavl_erase_##suf(proot, x, cnt) +#define kavl_erase_first(suf, proot) kavl_erase_##suf(proot, 0, 0) + +#define kavl_itr_t(suf) struct kavl_itr_##suf + +/** + * Place the iterator at the smallest object + * + * @param suf name suffix used in KAVL_INIT() + * 
@param root root of the tree + * @param itr iterator + */ +#define kavl_itr_first(suf, root, itr) kavl_itr_first_##suf(root, itr) + +/** + * Place the iterator at the object equal to or greater than the query + * + * @param suf name suffix used in KAVL_INIT() + * @param root root of the tree + * @param x query (in) + * @param itr iterator (out) + * + * @return 1 if find; 0 otherwise. kavl_at(itr) is NULL if and only if query is + * larger than all objects in the tree + */ +#define kavl_itr_find(suf, root, x, itr) kavl_itr_find_##suf(root, x, itr) + +/** + * Move to the next object in order + * + * @param itr iterator (modified) + * + * @return 1 if there is a next object; 0 otherwise + */ +#define kavl_itr_next(suf, itr) kavl_itr_next_bidir_##suf(itr, 1) +#define kavl_itr_prev(suf, itr) kavl_itr_next_bidir_##suf(itr, 0) + +/** + * Return the pointer at the iterator + * + * @param itr iterator + * + * @return pointer if present; NULL otherwise + */ +#define kavl_at(itr) ((itr)->top < (itr)->stack? 
0 : *(itr)->top) + +#define KAVL_INIT2(suf, __scope, __type, __head, __cmp) \ + __KAVL_FIND(suf, __scope, __type, __head, __cmp) \ + __KAVL_ROTATE(suf, __type, __head) \ + __KAVL_INSERT(suf, __scope, __type, __head, __cmp) \ + __KAVL_ERASE(suf, __scope, __type, __head, __cmp) \ + __KAVL_ITR(suf, __scope, __type, __head, __cmp) + +#define KAVL_INIT(suf, __type, __head, __cmp) \ + KAVL_INIT2(suf,, __type, __head, __cmp) + +#endif diff --git a/kdq.h b/kdq.h new file mode 100644 index 0000000..c43944b --- /dev/null +++ b/kdq.h @@ -0,0 +1,134 @@ +#ifndef __AC_KDQ_H +#define __AC_KDQ_H + +#include +#include +#include +#include "kalloc.h" + +#define __KDQ_TYPE(type) \ + typedef struct { \ + uint64_t front:58, bits:6, count, mask; \ + type *a; \ + void *km; \ + } kdq_##type##_t; + +#define kdq_t(type) kdq_##type##_t +#define kdq_size(q) ((q)->count) +#define kdq_first(q) ((q)->a[(q)->front]) +#define kdq_last(q) ((q)->a[((q)->front + (q)->count - 1) & (q)->mask]) +#define kdq_at(q, i) ((q)->a[((q)->front + (i)) & (q)->mask]) + +#define __KDQ_IMPL(type, SCOPE) \ + SCOPE kdq_##type##_t *kdq_init2_##type(void *km, int32_t bits) \ + { \ + kdq_##type##_t *q; \ + q = (kdq_##type##_t*)kcalloc(km, 1, sizeof(kdq_##type##_t)); \ + q->bits = bits, q->mask = (1ULL<bits) - 1; \ + q->a = (type*)kmalloc(km, (1<bits) * sizeof(type)); \ + q->km = km; \ + return q; \ + } \ + SCOPE kdq_##type##_t *kdq_init_##type(void *km) { return kdq_init2_##type(km, 2); } \ + SCOPE void kdq_destroy_##type(kdq_##type##_t *q) \ + { \ + if (q == 0) return; \ + kfree(q->km, q->a); kfree(q->km, q); \ + } \ + SCOPE int kdq_resize_##type(kdq_##type##_t *q, int new_bits) \ + { \ + size_t new_size = 1ULL<bits; \ + if (new_size < q->count) { /* not big enough */ \ + int i; \ + for (i = 0; i < 64; ++i) \ + if (1ULL< q->count) break; \ + new_bits = i, new_size = 1ULL<bits) return q->bits; /* unchanged */ \ + if (new_bits > q->bits) q->a = (type*)krealloc(q->km, q->a, (1ULL<front + q->count <= old_size) { /* unwrapped 
*/ \ + if (q->front + q->count > new_size) /* only happens for shrinking */ \ + memmove(q->a, q->a + new_size, (q->front + q->count - new_size) * sizeof(type)); \ + } else { /* wrapped */ \ + memmove(q->a + (new_size - (old_size - q->front)), q->a + q->front, (old_size - q->front) * sizeof(type)); \ + q->front = new_size - (old_size - q->front); \ + } \ + q->bits = new_bits, q->mask = (1ULL<bits) - 1; \ + if (new_bits < q->bits) q->a = (type*)krealloc(q->km, q->a, (1ULL<bits; \ + } \ + SCOPE type *kdq_pushp_##type(kdq_##type##_t *q) \ + { \ + if (q->count == 1ULL<bits) kdq_resize_##type(q, q->bits + 1); \ + return &q->a[((q->count++) + q->front) & (q)->mask]; \ + } \ + SCOPE void kdq_push_##type(kdq_##type##_t *q, type v) \ + { \ + if (q->count == 1ULL<bits) kdq_resize_##type(q, q->bits + 1); \ + q->a[((q->count++) + q->front) & (q)->mask] = v; \ + } \ + SCOPE type *kdq_unshiftp_##type(kdq_##type##_t *q) \ + { \ + if (q->count == 1ULL<bits) kdq_resize_##type(q, q->bits + 1); \ + ++q->count; \ + q->front = q->front? q->front - 1 : (1ULL<bits) - 1; \ + return &q->a[q->front]; \ + } \ + SCOPE void kdq_unshift_##type(kdq_##type##_t *q, type v) \ + { \ + type *p; \ + p = kdq_unshiftp_##type(q); \ + *p = v; \ + } \ + SCOPE type *kdq_pop_##type(kdq_##type##_t *q) \ + { \ + return q->count? 
&q->a[((--q->count) + q->front) & q->mask] : 0; \ + } \ + SCOPE type *kdq_shift_##type(kdq_##type##_t *q) \ + { \ + type *d = 0; \ + if (q->count == 0) return 0; \ + d = &q->a[q->front++]; \ + q->front &= q->mask; \ + --q->count; \ + return d; \ + } + +#define KDQ_INIT2(type, SCOPE) \ + __KDQ_TYPE(type) \ + __KDQ_IMPL(type, SCOPE) + +#ifndef klib_unused +#if (defined __clang__ && __clang_major__ >= 3) || (defined __GNUC__ && __GNUC__ >= 3) +#define klib_unused __attribute__ ((__unused__)) +#else +#define klib_unused +#endif +#endif /* klib_unused */ + +#define KDQ_INIT(type) KDQ_INIT2(type, static inline klib_unused) + +#define KDQ_DECLARE(type) \ + __KDQ_TYPE(type) \ + kdq_##type##_t *kdq_init_##type(); \ + void kdq_destroy_##type(kdq_##type##_t *q); \ + int kdq_resize_##type(kdq_##type##_t *q, int new_bits); \ + type *kdq_pushp_##type(kdq_##type##_t *q); \ + void kdq_push_##type(kdq_##type##_t *q, type v); \ + type *kdq_unshiftp_##type(kdq_##type##_t *q); \ + void kdq_unshift_##type(kdq_##type##_t *q, type v); \ + type *kdq_pop_##type(kdq_##type##_t *q); \ + type *kdq_shift_##type(kdq_##type##_t *q); + +#define kdq_init2(type, km, bits) kdq_init2_##type(km, bits) +#define kdq_init(type, km) kdq_init_##type(km) +#define kdq_destroy(type, q) kdq_destroy_##type(q) +#define kdq_resize(type, q, new_bits) kdq_resize_##type(q, new_bits) +#define kdq_pushp(type, q) kdq_pushp_##type(q) +#define kdq_push(type, q, v) kdq_push_##type(q, v) +#define kdq_pop(type, q) kdq_pop_##type(q) +#define kdq_unshiftp(type, q) kdq_unshiftp_##type(q) +#define kdq_unshift(type, q, v) kdq_unshift_##type(q, v) +#define kdq_shift(type, q) kdq_shift_##type(q) + +#endif diff --git a/ketopt.h b/ketopt.h new file mode 100644 index 0000000..70193a5 --- /dev/null +++ b/ketopt.h @@ -0,0 +1,116 @@ +#ifndef KETOPT_H +#define KETOPT_H + +#include /* for strchr() and strncmp() */ + +#define ko_no_argument 0 +#define ko_required_argument 1 +#define ko_optional_argument 2 + +typedef struct { + int ind; /* 
equivalent to optind */ + int opt; /* equivalent to optopt */ + char *arg; /* equivalent to optarg */ + int longidx; /* index of a long option; or -1 if short */ + /* private variables not intended for external uses */ + int i, pos, n_args; +} ketopt_t; + +typedef struct { + char *name; + int has_arg; + int val; +} ko_longopt_t; + +static ketopt_t KETOPT_INIT = { 1, 0, 0, -1, 1, 0, 0 }; + +static void ketopt_permute(char *argv[], int j, int n) /* move argv[j] over n elements to the left */ +{ + int k; + char *p = argv[j]; + for (k = 0; k < n; ++k) + argv[j - k] = argv[j - k - 1]; + argv[j - k] = p; +} + +/** + * Parse command-line options and arguments + * + * This function has a similar interface to GNU's getopt_long(). Each call + * parses one option and returns the option name. s->arg points to the option + * argument if present. The function returns -1 when all command-line arguments + * are parsed. In this case, s->ind is the index of the first non-option + * argument. + * + * @param s status; shall be initialized to KETOPT_INIT on the first call + * @param argc length of argv[] + * @param argv list of command-line arguments; argv[0] is ignored + * @param permute non-zero to move options ahead of non-option arguments + * @param ostr option string + * @param longopts long options, ended by an entry whose name is NULL; may be NULL. A long option may be abbreviated to any unambiguous prefix, and an exact name always wins + * + * @return ASCII for a short option; ko_longopt_t::val for a long option; -1 if + * argv[] is fully processed; '?' 
for an unknown option or an ambiguous + * long option; ':' if an option argument is missing + */ +static int ketopt(ketopt_t *s, int argc, char *argv[], int permute, const char *ostr, const ko_longopt_t *longopts) +{ + int opt = -1, i0, j; + if (permute) { + while (s->i < argc && (argv[s->i][0] != '-' || argv[s->i][1] == '\0')) + ++s->i, ++s->n_args; + } + s->arg = 0, s->longidx = -1, i0 = s->i; + if (s->i >= argc || argv[s->i][0] != '-' || argv[s->i][1] == '\0') { + s->ind = s->i - s->n_args; + return -1; + } + if (argv[s->i][0] == '-' && argv[s->i][1] == '-') { /* "--" or a long option */ + if (argv[s->i][2] == '\0') { /* a bare "--" */ + ketopt_permute(argv, s->i, s->n_args); + ++s->i, s->ind = s->i - s->n_args; + return -1; + } + s->opt = 0, opt = '?', s->pos = -1; + if (longopts) { /* parse long options */ + int k, n_matches = 0, exact = 0; + const ko_longopt_t *o = 0; + for (j = 2; argv[s->i][j] != '\0' && argv[s->i][j] != '='; ++j) {} /* find the end of the option name */ + for (k = 0; longopts[k].name != 0 && !exact; ++k) /* an exact name ends the search */ + if (strncmp(&argv[s->i][2], longopts[k].name, j - 2) == 0) /* the typed name is a prefix of this option */ + ++n_matches, o = &longopts[k], exact = (longopts[k].name[j - 2] == '\0'); + if (n_matches == 1 || exact) { /* a unique prefix, or an exact name that is never ambiguous */ + s->opt = opt = o->val, s->longidx = o - longopts; + if (argv[s->i][j] == '=') s->arg = &argv[s->i][j + 1]; + if (o->has_arg == 1 && argv[s->i][j] == '\0') { + if (s->i < argc - 1) s->arg = argv[++s->i]; + else opt = ':'; /* missing option argument */ + } + } + } + } else { /* a short option */ + char *p; + if (s->pos == 0) s->pos = 1; + opt = s->opt = argv[s->i][s->pos++]; + p = strchr((char*)ostr, opt); + if (p == 0) { + opt = '?'; /* unknown option */ + } else if (p[1] == ':') { + if (argv[s->i][s->pos] == 0) { + if (s->i < argc - 1) s->arg = argv[++s->i]; + else opt = ':'; /* missing option argument */ + } else s->arg = &argv[s->i][s->pos]; + s->pos = -1; + } + } + if (s->pos < 0 || argv[s->i][s->pos] == 0) { + ++s->i, s->pos = 0; + if (s->n_args > 0) /* permute */ + for (j = i0; j < s->i; ++j) + ketopt_permute(argv, j, s->n_args); + } + 
s->ind = s->i - s->n_args; + return opt; +} + +#endif diff --git a/khashl.h b/khashl.h new file mode 100644 index 0000000..a7bfeab --- /dev/null +++ b/khashl.h @@ -0,0 +1,348 @@ +/* The MIT License + + Copyright (c) 2019 by Attractive Chaos + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. 
+*/ + +#ifndef __AC_KHASHL_H +#define __AC_KHASHL_H + +#define AC_VERSION_KHASHL_H "0.1" + +#include <stdlib.h> +#include <string.h> +#include <limits.h> +#include "kalloc.h" + +/************************************ + * Compiler specific configurations * + ************************************/ + +#if UINT_MAX == 0xffffffffu +typedef unsigned int khint32_t; +#elif ULONG_MAX == 0xffffffffu +typedef unsigned long khint32_t; +#endif + +#if ULONG_MAX == ULLONG_MAX +typedef unsigned long khint64_t; +#else +typedef unsigned long long khint64_t; +#endif + +#ifndef kh_inline +#ifdef _MSC_VER +#define kh_inline __inline +#else +#define kh_inline inline +#endif +#endif /* kh_inline */ + +#ifndef klib_unused +#if (defined __clang__ && __clang_major__ >= 3) || (defined __GNUC__ && __GNUC__ >= 3) +#define klib_unused __attribute__ ((__unused__)) +#else +#define klib_unused +#endif +#endif /* klib_unused */ + +#define KH_LOCAL static kh_inline klib_unused + +typedef khint32_t khint_t; + +/**************************** + * Simple private functions * + ****************************/ + +#define __kh_used(flag, i) (flag[i>>5] >> (i&0x1fU) & 1U) +#define __kh_set_used(flag, i) (flag[i>>5] |= 1U<<(i&0x1fU)) +#define __kh_set_unused(flag, i) (flag[i>>5] &= ~(1U<<(i&0x1fU))) + +#define __kh_fsize(m) ((m) < 32? 
1 : (m)>>5) + +static kh_inline khint_t __kh_h2b(khint_t hash, khint_t bits) { return hash * 2654435769U >> (32 - bits); } /* bucket index = top `bits` bits of the multiplied hash */ + +/******************* + * Hash table base * + *******************/ + +#define __KHASHL_TYPE(HType, khkey_t) \ + typedef struct HType { \ + khint_t bits, count; /* 1<<bits buckets once keys is allocated; count stored keys */ \ + khint32_t *used; /* one occupancy bit per bucket */ \ + khkey_t *keys; \ + void *km; /* allocator handle passed to kmalloc()/kfree() */ \ + } HType; + +#define __KHASHL_PROTOTYPES(HType, prefix, khkey_t) \ + extern HType *prefix##_init2(void *km); \ + extern HType *prefix##_init(void); \ + extern void prefix##_destroy(HType *h); \ + extern void prefix##_clear(HType *h); \ + extern khint_t prefix##_getp(const HType *h, const khkey_t *key); \ + extern int prefix##_resize(HType *h, khint_t new_n_buckets); \ + extern khint_t prefix##_putp(HType *h, const khkey_t *key, int *absent); \ + extern int prefix##_del(HType *h, khint_t k); /* int, matching the definition in __KHASHL_IMPL_DEL */ + +#define __KHASHL_IMPL_BASIC(SCOPE, HType, prefix) \ + SCOPE HType *prefix##_init2(void *km) { \ + HType *h; \ + h = (HType*)kcalloc(km, 1, sizeof(HType)); \ + if (h) h->km = km; /* NULL is returned when the allocation fails */ \ + return h; \ + } \ + SCOPE HType *prefix##_init(void) { return prefix##_init2(0); } \ + SCOPE void prefix##_destroy(HType *h) { \ + void *km; \ + if (!h) return; \ + km = h->km; \ + kfree(km, (void*)h->keys); kfree(km, h->used); \ + kfree(km, h); \ + } \ + SCOPE void prefix##_clear(HType *h) { \ + if (h && h->used) { \ + khint_t n_buckets = 1U << h->bits; \ + memset(h->used, 0, __kh_fsize(n_buckets) * sizeof(khint32_t)); \ + h->count = 0; \ + } \ + } + +#define __KHASHL_IMPL_GET(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ + SCOPE khint_t prefix##_getp(const HType *h, const khkey_t *key) { /* returns the bucket index, or the bucket count if absent */ \ + khint_t i, last, n_buckets, mask; \ + if (h->keys == 0) return 0; \ + n_buckets = 1U << h->bits; \ + mask = n_buckets - 1U; \ + i = last = __kh_h2b(__hash_fn(*key), h->bits); \ + while (__kh_used(h->used, i) && !__hash_eq(h->keys[i], *key)) { /* linear probing */ \ + i = (i + 1U) & mask; \ + if (i == last) return n_buckets; \ + } \ + return !__kh_used(h->used, i)? 
n_buckets : i; \ + } \ + SCOPE khint_t prefix##_get(const HType *h, khkey_t key) { return prefix##_getp(h, &key); } + +#define __KHASHL_IMPL_RESIZE(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ + SCOPE int prefix##_resize(HType *h, khint_t new_n_buckets) { /* returns 0 on success or when the request is too small; -1 if out of memory */ \ + khint32_t *new_used = 0; \ + khint_t j = 0, x = new_n_buckets, n_buckets, new_bits, new_mask; \ + while ((x >>= 1) != 0) ++j; \ + if (new_n_buckets & (new_n_buckets - 1)) ++j; /* round up to a power of two */ \ + new_bits = j > 2? j : 2; \ + new_n_buckets = 1U << new_bits; \ + if (h->count > (new_n_buckets>>1) + (new_n_buckets>>2)) return 0; /* requested size is too small */ \ + new_used = (khint32_t*)kmalloc(h->km, __kh_fsize(new_n_buckets) * sizeof(khint32_t)); \ + if (!new_used) return -1; /* not enough memory; checked before memset() touches the buffer */ \ + memset(new_used, 0, __kh_fsize(new_n_buckets) * sizeof(khint32_t)); \ + n_buckets = h->keys? 1U<<h->bits : 0U; \ + if (n_buckets < new_n_buckets) { /* expand */ \ + khkey_t *new_keys = (khkey_t*)krealloc(h->km, (void*)h->keys, new_n_buckets * sizeof(khkey_t)); \ + if (!new_keys) { kfree(h->km, new_used); return -1; } \ + h->keys = new_keys; \ + } /* otherwise shrink */ \ + new_mask = new_n_buckets - 1; \ + for (j = 0; j != n_buckets; ++j) { \ + khkey_t key; \ + if (!__kh_used(h->used, j)) continue; \ + key = h->keys[j]; \ + __kh_set_unused(h->used, j); \ + while (1) { /* kick-out process; sort of like in Cuckoo hashing */ \ + khint_t i; \ + i = __kh_h2b(__hash_fn(key), new_bits); \ + while (__kh_used(new_used, i)) i = (i + 1) & new_mask; \ + __kh_set_used(new_used, i); \ + if (i < n_buckets && __kh_used(h->used, i)) { /* kick out the existing element */ \ + { khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \ + __kh_set_unused(h->used, i); /* mark it as deleted in the old hash table */ \ + } else { /* write the element and jump out of the loop */ \ + h->keys[i] = key; \ + break; \ + } \ + } \ + } \ + if (n_buckets > new_n_buckets) /* shrink the hash table */ \ + h->keys = (khkey_t*)krealloc(h->km, (void 
*)h->keys, new_n_buckets * sizeof(khkey_t)); \ + kfree(h->km, h->used); /* free the working space */ \ + h->used = new_used, h->bits = new_bits; \ + return 0; \ + } + +#define __KHASHL_IMPL_PUT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ + SCOPE khint_t prefix##_putp(HType *h, const khkey_t *key, int *absent) { /* *absent: 1 if inserted, 0 if already present, -1 if resizing failed */ \ + khint_t n_buckets, i, last, mask; \ + n_buckets = h->keys? 1U<<h->bits : 0U; \ + *absent = -1; \ + if (h->count >= (n_buckets>>1) + (n_buckets>>2)) { /* rehashing */ \ + if (prefix##_resize(h, n_buckets + 1U) < 0) \ + return n_buckets; \ + n_buckets = 1U<<h->bits; \ + } /* TODO: to implement automatically shrinking; resize() already support shrinking */ \ + mask = n_buckets - 1; \ + i = last = __kh_h2b(__hash_fn(*key), h->bits); \ + while (__kh_used(h->used, i) && !__hash_eq(h->keys[i], *key)) { \ + i = (i + 1U) & mask; \ + if (i == last) break; \ + } \ + if (!__kh_used(h->used, i)) { /* not present at all */ \ + h->keys[i] = *key; \ + __kh_set_used(h->used, i); \ + ++h->count; \ + *absent = 1; \ + } else *absent = 0; /* Don't touch h->keys[i] if present */ \ + return i; \ + } \ + SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { return prefix##_putp(h, &key, absent); } + +#define __KHASHL_IMPL_DEL(SCOPE, HType, prefix, khkey_t, __hash_fn) \ + SCOPE int prefix##_del(HType *h, khint_t i) { /* returns 0 if the table has no storage, 1 otherwise */ \ + khint_t j = i, k, mask, n_buckets; \ + if (h->keys == 0) return 0; \ + n_buckets = 1U<<h->bits; \ + mask = n_buckets - 1U; \ + while (1) { /* shift later entries of the probe run back into the freed bucket */ \ + j = (j + 1U) & mask; \ + if (j == i || !__kh_used(h->used, j)) break; /* j==i only when the table is completely full */ \ + k = __kh_h2b(__hash_fn(h->keys[j]), h->bits); \ + if ((j > i && (k <= i || k > j)) || (j < i && (k <= i && k > j))) \ + h->keys[i] = h->keys[j], i = j; \ + } \ + __kh_set_unused(h->used, i); \ + --h->count; \ + return 1; \ + } + +#define KHASHL_DECLARE(HType, prefix, khkey_t) \ + __KHASHL_TYPE(HType, khkey_t) \ + __KHASHL_PROTOTYPES(HType, prefix, khkey_t) + +#define KHASHL_INIT(SCOPE, 
HType, prefix, khkey_t, __hash_fn, __hash_eq) \ + __KHASHL_TYPE(HType, khkey_t) \ + __KHASHL_IMPL_BASIC(SCOPE, HType, prefix) \ + __KHASHL_IMPL_GET(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ + __KHASHL_IMPL_RESIZE(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ + __KHASHL_IMPL_PUT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ + __KHASHL_IMPL_DEL(SCOPE, HType, prefix, khkey_t, __hash_fn) + +/***************************** + * More convenient interface * + *****************************/ + +#define __kh_packed __attribute__ ((__packed__)) +#define __kh_cached_hash(x) ((x).hash) + +#define KHASHL_SET_INIT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ + typedef struct { khkey_t key; } __kh_packed HType##_s_bucket_t; \ + static kh_inline khint_t prefix##_s_hash(HType##_s_bucket_t x) { return __hash_fn(x.key); } \ + static kh_inline int prefix##_s_eq(HType##_s_bucket_t x, HType##_s_bucket_t y) { return __hash_eq(x.key, y.key); } \ + KHASHL_INIT(KH_LOCAL, HType, prefix##_s, HType##_s_bucket_t, prefix##_s_hash, prefix##_s_eq) \ + SCOPE HType *prefix##_init2(void *km) { return prefix##_s_init2(km); } \ + SCOPE HType *prefix##_init(void) { return prefix##_s_init(); } \ + SCOPE void prefix##_destroy(HType *h) { prefix##_s_destroy(h); } \ + SCOPE void prefix##_resize(HType *h, khint_t new_n_buckets) { prefix##_s_resize(h, new_n_buckets); } \ + SCOPE khint_t prefix##_get(const HType *h, khkey_t key) { HType##_s_bucket_t t; t.key = key; return prefix##_s_getp(h, &t); } \ + SCOPE int prefix##_del(HType *h, khint_t k) { return prefix##_s_del(h, k); } \ + SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { HType##_s_bucket_t t; t.key = key; return prefix##_s_putp(h, &t, absent); } + +#define KHASHL_MAP_INIT(SCOPE, HType, prefix, khkey_t, kh_val_t, __hash_fn, __hash_eq) \ + typedef struct { khkey_t key; kh_val_t val; } __kh_packed HType##_m_bucket_t; \ + static kh_inline khint_t prefix##_m_hash(HType##_m_bucket_t x) { return 
__hash_fn(x.key); } \ + static kh_inline int prefix##_m_eq(HType##_m_bucket_t x, HType##_m_bucket_t y) { return __hash_eq(x.key, y.key); } \ + KHASHL_INIT(KH_LOCAL, HType, prefix##_m, HType##_m_bucket_t, prefix##_m_hash, prefix##_m_eq) \ + SCOPE HType *prefix##_init2(void *km) { return prefix##_m_init2(km); } \ + SCOPE HType *prefix##_init(void) { return prefix##_m_init(); } \ + SCOPE void prefix##_destroy(HType *h) { prefix##_m_destroy(h); } \ + SCOPE void prefix##_resize(HType *h, khint_t new_n_buckets) { prefix##_m_resize(h, new_n_buckets); } \ + SCOPE khint_t prefix##_get(const HType *h, khkey_t key) { HType##_m_bucket_t t; t.key = key; return prefix##_m_getp(h, &t); } \ + SCOPE int prefix##_del(HType *h, khint_t k) { return prefix##_m_del(h, k); } \ + SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { HType##_m_bucket_t t; t.key = key; return prefix##_m_putp(h, &t, absent); } + +#define KHASHL_CSET_INIT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ + typedef struct { khkey_t key; khint_t hash; } __kh_packed HType##_cs_bucket_t; \ + static kh_inline int prefix##_cs_eq(HType##_cs_bucket_t x, HType##_cs_bucket_t y) { return x.hash == y.hash && __hash_eq(x.key, y.key); } \ + KHASHL_INIT(KH_LOCAL, HType, prefix##_cs, HType##_cs_bucket_t, __kh_cached_hash, prefix##_cs_eq) \ + SCOPE HType *prefix##_init(void) { return prefix##_cs_init(); } \ + SCOPE void prefix##_destroy(HType *h) { prefix##_cs_destroy(h); } \ + SCOPE khint_t prefix##_get(const HType *h, khkey_t key) { HType##_cs_bucket_t t; t.key = key; t.hash = __hash_fn(key); return prefix##_cs_getp(h, &t); } \ + SCOPE int prefix##_del(HType *h, khint_t k) { return prefix##_cs_del(h, k); } \ + SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { HType##_cs_bucket_t t; t.key = key, t.hash = __hash_fn(key); return prefix##_cs_putp(h, &t, absent); } + +#define KHASHL_CMAP_INIT(SCOPE, HType, prefix, khkey_t, kh_val_t, __hash_fn, __hash_eq) \ + typedef struct { khkey_t key; kh_val_t 
val; khint_t hash; } __kh_packed HType##_cm_bucket_t; \ + static kh_inline int prefix##_cm_eq(HType##_cm_bucket_t x, HType##_cm_bucket_t y) { return x.hash == y.hash && __hash_eq(x.key, y.key); } \ + KHASHL_INIT(KH_LOCAL, HType, prefix##_cm, HType##_cm_bucket_t, __kh_cached_hash, prefix##_cm_eq) \ + SCOPE HType *prefix##_init(void) { return prefix##_cm_init(); } \ + SCOPE void prefix##_destroy(HType *h) { prefix##_cm_destroy(h); } \ + SCOPE khint_t prefix##_get(const HType *h, khkey_t key) { HType##_cm_bucket_t t; t.key = key; t.hash = __hash_fn(key); return prefix##_cm_getp(h, &t); } \ + SCOPE int prefix##_del(HType *h, khint_t k) { return prefix##_cm_del(h, k); } \ + SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { HType##_cm_bucket_t t; t.key = key, t.hash = __hash_fn(key); return prefix##_cm_putp(h, &t, absent); } + +/************************** + * Public macro functions * + **************************/ + +#define kh_bucket(h, x) ((h)->keys[x]) +#define kh_size(h) ((h)->count) +#define kh_capacity(h) ((h)->keys? 
1U<<(h)->bits : 0U) +#define kh_end(h) kh_capacity(h) + +#define kh_key(h, x) ((h)->keys[x].key) +#define kh_val(h, x) ((h)->keys[x].val) +#define kh_exist(h, x) __kh_used((h)->used, (x)) + +/************************************** + * Common hash and equality functions * + **************************************/ + +#define kh_eq_generic(a, b) ((a) == (b)) +#define kh_eq_str(a, b) (strcmp((a), (b)) == 0) +#define kh_hash_dummy(x) ((khint_t)(x)) + +typedef const char *kh_cstr_t; + +static kh_inline khint_t kh_hash_uint32(khint_t key) { + key += ~(key << 15); + key ^= (key >> 10); + key += (key << 3); + key ^= (key >> 6); + key += ~(key << 11); + key ^= (key >> 16); + return key; +} + +static kh_inline khint_t kh_hash_uint64(khint64_t key) { + key = ~key + (key << 21); + key = key ^ key >> 24; + key = (key + (key << 3)) + (key << 8); + key = key ^ key >> 14; + key = (key + (key << 2)) + (key << 4); + key = key ^ key >> 28; + key = key + (key << 31); + return (khint_t)key; +} + +static kh_inline khint_t kh_hash_str(const char *s) { + khint_t h = (khint_t)*s; + if (h) for (++s ; *s; ++s) h = (h << 5) - h + (khint_t)*s; + return h; +} + +#endif /* __AC_KHASHL_H */ diff --git a/krmq.h b/krmq.h new file mode 100644 index 0000000..8fa1cce --- /dev/null +++ b/krmq.h @@ -0,0 +1,474 @@ +/* The MIT License + + Copyright (c) 2019 by Attractive Chaos + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. +*/ + +/* An example: + +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include "krmq.h" + +struct my_node { + char key; + KRMQ_HEAD(struct my_node) head; +}; +#define my_cmp(p, q) (((q)->key < (p)->key) - ((p)->key < (q)->key)) +#define my_lt2(p, q) ((p)->key < (q)->key) +KRMQ_INIT(my, struct my_node, head, my_cmp, my_lt2) + +int main(void) { + const char *str = "MNOLKQOPHIA"; // from wiki, except a duplicate + struct my_node *root = 0; + int i, l = strlen(str); + for (i = 0; i < l; ++i) { // insert in the input order + struct my_node *q, *p = malloc(sizeof(*p)); + p->key = str[i]; + q = krmq_insert(my, &root, p, 0); + if (p != q) free(p); // if already present, free + } + krmq_itr_t(my) itr; + krmq_itr_first(my, root, &itr); // place at first + do { // traverse + const struct my_node *p = krmq_at(&itr); + putchar(p->key); + free((void*)p); // free node + } while (krmq_itr_next(my, &itr)); + putchar('\n'); + return 0; +} +*/ + +#ifndef KRMQ_H +#define KRMQ_H + +#ifdef __STRICT_ANSI__ +#define inline __inline__ +#endif + +#define KRMQ_MAX_DEPTH 64 + +#define krmq_size(head, p) ((p)? (p)->head.size : 0) +#define krmq_size_child(head, q, i) ((q)->head.p[(i)]? 
(q)->head.p[(i)]->head.size : 0) + +#define KRMQ_HEAD(__type) \ + struct { \ + __type *p[2], *s; \ + signed char balance; /* balance factor */ \ + unsigned size; /* #elements in subtree */ \ + } + +#define __KRMQ_FIND(suf, __scope, __type, __head, __cmp) \ + __scope __type *krmq_find_##suf(const __type *root, const __type *x, unsigned *cnt_) { \ + const __type *p = root; \ + unsigned cnt = 0; \ + while (p != 0) { \ + int cmp; \ + cmp = __cmp(x, p); \ + if (cmp >= 0) cnt += krmq_size_child(__head, p, 0) + 1; \ + if (cmp < 0) p = p->__head.p[0]; \ + else if (cmp > 0) p = p->__head.p[1]; \ + else break; \ + } \ + if (cnt_) *cnt_ = cnt; \ + return (__type*)p; \ + } \ + __scope __type *krmq_interval_##suf(const __type *root, const __type *x, __type **lower, __type **upper) { \ + const __type *p = root, *l = 0, *u = 0; \ + while (p != 0) { \ + int cmp; \ + cmp = __cmp(x, p); \ + if (cmp < 0) u = p, p = p->__head.p[0]; \ + else if (cmp > 0) l = p, p = p->__head.p[1]; \ + else { l = u = p; break; } \ + } \ + if (lower) *lower = (__type*)l; \ + if (upper) *upper = (__type*)u; \ + return (__type*)p; \ + } + +#define __KRMQ_RMQ(suf, __scope, __type, __head, __cmp, __lt2) \ + __scope __type *krmq_rmq_##suf(const __type *root, const __type *lo, const __type *up) { /* CLOSED interval */ \ + const __type *p = root, *path[2][KRMQ_MAX_DEPTH], *min; \ + int plen[2] = {0, 0}, pcmp[2][KRMQ_MAX_DEPTH], i, cmp, lca; \ + if (root == 0) return 0; \ + while (p) { \ + cmp = __cmp(lo, p); \ + path[0][plen[0]] = p, pcmp[0][plen[0]++] = cmp; \ + if (cmp < 0) p = p->__head.p[0]; \ + else if (cmp > 0) p = p->__head.p[1]; \ + else break; \ + } \ + p = root; \ + while (p) { \ + cmp = __cmp(up, p); \ + path[1][plen[1]] = p, pcmp[1][plen[1]++] = cmp; \ + if (cmp < 0) p = p->__head.p[0]; \ + else if (cmp > 0) p = p->__head.p[1]; \ + else break; \ + } \ + for (i = 0; i < plen[0] && i < plen[1]; ++i) /* find the LCA */ \ + if (path[0][i] == path[1][i] && pcmp[0][i] <= 0 && pcmp[1][i] >= 0) \ + break; \ 
+ if (i == plen[0] || i == plen[1]) return 0; /* no elements in the closed interval */ \ + lca = i, min = path[0][lca]; \ + for (i = lca + 1; i < plen[0]; ++i) { \ + if (pcmp[0][i] <= 0) { \ + if (__lt2(path[0][i], min)) min = path[0][i]; \ + if (path[0][i]->__head.p[1] && __lt2(path[0][i]->__head.p[1]->__head.s, min)) \ + min = path[0][i]->__head.p[1]->__head.s; \ + } \ + } \ + for (i = lca + 1; i < plen[1]; ++i) { \ + if (pcmp[1][i] >= 0) { \ + if (__lt2(path[1][i], min)) min = path[1][i]; \ + if (path[1][i]->__head.p[0] && __lt2(path[1][i]->__head.p[0]->__head.s, min)) \ + min = path[1][i]->__head.p[0]->__head.s; \ + } \ + } \ + return (__type*)min; \ + } + +#define __KRMQ_ROTATE(suf, __type, __head, __lt2) \ + /* */ \ + static inline void krmq_update_min_##suf(__type *p, const __type *q, const __type *r) { \ + p->__head.s = !q || __lt2(p, q->__head.s)? p : q->__head.s; \ + p->__head.s = !r || __lt2(p->__head.s, r->__head.s)? p->__head.s : r->__head.s; \ + } \ + /* one rotation: (a,(b,c)q)p => ((a,b)p,c)q */ \ + static inline __type *krmq_rotate1_##suf(__type *p, int dir) { /* dir=0 to left; dir=1 to right */ \ + int opp = 1 - dir; /* opposite direction */ \ + __type *q = p->__head.p[opp], *s = p->__head.s; \ + unsigned size_p = p->__head.size; \ + p->__head.size -= q->__head.size - krmq_size_child(__head, q, dir); \ + q->__head.size = size_p; \ + krmq_update_min_##suf(p, p->__head.p[dir], q->__head.p[dir]); \ + q->__head.s = s; \ + p->__head.p[opp] = q->__head.p[dir]; \ + q->__head.p[dir] = p; \ + return q; \ + } \ + /* two consecutive rotations: (a,((b,c)r,d)q)p => ((a,b)p,(c,d)q)r */ \ + static inline __type *krmq_rotate2_##suf(__type *p, int dir) { \ + int b1, opp = 1 - dir; \ + __type *q = p->__head.p[opp], *r = q->__head.p[dir], *s = p->__head.s; \ + unsigned size_x_dir = krmq_size_child(__head, r, dir); \ + r->__head.size = p->__head.size; \ + p->__head.size -= q->__head.size - size_x_dir; \ + q->__head.size -= size_x_dir + 1; \ + krmq_update_min_##suf(p, 
p->__head.p[dir], r->__head.p[dir]); \ + krmq_update_min_##suf(q, q->__head.p[opp], r->__head.p[opp]); \ + r->__head.s = s; \ + p->__head.p[opp] = r->__head.p[dir]; \ + r->__head.p[dir] = p; \ + q->__head.p[dir] = r->__head.p[opp]; \ + r->__head.p[opp] = q; \ + b1 = dir == 0? +1 : -1; \ + if (r->__head.balance == b1) q->__head.balance = 0, p->__head.balance = -b1; \ + else if (r->__head.balance == 0) q->__head.balance = p->__head.balance = 0; \ + else q->__head.balance = b1, p->__head.balance = 0; \ + r->__head.balance = 0; \ + return r; \ + } + +#define __KRMQ_INSERT(suf, __scope, __type, __head, __cmp, __lt2) \ + __scope __type *krmq_insert_##suf(__type **root_, __type *x, unsigned *cnt_) { \ + unsigned char stack[KRMQ_MAX_DEPTH]; \ + __type *path[KRMQ_MAX_DEPTH]; \ + __type *bp, *bq; \ + __type *p, *q, *r = 0; /* _r_ is potentially the new root */ \ + int i, which = 0, top, b1, path_len; \ + unsigned cnt = 0; \ + bp = *root_, bq = 0; \ + /* find the insertion location */ \ + for (p = bp, q = bq, top = path_len = 0; p; q = p, p = p->__head.p[which]) { \ + int cmp; \ + cmp = __cmp(x, p); \ + if (cmp >= 0) cnt += krmq_size_child(__head, p, 0) + 1; \ + if (cmp == 0) { \ + if (cnt_) *cnt_ = cnt; \ + return p; \ + } \ + if (p->__head.balance != 0) \ + bq = q, bp = p, top = 0; \ + stack[top++] = which = (cmp > 0); \ + path[path_len++] = p; \ + } \ + if (cnt_) *cnt_ = cnt; \ + x->__head.balance = 0, x->__head.size = 1, x->__head.p[0] = x->__head.p[1] = 0, x->__head.s = x; \ + if (q == 0) *root_ = x; \ + else q->__head.p[which] = x; \ + if (bp == 0) return x; \ + for (i = 0; i < path_len; ++i) ++path[i]->__head.size; \ + for (i = path_len - 1; i >= 0; --i) { \ + krmq_update_min_##suf(path[i], path[i]->__head.p[0], path[i]->__head.p[1]); \ + if (path[i]->__head.s != x) break; \ + } \ + for (p = bp, top = 0; p != x; p = p->__head.p[stack[top]], ++top) /* update balance factors */ \ + if (stack[top] == 0) --p->__head.balance; \ + else ++p->__head.balance; \ + if 
(bp->__head.balance > -2 && bp->__head.balance < 2) return x; /* no re-balance needed */ \ + /* re-balance */ \ + which = (bp->__head.balance < 0); \ + b1 = which == 0? +1 : -1; \ + q = bp->__head.p[1 - which]; \ + if (q->__head.balance == b1) { \ + r = krmq_rotate1_##suf(bp, which); \ + q->__head.balance = bp->__head.balance = 0; \ + } else r = krmq_rotate2_##suf(bp, which); \ + if (bq == 0) *root_ = r; \ + else bq->__head.p[bp != bq->__head.p[0]] = r; \ + return x; \ + } + +#define __KRMQ_ERASE(suf, __scope, __type, __head, __cmp, __lt2) \ + __scope __type *krmq_erase_##suf(__type **root_, const __type *x, unsigned *cnt_) { \ + __type *p, *path[KRMQ_MAX_DEPTH], fake; \ + unsigned char dir[KRMQ_MAX_DEPTH]; \ + int i, d = 0, cmp; \ + unsigned cnt = 0; \ + fake = **root_, fake.__head.p[0] = *root_, fake.__head.p[1] = 0; \ + if (cnt_) *cnt_ = 0; \ + if (x) { \ + for (cmp = -1, p = &fake; cmp; cmp = __cmp(x, p)) { \ + int which = (cmp > 0); \ + if (cmp > 0) cnt += krmq_size_child(__head, p, 0) + 1; \ + dir[d] = which; \ + path[d++] = p; \ + p = p->__head.p[which]; \ + if (p == 0) { \ + if (cnt_) *cnt_ = 0; \ + return 0; \ + } \ + } \ + cnt += krmq_size_child(__head, p, 0) + 1; /* because p==x is not counted */ \ + } else { \ + for (p = &fake, cnt = 1; p; p = p->__head.p[0]) \ + dir[d] = 0, path[d++] = p; \ + p = path[--d]; \ + } \ + if (cnt_) *cnt_ = cnt; \ + for (i = 1; i < d; ++i) --path[i]->__head.size; \ + if (p->__head.p[1] == 0) { /* ((1,.)2,3)4 => (1,3)4; p=2 */ \ + path[d-1]->__head.p[dir[d-1]] = p->__head.p[0]; \ + } else { \ + __type *q = p->__head.p[1]; \ + if (q->__head.p[0] == 0) { /* ((1,2)3,4)5 => ((1)2,4)5; p=3,q=2 */ \ + q->__head.p[0] = p->__head.p[0]; \ + q->__head.balance = p->__head.balance; \ + path[d-1]->__head.p[dir[d-1]] = q; \ + path[d] = q, dir[d++] = 1; \ + q->__head.size = p->__head.size - 1; \ + } else { /* ((1,((.,2)3,4)5)6,7)8 => ((1,(2,4)5)3,7)8; p=6 */ \ + __type *r; \ + int e = d++; /* backup _d_ */\ + for (;;) { \ + dir[d] = 0; \ + 
path[d++] = q; \ + r = q->__head.p[0]; \ + if (r->__head.p[0] == 0) break; \ + q = r; \ + } \ + r->__head.p[0] = p->__head.p[0]; \ + q->__head.p[0] = r->__head.p[1]; \ + r->__head.p[1] = p->__head.p[1]; \ + r->__head.balance = p->__head.balance; \ + path[e-1]->__head.p[dir[e-1]] = r; \ + path[e] = r, dir[e] = 1; \ + for (i = e + 1; i < d; ++i) --path[i]->__head.size; \ + r->__head.size = p->__head.size - 1; \ + } \ + } \ + for (i = d - 1; i >= 0; --i) /* not sure why adding condition "path[i]->__head.s==p" doesn't work */ \ + krmq_update_min_##suf(path[i], path[i]->__head.p[0], path[i]->__head.p[1]); \ + while (--d > 0) { \ + __type *q = path[d]; \ + int which, other, b1 = 1, b2 = 2; \ + which = dir[d], other = 1 - which; \ + if (which) b1 = -b1, b2 = -b2; \ + q->__head.balance += b1; \ + if (q->__head.balance == b1) break; \ + else if (q->__head.balance == b2) { \ + __type *r = q->__head.p[other]; \ + if (r->__head.balance == -b1) { \ + path[d-1]->__head.p[dir[d-1]] = krmq_rotate2_##suf(q, which); \ + } else { \ + path[d-1]->__head.p[dir[d-1]] = krmq_rotate1_##suf(q, which); \ + if (r->__head.balance == 0) { \ + r->__head.balance = -b1; \ + q->__head.balance = b1; \ + break; \ + } else r->__head.balance = q->__head.balance = 0; \ + } \ + } \ + } \ + *root_ = fake.__head.p[0]; \ + return p; \ + } + +#define krmq_free(__type, __head, __root, __free) do { \ + __type *_p, *_q; \ + for (_p = __root; _p; _p = _q) { \ + if (_p->__head.p[0] == 0) { \ + _q = _p->__head.p[1]; \ + __free(_p); \ + } else { \ + _q = _p->__head.p[0]; \ + _p->__head.p[0] = _q->__head.p[1]; \ + _q->__head.p[1] = _p; \ + } \ + } \ + } while (0) + +#define __KRMQ_ITR(suf, __scope, __type, __head, __cmp) \ + struct krmq_itr_##suf { \ + const __type *stack[KRMQ_MAX_DEPTH], **top; \ + }; \ + __scope void krmq_itr_first_##suf(const __type *root, struct krmq_itr_##suf *itr) { \ + const __type *p; \ + for (itr->top = itr->stack - 1, p = root; p; p = p->__head.p[0]) \ + *++itr->top = p; \ + } \ + __scope 
int krmq_itr_find_##suf(const __type *root, const __type *x, struct krmq_itr_##suf *itr) { \ + const __type *p = root; \ + itr->top = itr->stack - 1; \ + while (p != 0) { \ + int cmp; \ + *++itr->top = p; \ + cmp = __cmp(x, p); \ + if (cmp < 0) p = p->__head.p[0]; \ + else if (cmp > 0) p = p->__head.p[1]; \ + else break; \ + } \ + return p? 1 : 0; \ + } \ + __scope int krmq_itr_next_bidir_##suf(struct krmq_itr_##suf *itr, int dir) { \ + const __type *p; \ + if (itr->top < itr->stack) return 0; \ + dir = !!dir; \ + p = (*itr->top)->__head.p[dir]; \ + if (p) { /* go down */ \ + for (; p; p = p->__head.p[!dir]) \ + *++itr->top = p; \ + return 1; \ + } else { /* go up */ \ + do { \ + p = *itr->top--; \ + } while (itr->top >= itr->stack && p == (*itr->top)->__head.p[dir]); \ + return itr->top < itr->stack? 0 : 1; \ + } \ + } \ + +/** + * Insert a node to the tree + * + * @param suf name suffix used in KRMQ_INIT() + * @param proot pointer to the root of the tree (in/out: root may change) + * @param x node to insert (in) + * @param cnt number of nodes smaller than or equal to _x_; can be NULL (out) + * + * @return _x_ if not present in the tree, or the node equal to x. 
+ */ +#define krmq_insert(suf, proot, x, cnt) krmq_insert_##suf(proot, x, cnt) + +/** + * Find a node in the tree + * + * @param suf name suffix used in KRMQ_INIT() + * @param root root of the tree + * @param x node value to find (in) + * @param cnt number of nodes smaller than or equal to _x_; can be NULL (out) + * + * @return node equal to _x_ if present, or NULL if absent + */ +#define krmq_find(suf, root, x, cnt) krmq_find_##suf(root, x, cnt) +#define krmq_interval(suf, root, x, lower, upper) krmq_interval_##suf(root, x, lower, upper) +#define krmq_rmq(suf, root, lo, up) krmq_rmq_##suf(root, lo, up) + +/** + * Delete a node from the tree + * + * @param suf name suffix used in KRMQ_INIT() + * @param proot pointer to the root of the tree (in/out: root may change) + * @param x node value to delete; if NULL, delete the first node (in) + * + * @return node removed from the tree if present, or NULL if absent + */ +#define krmq_erase(suf, proot, x, cnt) krmq_erase_##suf(proot, x, cnt) +#define krmq_erase_first(suf, proot) krmq_erase_##suf(proot, 0, 0) + +#define krmq_itr_t(suf) struct krmq_itr_##suf + +/** + * Place the iterator at the smallest object + * + * @param suf name suffix used in KRMQ_INIT() + * @param root root of the tree + * @param itr iterator + */ +#define krmq_itr_first(suf, root, itr) krmq_itr_first_##suf(root, itr) + +/** + * Place the iterator at the object equal to or greater than the query + * + * @param suf name suffix used in KRMQ_INIT() + * @param root root of the tree + * @param x query (in) + * @param itr iterator (out) + * + * @return 1 if find; 0 otherwise. 
krmq_at(itr) is NULL if and only if query is + * larger than all objects in the tree + */ +#define krmq_itr_find(suf, root, x, itr) krmq_itr_find_##suf(root, x, itr) + +/** + * Move to the next object in order + * + * @param itr iterator (modified) + * + * @return 1 if there is a next object; 0 otherwise + */ +#define krmq_itr_next(suf, itr) krmq_itr_next_bidir_##suf(itr, 1) +#define krmq_itr_prev(suf, itr) krmq_itr_next_bidir_##suf(itr, 0) + +/** + * Return the pointer at the iterator + * + * @param itr iterator + * + * @return pointer if present; NULL otherwise + */ +#define krmq_at(itr) ((itr)->top < (itr)->stack? 0 : *(itr)->top) + +#define KRMQ_INIT2(suf, __scope, __type, __head, __cmp, __lt2) \ + __KRMQ_FIND(suf, __scope, __type, __head, __cmp) \ + __KRMQ_RMQ(suf, __scope, __type, __head, __cmp, __lt2) \ + __KRMQ_ROTATE(suf, __type, __head, __lt2) \ + __KRMQ_INSERT(suf, __scope, __type, __head, __cmp, __lt2) \ + __KRMQ_ERASE(suf, __scope, __type, __head, __cmp, __lt2) \ + __KRMQ_ITR(suf, __scope, __type, __head, __cmp) + +#define KRMQ_INIT(suf, __type, __head, __cmp, __lt2) \ + KRMQ_INIT2(suf,, __type, __head, __cmp, __lt2) + +#endif diff --git a/kseq.h b/kseq.h new file mode 100644 index 0000000..a08ea01 --- /dev/null +++ b/kseq.h @@ -0,0 +1,256 @@ +/* The MIT License + + Copyright (c) 2008, 2009, 2011 Attractive Chaos + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. +*/ + +/* Last Modified: 05MAR2012 */ + +#ifndef AC_KSEQ_H +#define AC_KSEQ_H + +#include <ctype.h> +#include <string.h> +#include <stdlib.h> + +#define KS_SEP_SPACE 0 // isspace(): \t, \n, \v, \f, \r +#define KS_SEP_TAB 1 // isspace() && !' ' +#define KS_SEP_LINE 2 // line separator: "\n" (Unix) or "\r\n" (Windows) +#define KS_SEP_MAX 2 + +#ifndef klib_unused +#if (defined __clang__ && __clang_major__ >= 3) || (defined __GNUC__ && __GNUC__ >= 3) +#define klib_unused __attribute__ ((__unused__)) +#else +#define klib_unused +#endif +#endif /* klib_unused */ + +#define __KS_TYPE(type_t) \ + typedef struct __kstream_t { \ + int begin, end; /* unread bytes are buf[begin..end) */ \ + int is_eof:2, bufsize:30; \ + type_t f; \ + unsigned char *buf; \ + } kstream_t; + +#define ks_eof(ks) ((ks)->is_eof && (ks)->begin >= (ks)->end) +#define ks_rewind(ks) ((ks)->is_eof = (ks)->begin = (ks)->end = 0) + +#define __KS_BASIC(SCOPE, type_t, __bufsize) \ + SCOPE kstream_t *ks_init(type_t f) /* wraps f in a new stream with a __bufsize-byte buffer */ \ + { \ + kstream_t *ks = (kstream_t*)calloc(1, sizeof(kstream_t)); \ + ks->f = f; ks->bufsize = __bufsize; \ + ks->buf = (unsigned char*)malloc(__bufsize); \ + return ks; \ + } \ + SCOPE void ks_destroy(kstream_t *ks) /* frees the buffer and the stream; f is left untouched */ \ + { \ + if (!ks) return; \ + free(ks->buf); \ + free(ks); \ + } + +#define __KS_INLINED(__read) \ + static klib_unused inline int ks_getc(kstream_t *ks) \ + { \ + if (ks->is_eof && ks->begin >= ks->end) return -1; \ + if (ks->begin >= ks->end) { \ + ks->begin = 0; \ + ks->end = __read(ks->f, ks->buf, ks->bufsize); \ + if (ks->end < ks->bufsize) ks->is_eof = 1; \ + if (ks->end == 
0) return -1; \ + } \ + return (int)ks->buf[ks->begin++]; \ + } \ + static inline int ks_getuntil(kstream_t *ks, int delimiter, kstring_t *str, int *dret) \ + { return ks_getuntil2(ks, delimiter, str, dret, 0); } + +#ifndef KSTRING_T +#define KSTRING_T kstring_t +typedef struct __kstring_t { + unsigned l, m; + char *s; +} kstring_t; +#endif + +#ifndef kroundup32 +#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) +#endif + +#define __KS_GETUNTIL(SCOPE, __read) \ + SCOPE int ks_getuntil2(kstream_t *ks, int delimiter, kstring_t *str, int *dret, int append) \ + { \ + if (dret) *dret = 0; \ + str->l = append? str->l : 0; \ + if (ks->begin >= ks->end && ks->is_eof) return -1; \ + for (;;) { \ + int i; \ + if (ks->begin >= ks->end) { \ + if (!ks->is_eof) { \ + ks->begin = 0; \ + ks->end = __read(ks->f, ks->buf, ks->bufsize); \ + if (ks->end < ks->bufsize) ks->is_eof = 1; \ + if (ks->end == 0) break; \ + } else break; \ + } \ + if (delimiter == KS_SEP_LINE) { \ + for (i = ks->begin; i < ks->end; ++i) \ + if (ks->buf[i] == '\n') break; \ + } else if (delimiter > KS_SEP_MAX) { \ + for (i = ks->begin; i < ks->end; ++i) \ + if (ks->buf[i] == delimiter) break; \ + } else if (delimiter == KS_SEP_SPACE) { \ + for (i = ks->begin; i < ks->end; ++i) \ + if (isspace(ks->buf[i])) break; \ + } else if (delimiter == KS_SEP_TAB) { \ + for (i = ks->begin; i < ks->end; ++i) \ + if (isspace(ks->buf[i]) && ks->buf[i] != ' ') break; \ + } else i = 0; /* never come to here! 
*/ \ + if (str->m - str->l < (size_t)(i - ks->begin + 1)) { \ + str->m = str->l + (i - ks->begin) + 1; \ + kroundup32(str->m); \ + str->s = (char*)realloc(str->s, str->m); \ + } \ + memcpy(str->s + str->l, ks->buf + ks->begin, i - ks->begin); \ + str->l = str->l + (i - ks->begin); \ + ks->begin = i + 1; \ + if (i < ks->end) { \ + if (dret) *dret = ks->buf[i]; \ + break; \ + } \ + } \ + if (str->s == 0) { \ + str->m = 1; \ + str->s = (char*)calloc(1, 1); \ + } else if (delimiter == KS_SEP_LINE && str->l > 1 && str->s[str->l-1] == '\r') --str->l; \ + str->s[str->l] = '\0'; \ + return str->l; \ + } + +#define KSTREAM_INIT2(SCOPE, type_t, __read, __bufsize) \ + __KS_TYPE(type_t) \ + __KS_BASIC(SCOPE, type_t, __bufsize) \ + __KS_GETUNTIL(SCOPE, __read) \ + __KS_INLINED(__read) + +#define KSTREAM_INIT(type_t, __read, __bufsize) KSTREAM_INIT2(static, type_t, __read, __bufsize) + +#define KSTREAM_DECLARE(type_t, __read) \ + __KS_TYPE(type_t) \ + extern int ks_getuntil2(kstream_t *ks, int delimiter, kstring_t *str, int *dret, int append); \ + extern kstream_t *ks_init(type_t f); \ + extern void ks_destroy(kstream_t *ks); \ + __KS_INLINED(__read) + +/****************** + * FASTA/Q parser * + ******************/ + +#define kseq_rewind(ks) ((ks)->last_char = (ks)->f->is_eof = (ks)->f->begin = (ks)->f->end = 0) + +#define __KSEQ_BASIC(SCOPE, type_t) \ + SCOPE kseq_t *kseq_init(type_t fd) \ + { \ + kseq_t *s = (kseq_t*)calloc(1, sizeof(kseq_t)); \ + s->f = ks_init(fd); \ + return s; \ + } \ + SCOPE void kseq_destroy(kseq_t *ks) \ + { \ + if (!ks) return; \ + free(ks->name.s); free(ks->comment.s); free(ks->seq.s); free(ks->qual.s); \ + ks_destroy(ks->f); \ + free(ks); \ + } + +/* Return value: + >=0 length of the sequence (normal) + -1 end-of-file + -2 truncated quality string + */ +#define __KSEQ_READ(SCOPE) \ + SCOPE int kseq_read(kseq_t *seq) \ + { \ + int c; \ + kstream_t *ks = seq->f; \ + if (seq->last_char == 0) { /* then jump to the next header line */ \ + while ((c = 
ks_getc(ks)) != -1 && c != '>' && c != '@'); \ + if (c == -1) return -1; /* end of file */ \ + seq->last_char = c; \ + } /* else: the first header char has been read in the previous call */ \ + seq->comment.l = seq->seq.l = seq->qual.l = 0; /* reset all members */ \ + if (ks_getuntil(ks, 0, &seq->name, &c) < 0) return -1; /* normal exit: EOF */ \ + if (c != '\n') ks_getuntil(ks, KS_SEP_LINE, &seq->comment, 0); /* read FASTA/Q comment */ \ + if (seq->seq.s == 0) { /* we can do this in the loop below, but that is slower */ \ + seq->seq.m = 256; \ + seq->seq.s = (char*)malloc(seq->seq.m); \ + } \ + while ((c = ks_getc(ks)) != -1 && c != '>' && c != '+' && c != '@') { \ + if (c == '\n') continue; /* skip empty lines */ \ + seq->seq.s[seq->seq.l++] = c; /* this is safe: we always have enough space for 1 char */ \ + ks_getuntil2(ks, KS_SEP_LINE, &seq->seq, 0, 1); /* read the rest of the line */ \ + } \ + if (c == '>' || c == '@') seq->last_char = c; /* the first header char has been read */ \ + if (seq->seq.l + 1 >= seq->seq.m) { /* seq->seq.s[seq->seq.l] below may be out of boundary */ \ + seq->seq.m = seq->seq.l + 2; \ + kroundup32(seq->seq.m); /* rounded to the next closest 2^k */ \ + seq->seq.s = (char*)realloc(seq->seq.s, seq->seq.m); \ + } \ + seq->seq.s[seq->seq.l] = 0; /* null terminated string */ \ + if (c != '+') return seq->seq.l; /* FASTA */ \ + if (seq->qual.m < seq->seq.m) { /* allocate memory for qual in case insufficient */ \ + seq->qual.m = seq->seq.m; \ + seq->qual.s = (char*)realloc(seq->qual.s, seq->qual.m); \ + } \ + while ((c = ks_getc(ks)) != -1 && c != '\n'); /* skip the rest of '+' line */ \ + if (c == -1) return -2; /* error: no quality string */ \ + while (ks_getuntil2(ks, KS_SEP_LINE, &seq->qual, 0, 1) >= 0 && seq->qual.l < seq->seq.l); \ + seq->last_char = 0; /* we have not come to the next header line */ \ + if (seq->seq.l != seq->qual.l) return -2; /* error: qual string is of a different length */ \ + return seq->seq.l; \ + } + +#define 
__KSEQ_TYPE(type_t) \ + typedef struct { \ + kstring_t name, comment, seq, qual; \ + int last_char; \ + kstream_t *f; \ + } kseq_t; + +#define KSEQ_INIT2(SCOPE, type_t, __read) \ + KSTREAM_INIT2(SCOPE, type_t, __read, 65536) \ + __KSEQ_TYPE(type_t) \ + __KSEQ_BASIC(SCOPE, type_t) \ + __KSEQ_READ(SCOPE) + +#define KSEQ_INIT(type_t, __read) KSEQ_INIT2(static, type_t, __read) + +#define KSEQ_DECLARE(type_t) \ + __KS_TYPE(type_t) \ + __KSEQ_TYPE(type_t) \ + extern kseq_t *kseq_init(type_t fd); \ + void kseq_destroy(kseq_t *ks); \ + int kseq_read(kseq_t *seq); + +#endif diff --git a/ksort.h b/ksort.h new file mode 100644 index 0000000..271d6ed --- /dev/null +++ b/ksort.h @@ -0,0 +1,164 @@ +/* The MIT License + + Copyright (c) 2008, 2011 Attractive Chaos + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. 
+*/ + +// This is a simplified version of ksort.h + +#ifndef AC_KSORT_H +#define AC_KSORT_H + +#include +#include +#include + +typedef struct { + void *left, *right; + int depth; +} ks_isort_stack_t; + +#define KSORT_SWAP(type_t, a, b) { type_t t=(a); (a)=(b); (b)=t; } + +#define KSORT_INIT(name, type_t, __sort_lt) \ + void ks_heapdown_##name(size_t i, size_t n, type_t l[]) \ + { \ + size_t k = i; \ + type_t tmp = l[i]; \ + while ((k = (k << 1) + 1) < n) { \ + if (k != n - 1 && __sort_lt(l[k], l[k+1])) ++k; \ + if (__sort_lt(l[k], tmp)) break; \ + l[i] = l[k]; i = k; \ + } \ + l[i] = tmp; \ + } \ + void ks_heapup_##name(size_t n, type_t l[]) \ + { \ + size_t i, k = n - 1; \ + type_t tmp = l[k]; \ + while (k) { \ + i = (k - 1) >> 1; \ + if (__sort_lt(tmp, l[i])) break; \ + l[k] = l[i]; k = i; \ + } \ + l[k] = tmp; \ + } \ + void ks_heapmake_##name(size_t lsize, type_t l[]) \ + { \ + size_t i; \ + for (i = (lsize >> 1) - 1; i != (size_t)(-1); --i) \ + ks_heapdown_##name(i, lsize, l); \ + } \ + type_t ks_ksmall_##name(size_t n, type_t arr[], size_t kk) \ + { \ + type_t *low, *high, *k, *ll, *hh, *mid; \ + low = arr; high = arr + n - 1; k = arr + kk; \ + for (;;) { \ + if (high <= low) return *k; \ + if (high == low + 1) { \ + if (__sort_lt(*high, *low)) KSORT_SWAP(type_t, *low, *high); \ + return *k; \ + } \ + mid = low + (high - low) / 2; \ + if (__sort_lt(*high, *mid)) KSORT_SWAP(type_t, *mid, *high); \ + if (__sort_lt(*high, *low)) KSORT_SWAP(type_t, *low, *high); \ + if (__sort_lt(*low, *mid)) KSORT_SWAP(type_t, *mid, *low); \ + KSORT_SWAP(type_t, *mid, *(low+1)); \ + ll = low + 1; hh = high; \ + for (;;) { \ + do ++ll; while (__sort_lt(*ll, *low)); \ + do --hh; while (__sort_lt(*low, *hh)); \ + if (hh < ll) break; \ + KSORT_SWAP(type_t, *ll, *hh); \ + } \ + KSORT_SWAP(type_t, *low, *hh); \ + if (hh <= k) low = ll; \ + if (hh >= k) high = hh - 1; \ + } \ + } \ + +#define ks_ksmall(name, n, a, k) ks_ksmall_##name(n, a, k) + +#define ks_lt_generic(a, b) ((a) < (b)) 
+#define ks_lt_str(a, b) (strcmp((a), (b)) < 0) + +typedef const char *ksstr_t; + +#define KSORT_INIT_GENERIC(type_t) KSORT_INIT(type_t, type_t, ks_lt_generic) +#define KSORT_INIT_STR KSORT_INIT(str, ksstr_t, ks_lt_str) + +#define RS_MIN_SIZE 64 +#define RS_MAX_BITS 8 + +#define KRADIX_SORT_INIT(name, rstype_t, rskey, sizeof_key) \ + typedef struct { \ + rstype_t *b, *e; \ + } rsbucket_##name##_t; \ + void rs_insertsort_##name(rstype_t *beg, rstype_t *end) \ + { \ + rstype_t *i; \ + for (i = beg + 1; i < end; ++i) \ + if (rskey(*i) < rskey(*(i - 1))) { \ + rstype_t *j, tmp = *i; \ + for (j = i; j > beg && rskey(tmp) < rskey(*(j-1)); --j) \ + *j = *(j - 1); \ + *j = tmp; \ + } \ + } \ + void rs_sort_##name(rstype_t *beg, rstype_t *end, int n_bits, int s) \ + { \ + rstype_t *i; \ + int size = 1<b = k->e = beg; \ + for (i = beg; i != end; ++i) ++b[rskey(*i)>>s&m].e; \ + for (k = b + 1; k != be; ++k) \ + k->e += (k-1)->e - beg, k->b = (k-1)->e; \ + for (k = b; k != be;) { \ + if (k->b != k->e) { \ + rsbucket_##name##_t *l; \ + if ((l = b + (rskey(*k->b)>>s&m)) != k) { \ + rstype_t tmp = *k->b, swap; \ + do { \ + swap = tmp; tmp = *l->b; *l->b++ = swap; \ + l = b + (rskey(tmp)>>s&m); \ + } while (l != k); \ + *k->b++ = tmp; \ + } else ++k->b; \ + } else ++k; \ + } \ + for (b->b = beg, k = b + 1; k != be; ++k) k->b = (k-1)->e; \ + if (s) { \ + s = s > n_bits? 
s - n_bits : 0; \ + for (k = b; k != be; ++k) \ + if (k->e - k->b > RS_MIN_SIZE) rs_sort_##name(k->b, k->e, n_bits, s); \ + else if (k->e - k->b > 1) rs_insertsort_##name(k->b, k->e); \ + } \ + } \ + void radix_sort_##name(rstype_t *beg, rstype_t *end) \ + { \ + if (end - beg <= RS_MIN_SIZE) rs_insertsort_##name(beg, end); \ + else rs_sort_##name(beg, end, RS_MAX_BITS, (sizeof_key - 1) * RS_MAX_BITS); \ + } + +#endif diff --git a/kstring.h b/kstring.h new file mode 100644 index 0000000..216d4f5 --- /dev/null +++ b/kstring.h @@ -0,0 +1,165 @@ +/* The MIT License + + Copyright (c) by Attractive Chaos + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. 
+*/ + +#ifndef KSTRING_H +#define KSTRING_H + +#include +#include +#include +#include + +#ifndef kroundup32 +#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) +#endif + +#ifndef KSTRING_T +#define KSTRING_T kstring_t +typedef struct __kstring_t { + size_t l, m; + char *s; +} kstring_t; +#endif + +typedef struct { + uint64_t tab[4]; + int sep, finished; + const char *p; // end of the current token +} ks_tokaux_t; + +static inline void ks_resize(kstring_t *s, size_t size) +{ + if (s->m < size) { + s->m = size; + kroundup32(s->m); + s->s = (char*)realloc(s->s, s->m); + } +} + +static inline int kputsn(const char *p, int l, kstring_t *s) +{ + if (s->l + l + 1 >= s->m) { + s->m = s->l + l + 2; + kroundup32(s->m); + s->s = (char*)realloc(s->s, s->m); + } + memcpy(s->s + s->l, p, l); + s->l += l; + s->s[s->l] = 0; + return l; +} + +static inline int kputs(const char *p, kstring_t *s) +{ + return kputsn(p, strlen(p), s); +} + +static inline int kputc(int c, kstring_t *s) +{ + if (s->l + 1 >= s->m) { + s->m = s->l + 2; + kroundup32(s->m); + s->s = (char*)realloc(s->s, s->m); + } + s->s[s->l++] = c; + s->s[s->l] = 0; + return c; +} + +static inline void kputc_(int c, kstring_t *s) +{ + if (s->l + 1 > s->m) { + s->m = s->l + 1; + kroundup32(s->m); + s->s = (char*)realloc(s->s, s->m); + } + s->s[s->l++] = c; +} + +static inline void kputsn_(const void *p, int l, kstring_t *s) +{ + if (s->l + l > s->m) { + s->m = s->l + l; + kroundup32(s->m); + s->s = (char*)realloc(s->s, s->m); + } + memcpy(s->s + s->l, p, l); + s->l += l; +} + +static inline int kputw(int c, kstring_t *s) +{ + char buf[16]; + int l, x; + if (c == 0) return kputc('0', s); + for (l = 0, x = c < 0? 
-c : c; x > 0; x /= 10) buf[l++] = x%10 + '0'; + if (c < 0) buf[l++] = '-'; + if (s->l + l + 1 >= s->m) { + s->m = s->l + l + 2; + kroundup32(s->m); + s->s = (char*)realloc(s->s, s->m); + } + for (x = l - 1; x >= 0; --x) s->s[s->l++] = buf[x]; + s->s[s->l] = 0; + return 0; +} + +static inline int kputuw(unsigned c, kstring_t *s) +{ + char buf[16]; + int l, i; + unsigned x; + if (c == 0) return kputc('0', s); + for (l = 0, x = c; x > 0; x /= 10) buf[l++] = x%10 + '0'; + if (s->l + l + 1 >= s->m) { + s->m = s->l + l + 2; + kroundup32(s->m); + s->s = (char*)realloc(s->s, s->m); + } + for (i = l - 1; i >= 0; --i) s->s[s->l++] = buf[i]; + s->s[s->l] = 0; + return 0; +} + +static inline int ksprintf(kstring_t *s, const char *fmt, ...) +{ + va_list ap; + int l; + va_start(ap, fmt); + l = vsnprintf(s->s + s->l, s->m - s->l, fmt, ap); // This line does not work with glibc 2.0. See `man snprintf'. + va_end(ap); + if ((size_t)l + 1 > s->m - s->l) { + s->m = s->l + l + 2; + kroundup32(s->m); + s->s = (char*)realloc(s->s, s->m); + va_start(ap, fmt); + l = vsnprintf(s->s + s->l, s->m - s->l, fmt, ap); + } + va_end(ap); + s->l += l; + return l; +} + +#endif diff --git a/kthread.c b/kthread.c new file mode 100644 index 0000000..ffdf940 --- /dev/null +++ b/kthread.c @@ -0,0 +1,159 @@ +#include +#include +#include +#include +#include "kthread.h" + +#if (defined(WIN32) || defined(_WIN32)) && defined(_MSC_VER) +#define __sync_fetch_and_add(ptr, addend) _InterlockedExchangeAdd((void*)ptr, addend) +#endif + +/************ + * kt_for() * + ************/ + +struct kt_for_t; + +typedef struct { + struct kt_for_t *t; + long i; +} ktf_worker_t; + +typedef struct kt_for_t { + int n_threads; + long n; + ktf_worker_t *w; + void (*func)(void*,long,int); + void *data; +} kt_for_t; + +static inline long steal_work(kt_for_t *t) +{ + int i, min_i = -1; + long k, min = LONG_MAX; + for (i = 0; i < t->n_threads; ++i) + if (min > t->w[i].i) min = t->w[i].i, min_i = i; + k = 
__sync_fetch_and_add(&t->w[min_i].i, t->n_threads); + return k >= t->n? -1 : k; +} + +static void *ktf_worker(void *data) +{ + ktf_worker_t *w = (ktf_worker_t*)data; + long i; + for (;;) { + i = __sync_fetch_and_add(&w->i, w->t->n_threads); + if (i >= w->t->n) break; + w->t->func(w->t->data, i, w - w->t->w); + } + while ((i = steal_work(w->t)) >= 0) + w->t->func(w->t->data, i, w - w->t->w); + pthread_exit(0); +} + +void kt_for(int n_threads, void (*func)(void*,long,int), void *data, long n) +{ + if (n_threads > 1) { + int i; + kt_for_t t; + pthread_t *tid; + t.func = func, t.data = data, t.n_threads = n_threads, t.n = n; + t.w = (ktf_worker_t*)calloc(n_threads, sizeof(ktf_worker_t)); + tid = (pthread_t*)calloc(n_threads, sizeof(pthread_t)); + for (i = 0; i < n_threads; ++i) + t.w[i].t = &t, t.w[i].i = i; + for (i = 0; i < n_threads; ++i) pthread_create(&tid[i], 0, ktf_worker, &t.w[i]); + for (i = 0; i < n_threads; ++i) pthread_join(tid[i], 0); + free(tid); free(t.w); + } else { + long j; + for (j = 0; j < n; ++j) func(data, j, 0); + } +} + +/***************** + * kt_pipeline() * + *****************/ + +struct ktp_t; + +typedef struct { + struct ktp_t *pl; + int64_t index; + int step; + void *data; +} ktp_worker_t; + +typedef struct ktp_t { + void *shared; + void *(*func)(void*, int, void*); + int64_t index; + int n_workers, n_steps; + ktp_worker_t *workers; + pthread_mutex_t mutex; + pthread_cond_t cv; +} ktp_t; + +static void *ktp_worker(void *data) +{ + ktp_worker_t *w = (ktp_worker_t*)data; + ktp_t *p = w->pl; + while (w->step < p->n_steps) { + // test whether we can kick off the job with this worker + pthread_mutex_lock(&p->mutex); + for (;;) { + int i; + // test whether another worker is doing the same step + for (i = 0; i < p->n_workers; ++i) { + if (w == &p->workers[i]) continue; // ignore itself + if (p->workers[i].step <= w->step && p->workers[i].index < w->index) + break; + } + if (i == p->n_workers) break; // no workers with smaller indices are doing 
w->step or the previous steps + pthread_cond_wait(&p->cv, &p->mutex); + } + pthread_mutex_unlock(&p->mutex); + + // working on w->step + w->data = p->func(p->shared, w->step, w->step? w->data : 0); // for the first step, input is NULL + + // update step and let other workers know + pthread_mutex_lock(&p->mutex); + w->step = w->step == p->n_steps - 1 || w->data? (w->step + 1) % p->n_steps : p->n_steps; + if (w->step == 0) w->index = p->index++; + pthread_cond_broadcast(&p->cv); + pthread_mutex_unlock(&p->mutex); + } + pthread_exit(0); +} + +void kt_pipeline(int n_threads, void *(*func)(void*, int, void*), void *shared_data, int n_steps) +{ + ktp_t aux; + pthread_t *tid; + int i; + + if (n_threads < 1) n_threads = 1; + aux.n_workers = n_threads; + aux.n_steps = n_steps; + aux.func = func; + aux.shared = shared_data; + aux.index = 0; + pthread_mutex_init(&aux.mutex, 0); + pthread_cond_init(&aux.cv, 0); + + aux.workers = (ktp_worker_t*)calloc(n_threads, sizeof(ktp_worker_t)); + for (i = 0; i < n_threads; ++i) { + ktp_worker_t *w = &aux.workers[i]; + w->step = 0; w->pl = &aux; w->data = 0; + w->index = aux.index++; + } + + tid = (pthread_t*)calloc(n_threads, sizeof(pthread_t)); + for (i = 0; i < n_threads; ++i) pthread_create(&tid[i], 0, ktp_worker, &aux.workers[i]); + for (i = 0; i < n_threads; ++i) pthread_join(tid[i], 0); + free(tid); free(aux.workers); + + pthread_mutex_destroy(&aux.mutex); + pthread_cond_destroy(&aux.cv); +} diff --git a/kthread.h b/kthread.h new file mode 100644 index 0000000..c3cd165 --- /dev/null +++ b/kthread.h @@ -0,0 +1,15 @@ +#ifndef KTHREAD_H +#define KTHREAD_H + +#ifdef __cplusplus +extern "C" { +#endif + +void kt_for(int n_threads, void (*func)(void*,long,int), void *data, long n); +void kt_pipeline(int n_threads, void *(*func)(void*, int, void*), void *shared_data, int n_steps); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/kvec-km.h b/kvec-km.h new file mode 100644 index 0000000..e865173 --- /dev/null +++ b/kvec-km.h @@ -0,0 
+1,105 @@ +/* The MIT License + + Copyright (c) 2008, by Attractive Chaos + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. +*/ + +/* + An example: + +#include "kvec.h" +int main() { + kvec_t(int) array; + kv_init(array); + kv_push(int, array, 10); // append + kv_a(int, array, 20) = 5; // dynamic + kv_A(array, 20) = 4; // static + kv_destroy(array); + return 0; +} +*/ + +/* + 2008-09-22 (0.1.0): + + * The initial version. 
+ +*/ + +#ifndef AC_KVEC_H +#define AC_KVEC_H + +#include +#include "kalloc.h" + +#define kv_roundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) + +#define kvec_t(type) struct { size_t n, m; type *a; } +#define kv_init(v) ((v).n = (v).m = 0, (v).a = 0) +#define kv_destroy(v) free((v).a) +#define kv_A(v, i) ((v).a[(i)]) +#define kv_pop(v) ((v).a[--(v).n]) +#define kv_size(v) ((v).n) +#define kv_max(v) ((v).m) + +#define kv_resize(type, km, v, s) do { \ + if ((v).m < (s)) { \ + (v).m = (s); \ + kv_roundup32((v).m); \ + (v).a = (type*)krealloc((km), (v).a, sizeof(type) * (v).m); \ + } \ + } while (0) + +#define kv_copy(type, km, v1, v0) do { \ + if ((v1).m < (v0).n) kv_resize(type, (km), (v1), (v0).n); \ + (v1).n = (v0).n; \ + memcpy((v1).a, (v0).a, sizeof(type) * (v0).n); \ + } while (0) \ + +#define kv_push(type, km, v, x) do { \ + if ((v).n == (v).m) { \ + (v).m = (v).m? (v).m<<1 : 2; \ + (v).a = (type*)krealloc((km), (v).a, sizeof(type) * (v).m); \ + } \ + (v).a[(v).n++] = (x); \ + } while (0) + +#define kv_pushp(type, km, v, p) do { \ + if ((v).n == (v).m) { \ + (v).m = (v).m? 
(v).m<<1 : 2; \ + (v).a = (type*)krealloc((km), (v).a, sizeof(type) * (v).m); \ + } \ + *(p) = &(v).a[(v).n++]; \ + } while (0) + +#define kv_reverse(type, v, start) do { \ + if ((v).m > 0 && (v).n > (start)) { \ + size_t __i, __end = (v).n - (start); \ + type *__a = (v).a + (start); \ + for (__i = 0; __i < __end>>1; ++__i) { \ + type __t = __a[__end - 1 - __i]; \ + __a[__end - 1 - __i] = __a[__i]; __a[__i] = __t; \ + } \ + } \ + } while (0) + +#endif diff --git a/kvec.h b/kvec.h new file mode 100644 index 0000000..632fce4 --- /dev/null +++ b/kvec.h @@ -0,0 +1,110 @@ +/* The MIT License + + Copyright (c) 2008, by Attractive Chaos + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. 
+*/ + +/* + An example: + +#include "kvec.h" +int main() { + kvec_t(int) array; + kv_init(array); + kv_push(int, array, 10); // append + kv_a(int, array, 20) = 5; // dynamic + kv_A(array, 20) = 4; // static + kv_destroy(array); + return 0; +} +*/ + +/* + 2008-09-22 (0.1.0): + + * The initial version. + +*/ + +#ifndef AC_KVEC_H +#define AC_KVEC_H + +#include + +#define kv_roundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) + +#define kvec_t(type) struct { size_t n, m; type *a; } +#define kv_init(v) ((v).n = (v).m = 0, (v).a = 0) +#define kv_destroy(v) free((v).a) +#define kv_A(v, i) ((v).a[(i)]) +#define kv_pop(v) ((v).a[--(v).n]) +#define kv_size(v) ((v).n) +#define kv_max(v) ((v).m) + +#define kv_resize(type, v, s) do { \ + if ((v).m < (s)) { \ + (v).m = (s); \ + kv_roundup32((v).m); \ + (v).a = (type*)realloc((v).a, sizeof(type) * (v).m); \ + } \ + } while (0) + +#define kv_copy(type, v1, v0) do { \ + if ((v1).m < (v0).n) kv_resize(type, v1, (v0).n); \ + (v1).n = (v0).n; \ + memcpy((v1).a, (v0).a, sizeof(type) * (v0).n); \ + } while (0) \ + +#define kv_push(type, v, x) do { \ + if ((v).n == (v).m) { \ + (v).m = (v).m? (v).m<<1 : 2; \ + (v).a = (type*)realloc((v).a, sizeof(type) * (v).m); \ + } \ + (v).a[(v).n++] = (x); \ + } while (0) + +#define kv_pushp(type, v, p) do { \ + if ((v).n == (v).m) { \ + (v).m = (v).m? (v).m<<1 : 2; \ + (v).a = (type*)realloc((v).a, sizeof(type) * (v).m); \ + } \ + *(p) = &(v).a[(v).n++]; \ + } while (0) + +#define kv_a(type, v, i) ((v).m <= (size_t)(i)? \ + ((v).m = (v).n = (i) + 1, kv_roundup32((v).m), \ + (v).a = (type*)realloc((v).a, sizeof(type) * (v).m), 0) \ + : (v).n <= (size_t)(i)? 
(v).n = (i) \ + : 0), (v).a[(i)] + +#define kv_reverse(type, v, start) do { \ + if ((v).m > 0 && (v).n > (start)) { \ + size_t __i, __end = (v).n - (start); \ + type *__a = (v).a + (start); \ + for (__i = 0; __i < __end>>1; ++__i) { \ + type __t = __a[__end - 1 - __i]; \ + __a[__end - 1 - __i] = __a[__i]; __a[__i] = __t; \ + } \ + } \ + } while (0) + +#endif diff --git a/lchain.c b/lchain.c new file mode 100644 index 0000000..e16ba3e --- /dev/null +++ b/lchain.c @@ -0,0 +1,441 @@ +#include +#include +#include +#include +#include "mgpriv.h" +#include "kalloc.h" +#include "krmq.h" + +static int64_t mg_chain_bk_end(int32_t max_drop, const mg128_t *z, const int32_t *f, const int64_t *p, int32_t *t, int64_t k) +{ + int64_t i = z[k].y, end_i = -1, max_i = i; + int32_t max_s = 0; + if (i < 0 || t[i] != 0) return i; + do { + int32_t s; + t[i] = 2; + end_i = i = p[i]; + s = i < 0? z[k].x : (int32_t)z[k].x - f[i]; + if (s > max_s) max_s = s, max_i = i; + else if (max_s - s > max_drop) break; + } while (i >= 0 && t[i] == 0); + for (i = z[k].y; i >= 0 && i != end_i; i = p[i]) // reset modified t[] + t[i] = 0; + return max_i; +} + +uint64_t *mg_chain_backtrack(void *km, int64_t n, const int32_t *f, const int64_t *p, int32_t *v, int32_t *t, int32_t min_cnt, int32_t min_sc, int32_t max_drop, + int32_t extra_u, int32_t *n_u_, int32_t *n_v_) +{ + mg128_t *z; + uint64_t *u; + int64_t i, k, n_z, n_v; + int32_t n_u; + + *n_u_ = *n_v_ = 0; + for (i = 0, n_z = 0; i < n; ++i) // precompute n_z + if (f[i] >= min_sc) ++n_z; + if (n_z == 0) return 0; + KMALLOC(km, z, n_z); + for (i = 0, k = 0; i < n; ++i) // populate z[] + if (f[i] >= min_sc) z[k].x = f[i], z[k++].y = i; + radix_sort_128x(z, z + n_z); + + memset(t, 0, n * 4); + for (k = n_z - 1, n_v = n_u = 0; k >= 0; --k) { // precompute n_u + if (t[z[k].y] == 0) { + int64_t n_v0 = n_v, end_i; + int32_t sc; + end_i = mg_chain_bk_end(max_drop, z, f, p, t, k); + for (i = z[k].y; i != end_i; i = p[i]) + ++n_v, t[i] = 1; + sc = i < 0? 
z[k].x : (int32_t)z[k].x - f[i]; + if (sc >= min_sc && n_v > n_v0 && n_v - n_v0 >= min_cnt) + ++n_u; + else n_v = n_v0; + } + } + KMALLOC(km, u, n_u + extra_u); + memset(t, 0, n * 4); + for (k = n_z - 1, n_v = n_u = 0; k >= 0; --k) { // populate u[] + if (t[z[k].y] == 0) { + int64_t n_v0 = n_v, end_i; + int32_t sc; + end_i = mg_chain_bk_end(max_drop, z, f, p, t, k); + for (i = z[k].y; i != end_i; i = p[i]) + v[n_v++] = i, t[i] = 1; + sc = i < 0? z[k].x : (int32_t)z[k].x - f[i]; + if (sc >= min_sc && n_v > n_v0 && n_v - n_v0 >= min_cnt) + u[n_u++] = (uint64_t)sc << 32 | (n_v - n_v0); + else n_v = n_v0; + } + } + kfree(km, z); + assert(n_v < INT32_MAX); + *n_u_ = n_u, *n_v_ = n_v; + return u; +} + +static mg128_t *compact_a(void *km, int32_t n_u, uint64_t *u, int32_t n_v, int32_t *v, mg128_t *a) +{ + mg128_t *b, *w; + uint64_t *u2; + int64_t i, j, k; + + // write the result to b[] + KMALLOC(km, b, n_v); + for (i = 0, k = 0; i < n_u; ++i) { + int32_t k0 = k, ni = (int32_t)u[i]; + for (j = 0; j < ni; ++j) + b[k++] = a[v[k0 + (ni - j - 1)]]; + } + kfree(km, v); + + // sort u[] and a[] by the target position, such that adjacent chains may be joined + KMALLOC(km, w, n_u); + for (i = k = 0; i < n_u; ++i) { + w[i].x = b[k].x, w[i].y = (uint64_t)k<<32|i; + k += (int32_t)u[i]; + } + radix_sort_128x(w, w + n_u); + KMALLOC(km, u2, n_u); + for (i = k = 0; i < n_u; ++i) { + int32_t j = (int32_t)w[i].y, n = (int32_t)u[j]; + u2[i] = u[j]; + memcpy(&a[k], &b[w[i].y>>32], n * sizeof(mg128_t)); + k += n; + } + memcpy(u, u2, n_u * 8); + memcpy(b, a, k * sizeof(mg128_t)); // write _a_ to _b_ and deallocate _a_ because _a_ is oversized, sometimes a lot + kfree(km, a); kfree(km, w); kfree(km, u2); + return b; +} + +static inline int32_t comput_sc(const mg128_t *ai, const mg128_t *aj, int32_t max_dist_x, int32_t max_dist_y, int32_t bw, float chn_pen_gap, float chn_pen_skip, int is_cdna, int n_seg) +{ + int32_t dq = (int32_t)ai->y - (int32_t)aj->y, dr, dd, dg, q_span, sc; + int32_t sidi = 
(ai->y & MG_SEED_SEG_MASK) >> MG_SEED_SEG_SHIFT; + int32_t sidj = (aj->y & MG_SEED_SEG_MASK) >> MG_SEED_SEG_SHIFT; + if (dq <= 0 || dq > max_dist_x) return INT32_MIN; + dr = (int32_t)(ai->x - aj->x); + if (sidi == sidj && (dr == 0 || dq > max_dist_y)) return INT32_MIN; + dd = dr > dq? dr - dq : dq - dr; + if (sidi == sidj && dd > bw) return INT32_MIN; + if (n_seg > 1 && !is_cdna && sidi == sidj && dr > max_dist_y) return INT32_MIN; + dg = dr < dq? dr : dq; + q_span = aj->y>>32&0xff; + sc = q_span < dg? q_span : dg; + if (dd || dg > q_span) { + float lin_pen, log_pen; + lin_pen = chn_pen_gap * (float)dd + chn_pen_skip * (float)dg; + log_pen = dd >= 1? mg_log2(dd + 1) : 0.0f; // mg_log2() only works for dd>=2 + if (is_cdna || sidi != sidj) { + if (sidi != sidj && dr == 0) ++sc; // possibly due to overlapping paired ends; give a minor bonus + else if (dr > dq || sidi != sidj) sc -= (int)(lin_pen < log_pen? lin_pen : log_pen); // deletion or jump between paired ends + else sc -= (int)(lin_pen + .5f * log_pen); + } else sc -= (int)(lin_pen + .5f * log_pen); + } + return sc; +} + +/* Input: + * a[].x: tid<<33 | rev<<32 | tpos + * a[].y: flags<<40 | q_span<<32 | q_pos + * Output: + * n_u: #chains + * u[]: score<<32 | #anchors (sum of lower 32 bits of u[] is the returned length of a[]) + * input a[] is deallocated on return + */ +mg128_t *mg_lchain_dp(int max_dist_x, int max_dist_y, int bw, int max_skip, int max_iter, int min_cnt, int min_sc, float chn_pen_gap, float chn_pen_skip, + int is_cdna, int n_seg, int64_t n, mg128_t *a, int *n_u_, uint64_t **_u, void *km) +{ // TODO: make sure this works when n has more than 32 bits + int32_t *f, *t, *v, n_u, n_v, mmax_f = 0, max_drop = bw; + int64_t *p, i, j, max_ii, st = 0, n_iter = 0; + uint64_t *u; + + if (_u) *_u = 0, *n_u_ = 0; + if (n == 0 || a == 0) { + kfree(km, a); + return 0; + } + if (max_dist_x < bw) max_dist_x = bw; + if (max_dist_y < bw && !is_cdna) max_dist_y = bw; + if (is_cdna) max_drop = INT32_MAX; + KMALLOC(km, 
p, n); + KMALLOC(km, f, n); + KMALLOC(km, v, n); + KCALLOC(km, t, n); + + // fill the score and backtrack arrays + for (i = 0, max_ii = -1; i < n; ++i) { + int64_t max_j = -1, end_j; + int32_t max_f = a[i].y>>32&0xff, n_skip = 0; + while (st < i && (a[i].x>>32 != a[st].x>>32 || a[i].x > a[st].x + max_dist_x)) ++st; + if (i - st > max_iter) st = i - max_iter; + for (j = i - 1; j >= st; --j) { + int32_t sc; + sc = comput_sc(&a[i], &a[j], max_dist_x, max_dist_y, bw, chn_pen_gap, chn_pen_skip, is_cdna, n_seg); + ++n_iter; + if (sc == INT32_MIN) continue; + sc += f[j]; + if (sc > max_f) { + max_f = sc, max_j = j; + if (n_skip > 0) --n_skip; + } else if (t[j] == (int32_t)i) { + if (++n_skip > max_skip) + break; + } + if (p[j] >= 0) t[p[j]] = i; + } + end_j = j; + if (max_ii < 0 || a[i].x - a[max_ii].x > (int64_t)max_dist_x) { + int32_t max = INT32_MIN; + max_ii = -1; + for (j = i - 1; j >= st; --j) + if (max < f[j]) max = f[j], max_ii = j; + } + if (max_ii >= 0 && max_ii < end_j) { + int32_t tmp; + tmp = comput_sc(&a[i], &a[max_ii], max_dist_x, max_dist_y, bw, chn_pen_gap, chn_pen_skip, is_cdna, n_seg); + if (tmp != INT32_MIN && max_f < tmp + f[max_ii]) + max_f = tmp + f[max_ii], max_j = max_ii; + } + f[i] = max_f, p[i] = max_j; + v[i] = max_j >= 0 && v[max_j] > max_f? 
v[max_j] : max_f; // v[] keeps the peak score up to i; f[] is the score ending at i, not always the peak + if (max_ii < 0 || (a[i].x - a[max_ii].x <= (int64_t)max_dist_x && f[max_ii] < f[i])) + max_ii = i; + if (mmax_f < max_f) mmax_f = max_f; + } + if (mg_dbg_flag & MG_DBG_LC_PROF) fprintf(stderr, "LP\tn_iter=%ld\tmmax_f=%d\n", (long)n_iter, mmax_f); + + u = mg_chain_backtrack(km, n, f, p, v, t, min_cnt, min_sc, max_drop, 0, &n_u, &n_v); + *n_u_ = n_u, *_u = u; // NB: note that u[] may not be sorted by score here + kfree(km, p); kfree(km, f); kfree(km, t); + if (n_u == 0) { + kfree(km, a); kfree(km, v); + return 0; + } + return compact_a(km, n_u, u, n_v, v, a); +} + +typedef struct lc_elem_s { + int32_t y; + int64_t i; + double pri; + KRMQ_HEAD(struct lc_elem_s) head; +} lc_elem_t; + +#define lc_elem_cmp(a, b) ((a)->y < (b)->y? -1 : (a)->y > (b)->y? 1 : ((a)->i > (b)->i) - ((a)->i < (b)->i)) +#define lc_elem_lt2(a, b) ((a)->pri < (b)->pri) +KRMQ_INIT(lc_elem, lc_elem_t, head, lc_elem_cmp, lc_elem_lt2) + +KALLOC_POOL_INIT(rmq, lc_elem_t) + +static inline int32_t comput_sc_simple(const mg128_t *ai, const mg128_t *aj, float chn_pen_gap, float chn_pen_skip, int32_t *exact, int32_t *width) +{ + int32_t dq = (int32_t)ai->y - (int32_t)aj->y, dr, dd, dg, q_span, sc; + dr = (int32_t)(ai->x - aj->x); + *width = dd = dr > dq? dr - dq : dq - dr; + dg = dr < dq? dr : dq; + q_span = aj->y>>32&0xff; + sc = q_span < dg? q_span : dg; + if (exact) *exact = (dd == 0 && dg <= q_span); + if (dd || dq > q_span) { + float lin_pen, log_pen; + lin_pen = chn_pen_gap * (float)dd + chn_pen_skip * (float)dg; + log_pen = dd >= 1? 
mg_log2(dd + 1) : 0.0f; // mg_log2() only works for dd>=2 + sc -= (int)(lin_pen + .5f * log_pen); + } + return sc; +} + +mg128_t *mg_lchain_rmq(int max_dist, int max_dist_inner, int bw, int max_chn_skip, int cap_rmq_size, int min_cnt, int min_sc, float chn_pen_gap, float chn_pen_skip, + int64_t n, mg128_t *a, int *n_u_, uint64_t **_u, void *km) +{ + int32_t *f,*t, *v, n_u, n_v, mmax_f = 0, max_rmq_size = 0, max_drop = bw; + int64_t *p, i, i0, st = 0, st_inner = 0, n_iter = 0; + uint64_t *u; + lc_elem_t *root = 0, *root_inner = 0; + void *mem_mp = 0; + kmp_rmq_t *mp; + + if (_u) *_u = 0, *n_u_ = 0; + if (n == 0 || a == 0) { + kfree(km, a); + return 0; + } + if (max_dist < bw) max_dist = bw; + if (max_dist_inner <= 0 || max_dist_inner >= max_dist) max_dist_inner = 0; + KMALLOC(km, p, n); + KMALLOC(km, f, n); + KCALLOC(km, t, n); + KMALLOC(km, v, n); + mem_mp = km_init2(km, 0x10000); + mp = kmp_init_rmq(mem_mp); + + // fill the score and backtrack arrays + for (i = i0 = 0; i < n; ++i) { + int64_t max_j = -1; + int32_t q_span = a[i].y>>32&0xff, max_f = q_span; + lc_elem_t s, *q, *r, lo, hi; + // add in-range anchors + if (i0 < i && a[i0].x != a[i].x) { + int64_t j; + for (j = i0; j < i; ++j) { + q = kmp_alloc_rmq(mp); + q->y = (int32_t)a[j].y, q->i = j, q->pri = -(f[j] + 0.5 * chn_pen_gap * ((int32_t)a[j].x + (int32_t)a[j].y)); + krmq_insert(lc_elem, &root, q, 0); + if (max_dist_inner > 0) { + r = kmp_alloc_rmq(mp); + *r = *q; + krmq_insert(lc_elem, &root_inner, r, 0); + } + } + i0 = i; + } + // get rid of active chains out of range + while (st < i && (a[i].x>>32 != a[st].x>>32 || a[i].x > a[st].x + max_dist || krmq_size(head, root) > cap_rmq_size)) { + s.y = (int32_t)a[st].y, s.i = st; + if ((q = krmq_find(lc_elem, root, &s, 0)) != 0) { + q = krmq_erase(lc_elem, &root, q, 0); + kmp_free_rmq(mp, q); + } + ++st; + } + if (max_dist_inner > 0) { // similar to the block above, but applied to the inner tree + while (st_inner < i && (a[i].x>>32 != a[st_inner].x>>32 || a[i].x 
> a[st_inner].x + max_dist_inner || krmq_size(head, root_inner) > cap_rmq_size)) { + s.y = (int32_t)a[st_inner].y, s.i = st_inner; + if ((q = krmq_find(lc_elem, root_inner, &s, 0)) != 0) { + q = krmq_erase(lc_elem, &root_inner, q, 0); + kmp_free_rmq(mp, q); + } + ++st_inner; + } + } + // RMQ + lo.i = INT32_MAX, lo.y = (int32_t)a[i].y - max_dist; + hi.i = 0, hi.y = (int32_t)a[i].y - 1; + if ((q = krmq_rmq(lc_elem, root, &lo, &hi)) != 0) { + int32_t sc, exact, width, n_skip = 0; + int64_t j = q->i; + assert(q->y >= lo.y && q->y <= hi.y); + sc = f[j] + comput_sc_simple(&a[i], &a[j], chn_pen_gap, chn_pen_skip, &exact, &width); + if (width <= bw && sc > max_f) max_f = sc, max_j = j; + if (!exact && root_inner && (int32_t)a[i].y > 0) { + lc_elem_t *lo, *hi; + s.y = (int32_t)a[i].y - 1, s.i = n; + krmq_interval(lc_elem, root_inner, &s, &lo, &hi); + if (lo) { + const lc_elem_t *q; + int32_t width, n_rmq_iter = 0; + krmq_itr_t(lc_elem) itr; + krmq_itr_find(lc_elem, root_inner, lo, &itr); + while ((q = krmq_at(&itr)) != 0) { + if (q->y < (int32_t)a[i].y - max_dist_inner) break; + ++n_rmq_iter; + j = q->i; + sc = f[j] + comput_sc_simple(&a[i], &a[j], chn_pen_gap, chn_pen_skip, 0, &width); + if (width <= bw) { + if (sc > max_f) { + max_f = sc, max_j = j; + if (n_skip > 0) --n_skip; + } else if (t[j] == (int32_t)i) { + if (++n_skip > max_chn_skip) + break; + } + if (p[j] >= 0) t[p[j]] = i; + } + if (!krmq_itr_prev(lc_elem, &itr)) break; + } + n_iter += n_rmq_iter; + } + } + } + // set max + assert(max_j < 0 || (a[max_j].x < a[i].x && (int32_t)a[max_j].y < (int32_t)a[i].y)); + f[i] = max_f, p[i] = max_j; + v[i] = max_j >= 0 && v[max_j] > max_f? 
v[max_j] : max_f; // v[] keeps the peak score up to i; f[] is the score ending at i, not always the peak + if (mmax_f < max_f) mmax_f = max_f; + if (max_rmq_size < krmq_size(head, root)) max_rmq_size = krmq_size(head, root); + } + if (mg_dbg_flag & MG_DBG_LC_PROF) fprintf(stderr, "LP\tn_iter=%ld\tmmax_f=%d\trmq_size=%d\tmp_max=%ld\n", (long)n_iter, mmax_f, max_rmq_size, mp->max); + km_destroy(mem_mp); + + u = mg_chain_backtrack(km, n, f, p, v, t, min_cnt, min_sc, max_drop, 0, &n_u, &n_v); + *n_u_ = n_u, *_u = u; // NB: note that u[] may not be sorted by score here + kfree(km, p); kfree(km, f); kfree(km, t); + if (n_u == 0) { + kfree(km, a); kfree(km, v); + return 0; + } + return compact_a(km, n_u, u, n_v, v, a); +} + +mg_lchain_t *mg_lchain_gen(void *km, uint32_t hash, int qlen, int n_u, uint64_t *u, mg128_t *a) +{ + mg128_t *z; + mg_lchain_t *r; + int i, k; + + if (n_u == 0) return 0; + KCALLOC(km, r, n_u); + + // sort by query position + KMALLOC(km, z, n_u); + for (i = k = 0; i < n_u; ++i) { + int32_t qs = (int32_t)a[k].y + 1 - (a[k].y>>32 & 0xff); + z[i].x = (uint64_t)qs << 32 | u[i] >> 32; + z[i].y = (uint64_t)k << 32 | (int32_t)u[i]; + k += (int32_t)u[i]; + } + radix_sort_128x(z, z + n_u); + + // populate r[] + for (i = 0; i < n_u; ++i) { + mg_lchain_t *ri = &r[i]; + int32_t k = z[i].y >> 32, q_span = a[k].y >> 32 & 0xff; + ri->off = k; + ri->cnt = (int32_t)z[i].y; + ri->score = (uint32_t)z[i].x; + ri->v = a[k].x >> 32; + ri->rs = (int32_t)a[k].x + 1 > q_span? 
(int32_t)a[k].x + 1 - q_span : 0; // for HPC k-mer + ri->qs = z[i].x >> 32; + ri->re = (int32_t)a[k + ri->cnt - 1].x + 1; + ri->qe = (int32_t)a[k + ri->cnt - 1].y + 1; + } + kfree(km, z); + return r; +} + +static int32_t get_mini_idx(const mg128_t *a, int32_t n, const int32_t *mini_pos) +{ + int32_t x, L = 0, R = n - 1; + x = (int32_t)a->y; + while (L <= R) { // binary search + int32_t m = ((uint64_t)L + R) >> 1; + int32_t y = mini_pos[m]; + if (y < x) L = m + 1; + else if (y > x) R = m - 1; + else return m; + } + return -1; +} + +/* Before: + * a[].x: tid<<33 | rev<<32 | tpos + * a[].y: flags<<40 | q_span<<32 | q_pos + * After: + * a[].x: mini_pos<<32 | tpos + * a[].y: same + */ +void mg_update_anchors(int32_t n_a, mg128_t *a, int32_t n, const int32_t *mini_pos) +{ + int32_t st, j, k; + if (n_a <= 0) return; + st = get_mini_idx(&a[0], n, mini_pos); + assert(st >= 0); + for (k = 0, j = st; j < n && k < n_a; ++j) + if ((int32_t)a[k].y == mini_pos[j]) + a[k].x = (uint64_t)j << 32 | (a[k].x & 0xffffffffU), ++k; + assert(k == n_a); +} diff --git a/main.c b/main.c new file mode 100644 index 0000000..e2b23a1 --- /dev/null +++ b/main.c @@ -0,0 +1,301 @@ +#include +#include +#include +#include "mgpriv.h" +#include "gfa-priv.h" +#include "sys.h" +#include "ketopt.h" + +#ifdef __linux__ +#include +#include +void liftrlimit() +{ + struct rlimit r; + getrlimit(RLIMIT_AS, &r); + r.rlim_cur = r.rlim_max; + setrlimit(RLIMIT_AS, &r); +} +#else +void liftrlimit() {} +#endif + +static ko_longopt_t long_options[] = { + { "version", ko_no_argument, 300 }, + { "vc", ko_no_argument, 301 }, + { "secondary", ko_required_argument, 302 }, + { "ins-qovlp", ko_required_argument, 303 }, + { "heap-sort", ko_required_argument, 304 }, + { "show-unmap", ko_required_argument, 305 }, + { "ggen", ko_optional_argument, 306 }, + { "rmq", ko_optional_argument, 307 }, + { "gg-min-end-cnt", ko_required_argument, 309 }, + { "gg-min-end-frac", ko_required_argument, 310 }, + { "no-comp-path", ko_no_argument, 
312 }, + { "gg-match-pen", ko_required_argument, 313 }, + { "frag", ko_no_argument, 314 }, + { "cov", ko_no_argument, 315 }, + { "min-cov-blen", ko_required_argument, 316 }, + { "min-cov-mapq", ko_required_argument, 317 }, + { "gap-pen", ko_required_argument, 318 }, + { "ref-bonus", ko_required_argument, 319 }, + { "max-gap-pre", ko_required_argument, 320 }, + { "max-lc-skip", ko_required_argument, 321 }, + { "max-gc-skip", ko_required_argument, 322 }, + { "max-lc-iter", ko_required_argument, 323 }, + { "max-rmq-size", ko_required_argument, 324 }, + { "inv", ko_required_argument, 325 }, + { "write-mz", ko_no_argument, 326 }, + { "call", ko_no_argument, 327 }, + { "cap-calloc", ko_required_argument, 328 }, + { "gdp-max-ed", ko_required_argument, 329 }, + { "no-kalloc", ko_no_argument, 401 }, + { "dbg-qname", ko_no_argument, 402 }, + { "dbg-lchain", ko_no_argument, 403 }, + { "dbg-insert", ko_no_argument, 404 }, + { "dbg-shortk", ko_no_argument, 405 }, + { "dbg-gc1", ko_no_argument, 406 }, + { "dbg-lc-prof", ko_no_argument, 407 }, + { "dbg-mwf-long", ko_no_argument, 408 }, + { "dbg-mwf-seq", ko_no_argument, 409 }, + { 0, 0, 0 } +}; + +static inline int64_t mm_parse_num2(const char *str, char **q) +{ + double x; + char *p; + x = strtod(str, &p); + if (*p == 'G' || *p == 'g') x *= 1e9, ++p; + else if (*p == 'M' || *p == 'm') x *= 1e6, ++p; + else if (*p == 'K' || *p == 'k') x *= 1e3, ++p; + if (q) *q = p; + return (int64_t)(x + .499); +} + +static inline int64_t mm_parse_num(const char *str) +{ + return mm_parse_num2(str, 0); +} + +static inline void yes_or_no(uint64_t *flag_, uint64_t f, int long_idx, const char *arg, int yes_to_set) +{ + uint64_t flag = *flag_; + if (yes_to_set) { + if (strcmp(arg, "yes") == 0 || strcmp(arg, "y") == 0) flag |= f; + else if (strcmp(arg, "no") == 0 || strcmp(arg, "n") == 0) flag &= ~f; + else fprintf(stderr, "[WARNING]\033[1;31m option '--%s' only accepts 'yes' or 'no'.\033[0m\n", long_options[long_idx].name); + } else { + if 
(strcmp(arg, "yes") == 0 || strcmp(arg, "y") == 0) flag &= ~f; + else if (strcmp(arg, "no") == 0 || strcmp(arg, "n") == 0) flag |= f; + else fprintf(stderr, "[WARNING]\033[1;31m option '--%s' only accepts 'yes' or 'no'.\033[0m\n", long_options[long_idx].name); + } + *flag_ = flag; +} + +int main(int argc, char *argv[]) +{ + const char *opt_str = "x:k:w:t:r:m:n:g:K:o:p:N:Pq:d:l:f:U:M:F:j:L:DSc"; + ketopt_t o = KETOPT_INIT; + mg_mapopt_t opt; + mg_idxopt_t ipt; + mg_ggopt_t gpt; + int i, c, ret, n_threads = 4; + char *s; + FILE *fp_help = stderr; + gfa_t *g; + + mg_verbose = 3; + liftrlimit(); + mg_realtime0 = realtime(); + mg_opt_set(0, &ipt, &opt, &gpt); + + while ((c = ketopt(&o, argc, argv, 1, opt_str, long_options)) >= 0) { // test command line options and apply option -x/preset first + if (c == 'x') { + if (mg_opt_set(o.arg, &ipt, &opt, &gpt) < 0) { + fprintf(stderr, "[ERROR] unknown preset '%s'\n", o.arg); + return 1; + } + } else if (c == ':') { + fprintf(stderr, "[ERROR] missing option argument\n"); + return 1; + } else if (c == '?') { + fprintf(stderr, "[ERROR] unknown option in \"%s\"\n", argv[o.i - 1]); + return 1; + } + } + o = KETOPT_INIT; + + while ((c = ketopt(&o, argc, argv, 1, opt_str, long_options)) >= 0) { + if (c == 'w') ipt.w = atoi(o.arg); + else if (c == 'k') ipt.k = atoi(o.arg); + else if (c == 't') n_threads = atoi(o.arg); + else if (c == 'f') opt.occ_max1_frac = atof(o.arg); + else if (c == 'g') opt.max_gap = mm_parse_num(o.arg); + else if (c == 'F') opt.max_frag_len = mm_parse_num(o.arg); + else if (c == 'K') opt.mini_batch_size = mm_parse_num(o.arg); + else if (c == 'p') opt.pri_ratio = atof(o.arg); + else if (c == 'N') opt.best_n = mm_parse_num(o.arg); + else if (c == 'P') opt.flag |= MG_M_ALL_CHAINS; + else if (c == 'D') opt.flag |= MG_M_NO_DIAG; + else if (c == 'M') opt.mask_level = atof(o.arg); + else if (c == 'j') opt.div = atof(o.arg); + else if (c == 'l') gpt.min_map_len = mm_parse_num(o.arg); + else if (c == 'd') gpt.min_depth_len 
= mm_parse_num(o.arg); + else if (c == 'q') gpt.min_mapq = atoi(o.arg); + else if (c == 'L') gpt.min_var_len = atoi(o.arg); + else if (c == 'S') opt.flag |= MG_M_WRITE_LCHAIN; + else if (c == 'c') opt.flag |= MG_M_CIGAR; + else if (c == 301) opt.flag |= MG_M_VERTEX_COOR; // --vc + else if (c == 309) gpt.ggs_min_end_cnt = atoi(o.arg); // --gg-min-end-cnt + else if (c == 310) gpt.ggs_min_end_frac = atof(o.arg); // --gg-min-end-frac + else if (c == 312) opt.flag |= MG_M_NO_COMP_PATH; // --no-comp-path + else if (c == 313) gpt.match_pen = atoi(o.arg); // --gg-match-pen + else if (c == 314) opt.flag |= MG_M_FRAG_MODE | MG_M_FRAG_MERGE; // --frag + else if (c == 315) opt.flag |= MG_M_CAL_COV | MG_M_SKIP_GCHECK, gpt.flag |= MG_G_CAL_COV; // --cov + else if (c == 316) opt.min_cov_blen = mm_parse_num(o.arg); // --min-cov-blen + else if (c == 317) opt.min_cov_mapq = atoi(o.arg); // --min-cov-mapq + else if (c == 318) opt.chn_pen_gap = atof(o.arg); // --gap-pen + else if (c == 319) opt.ref_bonus = atoi(o.arg); // --ref-bonus + else if (c == 320) opt.max_gap_pre = mm_parse_num(o.arg); // --max-gap-pre + else if (c == 321) opt.max_lc_skip = atoi(o.arg); // --max-lc-skip + else if (c == 322) opt.max_gc_skip = atoi(o.arg); // --max-gc-skip + else if (c == 323) opt.max_lc_iter = mm_parse_num(o.arg); // --max-lc-iter + else if (c == 324) opt.rmq_size_cap = mm_parse_num(o.arg); // --max-rmq-size + else if (c == 326) opt.flag |= MG_M_WRITE_MZ | MG_M_WRITE_LCHAIN; // --write-mz + else if (c == 327) gpt.flag |= MG_G_CALL, opt.flag |= MG_M_SKIP_GCHECK; // --call + else if (c == 328) opt.cap_kalloc = mm_parse_num(o.arg); // --cap-kalloc + else if (c == 329) opt.gdp_max_ed = mm_parse_num(o.arg); // --gdp-max-ed + else if (c == 401) mg_dbg_flag |= MG_DBG_NO_KALLOC; // --no-kalloc + else if (c == 402) mg_dbg_flag |= MG_DBG_QNAME; // --dbg-qname + else if (c == 403) mg_dbg_flag |= MG_DBG_LCHAIN; // --dbg-lchain + else if (c == 404) mg_dbg_flag |= MG_DBG_INSERT; // --dbg-insert + else if (c 
== 405) mg_dbg_flag |= MG_DBG_SHORTK; // --dbg-shortk + else if (c == 406) mg_dbg_flag |= MG_DBG_GC1; // --dbg-gc1 + else if (c == 407) mg_dbg_flag |= MG_DBG_LC_PROF; // --dbg-lc-prof + else if (c == 408) mg_dbg_flag |= MG_DBG_MINIWFA; // --dbg-mwf-long + else if (c == 409) mg_dbg_flag |= MG_DBG_MWF_SEQ; // --dbg-mwf-seq + else if (c == 'U') { + opt.occ_max1 = (int)mm_parse_num2(o.arg, &s); + if (*s == ',') opt.occ_max1_cap = (int)mm_parse_num2(s + 1, &s); + } else if (c == 'r') { + opt.bw = (int)mm_parse_num2(o.arg, &s); + if (*s == ',') opt.bw_long = (int)mm_parse_num2(s + 1, &s); + } else if (c == 'n') { + opt.min_gc_cnt = (int)mm_parse_num2(o.arg, &s); + if (*s == ',') opt.min_lc_cnt = (int)mm_parse_num2(s + 1, &s); + } else if (c == 'm') { + opt.min_gc_score = (int)mm_parse_num2(o.arg, &s); + if (*s == ',') opt.min_lc_score = (int)mm_parse_num2(s + 1, &s); + } else if (c == 'o') { + if (strcmp(o.arg, "-") != 0) { + if (freopen(o.arg, "wb", stdout) == NULL) { + fprintf(stderr, "[ERROR]\033[1;31m failed to write the output to file '%s'\033[0m\n", o.arg); + exit(1); + } + } + } else if (c == 306) { // --ggen + if (o.arg) { + if (strcmp(o.arg, "none") == 0) gpt.algo = MG_G_NONE; + else if (strcmp(o.arg, "simple") == 0) gpt.algo = MG_G_GGSIMPLE; + else { + fprintf(stderr, "ERROR: unknown graph generation algorithm \"%s\"\n", o.arg); + return 1; + } + } else gpt.algo = MG_G_GGSIMPLE; + } else if (c == 302) { // --secondary + yes_or_no(&opt.flag, MG_M_PRINT_2ND, o.longidx, o.arg, 1); + } else if (c == 303) { // --ins-qovlp + yes_or_no(&gpt.flag, MG_G_NO_QOVLP, o.longidx, o.arg, 1); + } else if (c == 304) { // --heap-sort + yes_or_no(&opt.flag, MG_M_HEAP_SORT, o.longidx, o.arg, 1); + } else if (c == 305) { // --show-unmap + yes_or_no(&opt.flag, MG_M_SHOW_UNMAP, o.longidx, o.arg, 1); + } else if (c == 307) { // --rmq + yes_or_no(&opt.flag, MG_M_RMQ, o.longidx, o.arg, 1); + } else if (c == 325) { // --inv + yes_or_no(&gpt.flag, MG_G_NO_INV, o.longidx, o.arg, 0); + } 
else if (c == 300) { // --version + puts(MG_VERSION); + return 0; + } + } + if (mg_opt_check(&ipt, &opt, &gpt) < 0) + return 1; + if (gpt.algo == MG_G_GGSIMPLE && !(opt.flag&MG_M_CIGAR)) + fprintf(stderr, "[WARNING]\033[1;31m it is recommended to add -c for graph generation\033[0m\n"); + + if (argc == o.ind || fp_help == stdout) { + fprintf(fp_help, "Usage: minigraph [options] [...]\n"); + fprintf(fp_help, "Options:\n"); + fprintf(fp_help, " Indexing:\n"); + fprintf(fp_help, " -k INT k-mer size (no larger than 28) [%d]\n", ipt.k); + fprintf(fp_help, " -w INT minizer window size [%d]\n", ipt.w); + fprintf(fp_help, " Mapping:\n"); + fprintf(fp_help, " -c perform base alignment; RECOMMENDED\n"); + fprintf(fp_help, " -f FLOAT ignore top FLOAT fraction of repetitive minimizers [%g]\n", opt.occ_max1_frac); + fprintf(fp_help, " -U INT[,INT] choose the minimizer occurrence threshold within this interval [%d,%d]\n", opt.occ_max1, opt.occ_max1_cap); + fprintf(fp_help, " -j FLOAT expected sequence divergence [%g]\n", opt.div); + fprintf(fp_help, " -g NUM stop chain enlongation if there are no minimizers in INT-bp [%d]\n", opt.max_gap); + fprintf(fp_help, " -F NUM max fragment length (effective with -xsr or in the fragment mode) [%d]\n", opt.max_frag_len); + fprintf(fp_help, " -r NUM[,NUM] bandwidth for the two rounds of chaining [%d,%d]\n", opt.bw, opt.bw_long); + fprintf(fp_help, " -n INT[,INT] minimal number of minimizers on a graph/linear chain [%d,%d]\n", opt.min_gc_cnt, opt.min_lc_cnt); + fprintf(fp_help, " -m INT[,INT] minimal graph/linear chaining score [%d,%d]\n", opt.min_gc_score, opt.min_lc_score); + fprintf(fp_help, " -p FLOAT min secondary-to-primary score ratio [%g]\n", opt.pri_ratio); + fprintf(fp_help, " -N INT retain at most INT secondary mappings [%d]\n", opt.best_n); + fprintf(fp_help, " -D skip self diagonal matches\n"); + fprintf(fp_help, " Graph generation:\n"); + fprintf(fp_help, " --ggen perform incremental graph generation\n"); + fprintf(fp_help, " -q 
INT min mapping quality [%d]\n", gpt.min_mapq); + fprintf(fp_help, " -l NUM min alignment length [%d]\n", gpt.min_map_len); + fprintf(fp_help, " -d NUM min alignment length for depth calculation [%d]\n", gpt.min_depth_len); + fprintf(fp_help, " -L INT min variant length [%d]\n", gpt.min_var_len); + fprintf(fp_help, " --call call the graph path in each bubble and output BED\n"); + fprintf(fp_help, " Input/output:\n"); + fprintf(fp_help, " -t INT number of threads [%d]\n", n_threads); + fprintf(fp_help, " -o FILE output mappings to FILE [stdout]\n"); + fprintf(fp_help, " -K NUM minibatch size for mapping [500M]\n"); + fprintf(fp_help, " -S output linear chains in * sName sLen nMz div sStart sEnd qStart qEnd\n"); + fprintf(fp_help, " --vc output in the vertex coordinate\n"); + fprintf(fp_help, " Preset:\n"); + fprintf(fp_help, " -x STR preset []\n"); + fprintf(fp_help, " - lr: noisy long read mapping (the default)\n"); + fprintf(fp_help, " - asm: asm-to-ref mapping\n"); + fprintf(fp_help, " - sr: short reads\n"); + fprintf(fp_help, " - ggs: incremental graph generation\n"); + return fp_help == stdout? 
0 : 1; + } + + g = gfa_read(argv[o.ind]); + if (g == 0) { + fprintf(stderr, "[ERROR] failed to load the graph from file '%s'\n", argv[o.ind]); + return 1; + } else if (mg_verbose >= 3) { + fprintf(stderr, "[M::%s::%.3f*%.2f] loaded the graph from \"%s\"\n", __func__, realtime() - mg_realtime0, cputime() / (realtime() - mg_realtime0), argv[o.ind]); + } + + if (gpt.algo == MG_G_NONE && !(gpt.flag & MG_G_CALL)) { + ret = mg_map_files(g, argc - (o.ind + 1), (const char**)&argv[o.ind + 1], &ipt, &opt, n_threads); + } else { + if (gpt.flag & MG_G_CALL) gfa_sort_ref_arc(g); + ret = mg_ggen(g, argc - (o.ind + 1), (const char**)&argv[o.ind + 1], &ipt, &opt, &gpt, n_threads); + } + + if ((gpt.algo != MG_G_NONE || (opt.flag & MG_M_CAL_COV)) && !(gpt.flag & MG_G_CALL)) + gfa_print(g, stdout, 0); + gfa_destroy(g); + + if (fflush(stdout) == EOF) { + fprintf(stderr, "[ERROR] failed to write the results\n"); + exit(EXIT_FAILURE); + } + + if (mg_verbose >= 3) { + fprintf(stderr, "[M::%s] Version: %s\n", __func__, MG_VERSION); + fprintf(stderr, "[M::%s] CMD:", __func__); + for (i = 0; i < argc; ++i) + fprintf(stderr, " %s", argv[i]); + fprintf(stderr, "\n[M::%s] Real time: %.3f sec; CPU: %.3f sec; Peak RSS: %.3f GB\n", __func__, realtime() - mg_realtime0, cputime(), peakrss() / 1024.0 / 1024.0 / 1024.0); + } + return !!ret; +} diff --git a/map-algo.c b/map-algo.c new file mode 100644 index 0000000..5789f54 --- /dev/null +++ b/map-algo.c @@ -0,0 +1,500 @@ +#include +#include +#include +#include "kalloc.h" +#include "mgpriv.h" +#include "khashl.h" +#include "sys.h" + +struct mg_tbuf_s { + void *km; + int frag_gap; +}; + +mg_tbuf_t *mg_tbuf_init(void) +{ + mg_tbuf_t *b; + b = (mg_tbuf_t*)calloc(1, sizeof(mg_tbuf_t)); + if (!(mg_dbg_flag & MG_DBG_NO_KALLOC)) b->km = km_init(); + return b; +} + +void mg_tbuf_destroy(mg_tbuf_t *b) +{ + if (b == 0) return; + if (b->km) km_destroy(b->km); + free(b); +} + +void *mg_tbuf_get_km(mg_tbuf_t *b) +{ + return b->km; +} + +static void 
collect_minimizers(void *km, const mg_mapopt_t *opt, const mg_idx_t *gi, int n_segs, const int *qlens, const char **seqs, mg128_v *mv) +{ + int i, n, sum = 0; + mv->n = 0; + for (i = n = 0; i < n_segs; ++i) { + size_t j; + mg_sketch(km, seqs[i], qlens[i], gi->w, gi->k, i, mv); + for (j = n; j < mv->n; ++j) + mv->a[j].y += sum << 1; + sum += qlens[i], n = mv->n; + } +} + +#include "ksort.h" +#define heap_lt(a, b) ((a).x > (b).x) +KSORT_INIT(heap, mg128_t, heap_lt) + +typedef struct { + uint32_t n; + uint32_t q_pos, q_span; + uint32_t seg_id:31, is_tandem:1; + const uint64_t *cr; +} mg_match_t; + +static mg_match_t *collect_matches(void *km, int *_n_m, int max_occ, const mg_idx_t *gi, const mg128_v *mv, int64_t *n_a, int *rep_len, int *n_mini_pos, int32_t **mini_pos) +{ + int rep_st = 0, rep_en = 0, n_m; + size_t i; + mg_match_t *m; + *n_mini_pos = 0; + KMALLOC(km, *mini_pos, mv->n); + m = (mg_match_t*)kmalloc(km, mv->n * sizeof(mg_match_t)); + for (i = 0, n_m = 0, *rep_len = 0, *n_a = 0; i < mv->n; ++i) { + const uint64_t *cr; + mg128_t *p = &mv->a[i]; + uint32_t q_pos = (uint32_t)p->y, q_span = p->x & 0xff; + int t; + cr = mg_idx_get(gi, p->x>>8, &t); + if (t >= max_occ) { + int en = (q_pos >> 1) + 1, st = en - q_span; + if (st > rep_en) { + *rep_len += rep_en - rep_st; + rep_st = st, rep_en = en; + } else rep_en = en; + } else { + mg_match_t *q = &m[n_m++]; + q->q_pos = q_pos, q->q_span = q_span, q->cr = cr, q->n = t, q->seg_id = p->y >> 32; + q->is_tandem = 0; + if (i > 0 && p->x>>8 == mv->a[i - 1].x>>8) q->is_tandem = 1; + if (i < mv->n - 1 && p->x>>8 == mv->a[i + 1].x>>8) q->is_tandem = 1; + *n_a += q->n; + (*mini_pos)[(*n_mini_pos)++] = q_pos>>1; + } + } + *rep_len += rep_en - rep_st; + *_n_m = n_m; + return m; +} + +static mg128_t *collect_seed_hits_heap(void *km, const mg_mapopt_t *opt, int max_occ, const mg_idx_t *gi, const char *qname, const mg128_v *mv, int qlen, int64_t *n_a, int *rep_len, + int *n_mini_pos, int32_t **mini_pos) +{ + int i, n_m, heap_size 
= 0; + int64_t n_for = 0, n_rev = 0; + mg_match_t *m; + mg128_t *a, *heap; + + m = collect_matches(km, &n_m, max_occ, gi, mv, n_a, rep_len, n_mini_pos, mini_pos); + + heap = (mg128_t*)kmalloc(km, n_m * sizeof(mg128_t)); + a = (mg128_t*)kmalloc(km, *n_a * sizeof(mg128_t)); + + for (i = 0, heap_size = 0; i < n_m; ++i) { + if (m[i].n > 0) { + heap[heap_size].x = m[i].cr[0]; + heap[heap_size].y = (uint64_t)i<<32; + ++heap_size; + } + } + ks_heapmake_heap(heap_size, heap); + while (heap_size > 0) { + mg_match_t *q = &m[heap->y>>32]; + mg128_t *p; + uint64_t r = heap->x; + int32_t rpos = (uint32_t)r >> 1; + // TODO: skip anchor if MG_F_NO_DIAL + if ((r&1) == (q->q_pos&1)) { // forward strand + p = &a[n_for++]; + p->x = r>>32<<33 | rpos; + } else { // reverse strand; TODO: more testing needed for this block + p = &a[(*n_a) - (++n_rev)]; + p->x = r>>32<<33 | 1ULL<<32 | (gi->g->seg[r>>32].len - (rpos + 1 - q->q_span) - 1); + } + p->y = (uint64_t)q->q_span << 32 | q->q_pos >> 1; + p->y |= (uint64_t)q->seg_id << MG_SEED_SEG_SHIFT; + if (q->is_tandem) p->y |= MG_SEED_TANDEM; + p->y |= (uint64_t)(q->n < 255? 
q->n : 255) << MG_SEED_OCC_SHIFT; + // update the heap + if ((uint32_t)heap->y < q->n - 1) { + ++heap[0].y; + heap[0].x = m[heap[0].y>>32].cr[(uint32_t)heap[0].y]; + } else { + heap[0] = heap[heap_size - 1]; + --heap_size; + } + ks_heapdown_heap(0, heap_size, heap); + } + kfree(km, m); + kfree(km, heap); + + // reverse anchors on the reverse strand, as they are in the descending order + if (*n_a > n_for + n_rev) { + memmove(a + n_for, a + (*n_a) - n_rev, n_rev * sizeof(mg128_t)); + *n_a = n_for + n_rev; + } + return a; +} + +static mg128_t *collect_seed_hits(void *km, const mg_mapopt_t *opt, int max_occ, const mg_idx_t *gi, const char *qname, const mg128_v *mv, int qlen, int64_t *n_a, int *rep_len, + int *n_mini_pos, int32_t **mini_pos) +{ + int i, n_m; + mg_match_t *m; + mg128_t *a; + m = collect_matches(km, &n_m, max_occ, gi, mv, n_a, rep_len, n_mini_pos, mini_pos); + a = (mg128_t*)kmalloc(km, *n_a * sizeof(mg128_t)); + for (i = 0, *n_a = 0; i < n_m; ++i) { + mg_match_t *q = &m[i]; + const uint64_t *r = q->cr; + uint32_t k; + for (k = 0; k < q->n; ++k) { + int32_t rpos = (uint32_t)r[k] >> 1; + mg128_t *p; + if (qname && (opt->flag & MG_M_NO_DIAG)) { + const gfa_seg_t *s = &gi->g->seg[r[k]>>32]; + const char *gname = s->snid >= 0 && gi->g->sseq? gi->g->sseq[s->snid].name : s->name; + int32_t g_pos; + if (s->snid >= 0 && gi->g->sseq) + gname = gi->g->sseq[s->snid].name, g_pos = s->soff + (uint32_t)r[k]; + else + gname = s->name, g_pos = (uint32_t)r[k]; + if (g_pos == q->q_pos && strcmp(qname, gname) == 0) + continue; + } + p = &a[(*n_a)++]; + if ((r[k]&1) == (q->q_pos&1)) // forward strand + p->x = r[k]>>32<<33 | rpos; + else // reverse strand + p->x = r[k]>>32<<33 | 1ULL<<32 | (gi->g->seg[r[k]>>32].len - (rpos + 1 - q->q_span) - 1); + p->y = (uint64_t)q->q_span << 32 | q->q_pos >> 1; + p->y |= (uint64_t)q->seg_id << MG_SEED_SEG_SHIFT; + if (q->is_tandem) p->y |= MG_SEED_TANDEM; + p->y |= (uint64_t)(q->n < 255? 
q->n : 255) << MG_SEED_OCC_SHIFT; + } + } + kfree(km, m); + radix_sort_128x(a, a + (*n_a)); + return a; +} + +static void mm_fix_bad_ends(const mg128_t *a, int32_t lc_max_occ, int32_t lc_max_trim, int32_t *as, int32_t *cnt) +{ + int32_t i, k, as0 = *as, cnt0 = *cnt; + for (i = as0 + cnt0 - 1, k = 0; k < lc_max_trim && k < cnt0; ++k, --i) + if (a[i].y>>MG_SEED_OCC_SHIFT <= lc_max_occ) + break; + *cnt -= k; + for (i = as0, k = 0; k < *cnt && k < lc_max_trim; ++i, ++k) + if (a[i].y>>MG_SEED_OCC_SHIFT <= lc_max_occ) + break; + *as += k, *cnt -= k; +} + +static void mm_fix_bad_ends_alt(const mg128_t *a, int32_t score, int bw, int min_match, int32_t *as, int32_t *cnt) +{ + int32_t i, l, m, as0 = *as, cnt0 = *cnt; + if (cnt0 < 3) return; + m = l = a[as0].y >> 32 & 0xff; + for (i = as0 + 1; i < as0 + cnt0 - 1; ++i) { + int32_t lq, lr, min, max; + int32_t q_span = a[i].y >> 32 & 0xff; + lr = (int32_t)a[i].x - (int32_t)a[i-1].x; + lq = (int32_t)a[i].y - (int32_t)a[i-1].y; + min = lr < lq? lr : lq; + max = lr > lq? lr : lq; + if (max - min > l >> 1) *as = i; + l += min; + m += min < q_span? min : q_span; + if (l >= bw << 1 || (m >= min_match && m >= bw) || m >= score>>1) break; + } + *cnt = as0 + cnt0 - *as; + m = l = a[as0 + cnt0 - 1].y >> 32 & 0xff; + for (i = as0 + cnt0 - 2; i > *as; --i) { + int32_t lq, lr, min, max; + int32_t q_span = a[i+1].y >> 32 & 0xff; + lr = (int32_t)a[i+1].x - (int32_t)a[i].x; + lq = (int32_t)a[i+1].y - (int32_t)a[i].y; + min = lr < lq? lr : lq; + max = lr > lq? lr : lq; + if (max - min > l >> 1) *cnt = i + 1 - *as; + l += min; + m += min < q_span? 
min : q_span; + if (l >= bw << 1 || (m >= min_match && m >= bw) || m >= score>>1) break; + } +} + +static int *collect_long_gaps(void *km, int as1, int cnt1, mg128_t *a, int min_gap, int *n_) +{ + int i, n, *K; + *n_ = 0; + for (i = 1, n = 0; i < cnt1; ++i) { // count the number of gaps longer than min_gap + int gap = ((int32_t)a[as1 + i].y - a[as1 + i - 1].y) - ((int32_t)a[as1 + i].x - a[as1 + i - 1].x); + if (gap < -min_gap || gap > min_gap) ++n; + } + if (n <= 1) return 0; + K = (int*)kmalloc(km, n * sizeof(int)); + for (i = 1, n = 0; i < cnt1; ++i) { // store the positions of long gaps + int gap = ((int32_t)a[as1 + i].y - a[as1 + i - 1].y) - ((int32_t)a[as1 + i].x - a[as1 + i - 1].x); + if (gap < -min_gap || gap > min_gap) + K[n++] = i; + } + *n_ = n; + return K; +} + +static void mm_filter_bad_seeds(void *km, int as1, int cnt1, mg128_t *a, int min_gap, int diff_thres, int max_ext_len, int max_ext_cnt) +{ + int max_st, max_en, n, i, k, max, *K; + K = collect_long_gaps(km, as1, cnt1, a, min_gap, &n); + if (K == 0) return; + max = 0, max_st = max_en = -1; + for (k = 0;; ++k) { // traverse long gaps + int gap, l, n_ins = 0, n_del = 0, qs, rs, max_diff = 0, max_diff_l = -1; + if (k == n || k >= max_en) { + if (max_en > 0) + for (i = K[max_st]; i < K[max_en]; ++i) + a[as1 + i].y |= MG_SEED_IGNORE; + max = 0, max_st = max_en = -1; + if (k == n) break; + } + i = K[k]; + gap = ((int32_t)a[as1 + i].y - (int32_t)a[as1 + i - 1].y) - (int32_t)(a[as1 + i].x - a[as1 + i - 1].x); + if (gap > 0) n_ins += gap; + else n_del += -gap; + qs = (int32_t)a[as1 + i - 1].y; + rs = (int32_t)a[as1 + i - 1].x; + for (l = k + 1; l < n && l <= k + max_ext_cnt; ++l) { + int j = K[l], diff; + if ((int32_t)a[as1 + j].y - qs > max_ext_len || (int32_t)a[as1 + j].x - rs > max_ext_len) break; + gap = ((int32_t)a[as1 + j].y - (int32_t)a[as1 + j - 1].y) - (int32_t)(a[as1 + j].x - a[as1 + j - 1].x); + if (gap > 0) n_ins += gap; + else n_del += -gap; + diff = n_ins + n_del - abs(n_ins - n_del); + if 
(max_diff < diff) + max_diff = diff, max_diff_l = l; + } + if (max_diff > diff_thres && max_diff > max) + max = max_diff, max_st = k, max_en = max_diff_l; + } + kfree(km, K); +} + +static void mm_filter_bad_seeds_alt(void *km, int as1, int cnt1, mg128_t *a, int min_gap, int max_ext) +{ + int n, k, *K; + K = collect_long_gaps(km, as1, cnt1, a, min_gap, &n); + if (K == 0) return; + for (k = 0; k < n;) { + int i = K[k], l; + int gap1 = ((int32_t)a[as1 + i].y - (int32_t)a[as1 + i - 1].y) - ((int32_t)a[as1 + i].x - (int32_t)a[as1 + i - 1].x); + int re1 = (int32_t)a[as1 + i].x; + int qe1 = (int32_t)a[as1 + i].y; + gap1 = gap1 > 0? gap1 : -gap1; + for (l = k + 1; l < n; ++l) { + int j = K[l], gap2, q_span_pre, rs2, qs2, m; + if ((int32_t)a[as1 + j].y - qe1 > max_ext || (int32_t)a[as1 + j].x - re1 > max_ext) break; + gap2 = ((int32_t)a[as1 + j].y - (int32_t)a[as1 + j - 1].y) - (int32_t)(a[as1 + j].x - a[as1 + j - 1].x); + q_span_pre = a[as1 + j - 1].y >> 32 & 0xff; + rs2 = (int32_t)a[as1 + j - 1].x + q_span_pre; + qs2 = (int32_t)a[as1 + j - 1].y + q_span_pre; + m = rs2 - re1 < qs2 - qe1? rs2 - re1 : qs2 - qe1; + gap2 = gap2 > 0? 
gap2 : -gap2; + if (m > gap1 + gap2) break; + re1 = (int32_t)a[as1 + j].x; + qe1 = (int32_t)a[as1 + j].y; + gap1 = gap2; + } + if (l > k + 1) { + int j, end = K[l - 1]; + for (j = K[k]; j < end; ++j) + a[as1 + j].y |= MG_SEED_IGNORE; + a[as1 + end].y |= MG_SEED_FIXED; + } + k = l; + } + kfree(km, K); +} + +static double print_time(double t0, int stage, const char *qname) +{ + double t; + t = realtime(); + fprintf(stderr, "Q%d\t%s\t%.3f\n", stage, qname, t - t0); + return t; +} + +void mg_map_frag(const mg_idx_t *gi, int n_segs, const int *qlens, const char **seqs, mg_gchains_t **gcs, mg_tbuf_t *b, const mg_mapopt_t *opt, const char *qname) +{ + int i, l, rep_len, qlen_sum, n_lc, n_gc, n_mini_pos; + int max_chain_gap_qry, max_chain_gap_ref, is_splice = !!(opt->flag & MG_M_SPLICE), is_sr = !!(opt->flag & MG_M_SR); + uint32_t hash; + int64_t n_a; + uint64_t *u; + int32_t *mini_pos; + mg128_t *a; + mg128_v mv = {0,0,0}; + mg_lchain_t *lc; + char *seq_cat; + km_stat_t kmst; + float tmp, chn_pen_gap, chn_pen_skip; + double t = 0.0; + + for (i = 0, qlen_sum = 0; i < n_segs; ++i) + qlen_sum += qlens[i], gcs[i] = 0; + + if (qlen_sum == 0 || n_segs <= 0 || n_segs > MG_MAX_SEG) return; + if (opt->max_qlen > 0 && qlen_sum > opt->max_qlen) return; + + hash = qname? 
kh_hash_str(qname) : 0; + hash ^= kh_hash_uint32(qlen_sum) + kh_hash_uint32(opt->seed); + hash = kh_hash_uint32(hash); + + collect_minimizers(b->km, opt, gi, n_segs, qlens, seqs, &mv); + if (opt->flag & MG_M_HEAP_SORT) a = collect_seed_hits_heap(b->km, opt, opt->occ_max1, gi, qname, &mv, qlen_sum, &n_a, &rep_len, &n_mini_pos, &mini_pos); + else a = collect_seed_hits(b->km, opt, opt->occ_max1, gi, qname, &mv, qlen_sum, &n_a, &rep_len, &n_mini_pos, &mini_pos); + + if (mg_dbg_flag & MG_DBG_SEED) { + fprintf(stderr, "RS\t%d\n", rep_len); + for (i = 0; i < n_a; ++i) + fprintf(stderr, "SD\t%s\t%d\t%c\t%d\t%d\t%d\n", gi->g->seg[a[i].x>>33].name, (int32_t)a[i].x, "+-"[a[i].x>>32&1], (int32_t)a[i].y, (int32_t)(a[i].y>>32&0xff), + i == 0? 0 : ((int32_t)a[i].y - (int32_t)a[i-1].y) - ((int32_t)a[i].x - (int32_t)a[i-1].x)); + } + + // set max chaining gap on the query and the reference sequence + if (is_sr) + max_chain_gap_qry = qlen_sum > opt->max_gap? qlen_sum : opt->max_gap; + else max_chain_gap_qry = opt->max_gap; + if (opt->max_gap_ref > 0) { + max_chain_gap_ref = opt->max_gap_ref; // always honor mg_mapopt_t::max_gap_ref if set + } else if (opt->max_frag_len > 0) { + max_chain_gap_ref = opt->max_frag_len - qlen_sum; + if (max_chain_gap_ref < opt->max_gap) max_chain_gap_ref = opt->max_gap; + } else max_chain_gap_ref = opt->max_gap; + + tmp = expf(-opt->div * gi->k); + chn_pen_gap = opt->chn_pen_gap * tmp; + chn_pen_skip = opt->chn_pen_skip * tmp; + + if (mg_dbg_flag & MG_DBG_QNAME) t = realtime(); + if (n_a == 0) { + if (a) kfree(b->km, a); + a = 0, n_lc = 0, u = 0; + } else { + if (opt->flag & MG_M_RMQ) { + a = mg_lchain_rmq(opt->max_gap, opt->max_gap_pre, opt->bw, opt->max_lc_skip, opt->rmq_size_cap, opt->min_lc_cnt, opt->min_lc_score, + chn_pen_gap, chn_pen_skip, n_a, a, &n_lc, &u, b->km); + } else { + a = mg_lchain_dp(max_chain_gap_ref, max_chain_gap_qry, opt->bw, opt->max_lc_skip, opt->max_lc_iter, opt->min_lc_cnt, opt->min_lc_score, + chn_pen_gap, chn_pen_skip, 
is_splice, n_segs, n_a, a, &n_lc, &u, b->km); + } + } + if (mg_dbg_flag & MG_DBG_QNAME) t = print_time(t, 1, qname); + + if (opt->bw_long > opt->bw && (opt->flag & (MG_M_SPLICE|MG_M_SR)) == 0 && n_segs == 1 && n_lc > 1) { // re-chain/long-join for long sequences + int32_t st = (int32_t)a[0].y, en = (int32_t)a[(int32_t)u[0] - 1].y; + if (qlen_sum - (en - st) > opt->rmq_rescue_size || qlen_sum - (en - st) > qlen_sum * opt->rmq_rescue_ratio) { + int32_t i; + for (i = 0, n_a = 0; i < n_lc; ++i) n_a += (int32_t)u[i]; + kfree(b->km, u); + radix_sort_128x(a, a + n_a); + a = mg_lchain_rmq(opt->max_gap, opt->max_gap_pre, opt->bw_long, opt->max_lc_skip, opt->rmq_size_cap, opt->min_lc_cnt, opt->min_lc_score, + chn_pen_gap, chn_pen_skip, n_a, a, &n_lc, &u, b->km); + } + } + + b->frag_gap = max_chain_gap_ref; + kfree(b->km, mv.a); + + if (n_lc) { + lc = mg_lchain_gen(b->km, hash, qlen_sum, n_lc, u, a); + if (n_lc > 1) { + int32_t n_lc_new = 0; + for (i = 0; i < n_lc; ++i) { + mg_lchain_t *p = &lc[i]; + int32_t cnt = p->cnt, off = p->off; + mm_fix_bad_ends(a, opt->lc_max_occ, opt->lc_max_trim, &off, &cnt); + mm_fix_bad_ends_alt(a, p->score, opt->bw, 100, &off, &cnt); + mm_filter_bad_seeds(b->km, off, cnt, a, 10, 40, opt->max_gap>>1, 10); + mm_filter_bad_seeds_alt(b->km, off, cnt, a, 30, opt->max_gap>>1); + //printf("X\t%d\t%d\t%d\t%d\t%d\t%d\n", p->qs, p->qe, p->off, p->cnt, off, cnt); + p->off = off, p->cnt = cnt; + if (cnt >= opt->min_lc_cnt) { + int32_t q_span = a[p->off].y>>32 & 0xff; + p->rs = (int32_t)a[p->off].x + 1 - q_span; + p->qs = (int32_t)a[p->off].y + 1 - q_span; + p->re = (int32_t)a[p->off + p->cnt - 1].x + 1; + p->qe = (int32_t)a[p->off + p->cnt - 1].y + 1; + lc[n_lc_new++] = *p; + } + } + n_lc = n_lc_new; + } + for (i = 0; i < n_lc; ++i) + mg_update_anchors(lc[i].cnt, &a[lc[i].off], n_mini_pos, mini_pos); + } else lc = 0; + kfree(b->km, mini_pos); + kfree(b->km, u); + if (mg_dbg_flag & MG_DBG_QNAME) t = print_time(t, 2, qname); + + if (mg_dbg_flag & 
MG_DBG_LCHAIN) + mg_print_lchain(stdout, gi, n_lc, lc, a, qname); + + KMALLOC(b->km, seq_cat, qlen_sum); + for (i = l = 0; i < n_segs; ++i) { + strncpy(&seq_cat[l], seqs[i], qlens[i]); + l += qlens[i]; + } + n_gc = mg_gchain1_dp(b->km, gi->g, &n_lc, lc, qlen_sum, opt->bw_long, opt->bw_long, opt->bw_long, opt->max_gc_skip, opt->ref_bonus, + chn_pen_gap, chn_pen_skip, opt->mask_level, a, &u); + if (mg_dbg_flag & MG_DBG_QNAME) t = print_time(t, 3, qname); + gcs[0] = mg_gchain_gen(0, b->km, gi->g, gi->es, n_gc, u, lc, a, hash, opt->min_gc_cnt, opt->min_gc_score, opt->gdp_max_ed, n_segs, seq_cat); + if (mg_dbg_flag & MG_DBG_QNAME) t = print_time(t, 4, qname); + gcs[0]->rep_len = rep_len; + kfree(b->km, a); + kfree(b->km, lc); + kfree(b->km, u); + + mg_gchain_set_parent(b->km, opt->mask_level, gcs[0]->n_gc, gcs[0]->gc, opt->sub_diff, 0); + mg_gchain_flt_sub(opt->pri_ratio, gi->k * 2, opt->best_n, gcs[0]->n_gc, gcs[0]->gc); + mg_gchain_drop_flt(b->km, gcs[0]); + mg_gchain_set_mapq(b->km, gcs[0], qlen_sum, mv.n, opt->min_gc_score); + if ((opt->flag&MG_M_CIGAR) && n_segs == 1) + mg_gchain_cigar(b->km, gi->g, gi->es, seq_cat, gcs[0], qname); + kfree(b->km, seq_cat); + if (mg_dbg_flag & MG_DBG_QNAME) t = print_time(t, 5, qname); + + if (b->km) { + km_stat(b->km, &kmst); + if (mg_dbg_flag & MG_DBG_QNAME) + fprintf(stderr, "QM\t%s\t%d\tcap=%ld,nCore=%ld,largest=%ld\n", qname, qlen_sum, kmst.capacity, kmst.n_cores, kmst.largest); + if (kmst.n_blocks != kmst.n_cores) { + fprintf(stderr, "[E::%s] memory leak at %s\n", __func__, qname); + abort(); + } + if (kmst.largest > 1U<<28 || (opt->cap_kalloc > 0 && kmst.capacity > opt->cap_kalloc)) { + km_destroy(b->km); + b->km = km_init(); + } + } +} + +mg_gchains_t *mg_map(const mg_idx_t *gi, int qlen, const char *seq, mg_tbuf_t *b, const mg_mapopt_t *opt, const char *qname) +{ + mg_gchains_t *gcs; + mg_map_frag(gi, 1, &qlen, &seq, &gcs, b, opt, qname); + return gcs; +} diff --git a/mgpriv.h b/mgpriv.h new file mode 100644 index 
0000000..9bcbf71 --- /dev/null +++ b/mgpriv.h @@ -0,0 +1,128 @@ +#ifndef MGPRIV_H +#define MGPRIV_H + +#include +#include "minigraph.h" + +#define MG_DBG_NO_KALLOC 0x1 +#define MG_DBG_QNAME 0x2 +#define MG_DBG_SEED 0x4 +#define MG_DBG_LCHAIN 0x8 +#define MG_DBG_INSERT 0x10 +#define MG_DBG_SHORTK 0x20 +#define MG_DBG_GC1 0x40 +#define MG_DBG_LC_PROF 0x80 +#define MG_DBG_MINIWFA 0x100 +#define MG_DBG_MWF_SEQ 0x200 + +#define MG_SEED_IGNORE (1ULL<<41) +#define MG_SEED_TANDEM (1ULL<<42) +#define MG_SEED_FIXED (1ULL<<43) + +#define MG_MAX_SEG 255 +#define MG_SEED_SEG_SHIFT 48 +#define MG_SEED_SEG_MASK (0xffULL<<(MG_SEED_SEG_SHIFT)) +#define mg_seg_id(a) ((int32_t)(((a).y&MG_SEED_SEG_MASK) >> MG_SEED_SEG_SHIFT)) + +#define MG_SEED_OCC_SHIFT 56 + +#define MG_MAX_SHORT_K 15 + +#ifndef KSTRING_T +#define KSTRING_T kstring_t +typedef struct __kstring_t { + unsigned l, m; + char *s; +} kstring_t; +#endif + +// shortest path +typedef struct { + // input + uint32_t v; + int32_t target_dist; + uint32_t target_hash; + uint32_t meta:30, check_hash:1, inner:1; + int32_t qlen; + // output + uint32_t n_path:31, is_0:1; + int32_t path_end; + int32_t dist; + uint32_t hash; +} mg_path_dst_t; + +typedef struct { + uint32_t v, d; + int32_t pre; +} mg_pathv_t; + +#ifdef __cplusplus +extern "C" { +#endif + +static inline float mg_log2(float x) // NB: this doesn't work when x<2 +{ + union { float f; uint32_t i; } z = { x }; + float log_2 = ((z.i >> 23) & 255) - 128; + z.i &= ~(255 << 23); + z.i += 127 << 23; + log_2 += (-0.34484843f * z.f + 2.02466578f) * z.f - 0.67487759f; + return log_2; +} + +extern unsigned char seq_nt4_table[256]; + +void mg_sketch(void *km, const char *str, int len, int w, int k, uint32_t rid, mg128_v *p); + +void *mg_idx_a2h(void *km, int32_t n_a, mg128_t *a, int suflen, uint64_t **q_, int32_t *n_); +const uint64_t *mg_idx_hget(const void *h_, const uint64_t *q, int suflen, uint64_t minier, int *n); +void mg_idx_hfree(void *h_); + +const uint64_t *mg_idx_get(const 
mg_idx_t *gi, uint64_t minier, int *n); +void mg_idx_cal_quantile(const mg_idx_t *gi, int32_t m, float f[], int32_t q[]); + +uint64_t *mg_chain_backtrack(void *km, int64_t n, const int32_t *f, const int64_t *p, int32_t *v, int32_t *t, int32_t min_cnt, int32_t min_sc, int32_t max_drop, + int32_t extra_u, int32_t *n_u_, int32_t *n_v_); +mg128_t *mg_lchain_dp(int max_dist_x, int max_dist_y, int bw, int max_skip, int max_iter, int min_cnt, int min_sc, float chn_pen_gap, float chn_pen_skip, + int is_cdna, int n_seg, int64_t n, mg128_t *a, int *n_u_, uint64_t **_u, void *km); +mg128_t *mg_lchain_rmq(int max_dist, int max_dist_inner, int bw, int max_chn_skip, int cap_rmq_size, int min_cnt, int min_sc, float chn_pen_gap, float chn_pen_skip, + int64_t n, mg128_t *a, int *n_u_, uint64_t **_u, void *km); +mg_lchain_t *mg_lchain_gen(void *km, uint32_t hash, int qlen, int n_u, uint64_t *u, mg128_t *a); +void mg_update_anchors(int32_t n_a, mg128_t *a, int32_t n, const int32_t *mini_pos); + +mg_pathv_t *mg_shortest_k(void *km0, const gfa_t *g, uint32_t src, int32_t n_dst, mg_path_dst_t *dst, int32_t max_dist, int32_t max_k, int32_t *n_pathv); +int32_t mg_gchain1_dp(void *km, const gfa_t *g, int32_t *n_lc_, mg_lchain_t *lc, int32_t qlen, int32_t max_dist_g, int32_t max_dist_q, int32_t bw, int32_t max_skip, + int32_t ref_bonus, float chn_pen_gap, float chn_pen_skip, float mask_level, const mg128_t *an, uint64_t **u_); +mg_gchains_t *mg_gchain_gen(void *km_dst, void *km, const gfa_t *g, const gfa_edseq_t *es, int32_t n_u, const uint64_t *u, + mg_lchain_t *lc, const mg128_t *a, uint32_t hash, int32_t min_gc_cnt, int32_t min_gc_score, + int32_t gdp_max_ed, int32_t n_seg, const char *qseq); +void mg_gchain_cigar(void *km, const gfa_t *g, const gfa_edseq_t *es, const char *qseq, mg_gchains_t *gt, const char *qname); +void mg_gchain_free(mg_gchains_t *gs); + +uint32_t *lv_ed_unified(void *km, int32_t tl, const char *ts, int32_t ql, const char *qs, int32_t is_ext, int32_t *score, int32_t 
*t_endl, int32_t *q_endl, int32_t *n_cigar); + +void mg_gchain_restore_order(void *km, mg_gchains_t *gcs); +void mg_gchain_restore_offset(mg_gchains_t *gcs); +void mg_gchain_sort_by_score(void *km, mg_gchains_t *gcs); +void mg_gchain_set_parent(void *km, float mask_level, int n, mg_gchain_t *r, int sub_diff, int hard_mask_level); +int mg_gchain_flt_sub(float pri_ratio, int min_diff, int best_n, int n, mg_gchain_t *r); +void mg_gchain_drop_flt(void *km, mg_gchains_t *gcs); +void mg_gchain_set_mapq(void *km, mg_gchains_t *gcs, int qlen, int max_mini, int min_gc_score); + +void mg_cov_map(const gfa_t *g, const mg_gchains_t *gt, int32_t min_mapq, int32_t min_blen, double *c_seg, double *c_link, const char *qname); +void mg_cov_asm(const gfa_t *g, int32_t n_seq, mg_gchains_t *const *gcs, int32_t min_mapq, int32_t min_blen, double *cov_seg, double *cov_link); + +void mg_print_lchain(FILE *fp, const mg_idx_t *gi, int n_lc0, const mg_lchain_t *lc, const mg128_t *a, const char *qname); +void mg_write_gaf(kstring_t *s, const gfa_t *g, const mg_gchains_t *gs, int32_t n_seg, const int32_t *qlens, const char *qname, uint64_t flag, void *km); + +void mg_sprintf_lite(kstring_t *s, const char *fmt, ...); + +void radix_sort_128x(mg128_t *beg, mg128_t *end); +void radix_sort_gfa64(uint64_t *beg, uint64_t *end); +uint32_t ks_ksmall_uint32_t(size_t n, uint32_t arr[], size_t kk); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/minigraph.1 b/minigraph.1 new file mode 100644 index 0000000..1ead00e --- /dev/null +++ b/minigraph.1 @@ -0,0 +1,359 @@ +.TH minigraph 1 "12 June 2022" "minigraph-0.19 (r551)" "Bioinformatics tools" + +.SH NAME +.PP +minigraph - sequence-to-graph mapping and incremental sequence graph generation + +.SH SYNOPSIS +* Sequence-to-graph mapping: +.RS 4 +.B minigraph +.RB [ -x +.IR preset ] +.RB [ -c ] +.RB [ -t +.IR nThreads ] +.I graph.gfa +.I query1.fa +.RI [ ... 
] +.B > +.I out.gaf +.RE + +* Incremental graph generation: +.RS 4 +.B minigraph +.B -x ggs +.RB [ -c ] +.RB [ -t +.IR nThreads ] +.I initGraph.gfa +.I sample1Asm.fa +.RI [ ... ] +.B > +.I finalGraph.gfa + +.SH DESCRIPTION + +Minigraph is a +.I proof-of-concept +sequence-to-graph mapper and graph constructor. It finds approximate locations +of a query sequence in a sequence graph and incrementally augments an existing +graph with long query subsequences. + +.SH OPTIONS +.SS Indexing options +.TP 10 +.BI -k \ INT +Minimizer k-mer length [17] +.TP +.BI -w \ INT +Minimizer window size [11]. A minimizer is the smallest k-mer in a window of w +consecutive k-mers. +.SS Mapping options +.TP 10 +.BI -c +Perform base alignment; recommended for graph generation +.TP 10 +.BI -U \ INT1 [, INT2 ] +Choose the minimizer occurrence threshold within this interval [50,250] +.TP +.BI -f \ FLOAT +Ignore top +.I FLOAT +fraction of repetitive minimizers [0.0002]. If this threshold falls within the +interval set by +.BR -U , +it will be the final threshold; otherwise the lower or the upper bound of +.B -U +will be applied. +.TP +.BI -j \ FLOAT +Expected query-graph sequence divergence [0.1] +.TP +.BI -g \ NUM +Stop chain elongation if there are no minimizers within +.IR NUM -bp +[10k]. K/k/M/m suffixes are recognized. +.TP +.BI -r \ NUM1 [, NUM2 ] +Bandwidth for the two rounds of chaining [500,20k]. +.I NUM2 +also controls bandwidth for graph chaining. +.TP +.BI -n \ INT1 [, INT2 ] +Drop graph chains consisting of +.RI < INT1 +minimizers and drop linear chains consisting of +.RI < INT2 +minimizers [5,3] +.TP +.BI -m \ INT1 [, INT2 ] +Drop graph chains with graph chaining score +.RI < INT1 +and drop linear chains with linear chaining score +.RI < INT2 +[50,30]. Linear chaining score equals the approximate number of matching bases +minus a weak concave gap penalty. Graph chaining score uses a linear gap +penalty. 
+.TP +.BI -p \ FLOAT +Minimal secondary-to-primary score ratio to output secondary mappings [0.8]. +Between two chains overlapping over half of the shorter chain (controlled by +.BR -M ), +the chain with a lower score is secondary to the chain with a higher score. +.TP +.BI -N \ INT +Output at most +.I INT +secondary mappings [5]. This option has no effect when +.B -P +is applied. +.TP +.B -P +Retain all chains and don't attempt to set primary chains. Options +.B -p +and +.B -N +have no effect when this option is in use. +.TP +.BI -M \ FLOAT +Mark as secondary a chain that overlaps with a better chain by +.I FLOAT +or more of the shorter chain [0.5] +.TP +.BI --max-gap-pre \ NUM +Similar to +.B -g +but used for prefiltering [1000] +.TP +.BI --max-lc-iter \ NUM +max number of iterations for linear chaining [10000] +.TP +.BI --max-rmq-size \ NUM +max size of the RMQ tree [100000] +.TP +.BI --max-lc-skip \ INT +A heuristic that stops linear chaining early [25] +.TP +.BI --max-gc-skip \ INT +Similar to +.B --max-lc-skip +but applied to graph chaining [25] +.TP +.BI --ref-bonus \ INT +Bonus for a reference subwalk [0] +.TP +.BI --min-cov-blen \ NUM +Minimum alignment block length to count [1k] +.TP +.BI --min-cov-mapq \ INT +Minimum mapping quality to count [20] +.SS Graph generation options +.TP 10 +.BR --ggen =[ simple ] +Graph generation algorithm. So far only a +.B simple +algorithm is implemented [simple]. With this option, all query sequences are +loaded into memory. +.TP +.B --call +Call the graph path in each bubble and output in a BED-based format: +.RS + ctg start end sourceNode sinkNode walk:strand:queryName:qStart:qEnd +.RE +.TP +.BI -q \ INT +Minimum mapping quality [5] +.TP +.BI -l \ NUM +Minimum chain length to consider [100k] +.TP +.BI -d \ NUM +Minimum chain length for depth calculation [20k] +.TP +.BI -L \ INT +Minimum insertion length [50] +.TP +.BI --gg-match-pen \ INT +Penalty for a pair of matching anchors [5]. 
Larger value for more fragmented inserts. +Effective without +.BR -c . +.TP +.BR --ins-qovlp = yes | no +Forcefully resolve query overlaps [no]. Effective without +.BR -c . +.TP +.BR --inv = yes | no +Generate graphs with inversions or not [yes] +.TP +.B --cov +Remap and generate segment and link use frequencies. This option triggers GFA +output. When used with +.BR --ggen , +minigraph writes the frequency of link uses and the average breadth of coverage +of each segment to the +.B cf +tag. When used without +.BR --ggen , +minigraph writes the count of link uses and the average depth of coverage of +each segment to the +.B dc +tag. +.B +WARNING: +THIS OPTION IS DEPRECATED AND MAY BE REMOVED IN FUTURE. +.SS Input/output options +.TP 10 +.BI -o \ FILE +Output alignments to +.I FILE +[stdout]. +.TP +.BI -t \ INT +Number of threads [4]. Minigraph uses at most three threads when indexing target +sequences, and uses up to +.IR INT +1 +threads when mapping (the extra thread is for I/O, which is frequently idle and +takes little CPU time). +.TP +.BI -K \ NUM +Number of bases loaded into memory to process in a mini-batch [500M]. +K/M/G/k/m/g suffix is accepted. A large +.I NUM +helps load balancing in the multi-threading mode, at the cost of increased +memory. This option has no effect if +.B --ggen +is applied. +.TP +.B --vc +In output GAF, show mapping paths in the unstable segment coordinate. +.TP +.B -S +Output linear chains in the format of: `*' segName segLen nMinimizer seqDiv segStart segEnd qStart qEnd +.TP +.B --write-mz +Output linear chains in the format of: `*' segName segLen nMinimizer seqDiv segStart segEnd qStart qEnd +k-mer segOffsets qOffsets. segOffsets and qOffsets are comma-separated lists +with each consisting of nMinimizer-1 integers which give the distance from the +previous minimizer on segments and query, respectively. 
+.TP +.BR --secondary = yes | no +Whether to output secondary alignments [no] +.TP +.BR --show-unmap = yes | no +Print unmapped query sequences in GAF [no] +.TP +.B --version +Print version number to stdout +.SS Preset options +.TP 10 +.BI -x \ STR +Preset []. This option applies multiple options at the same time. Other options +on the command line will always override values set by +.BR -x . +Available +.I STR +are: +.RS +.TP 8 +.B lr +Mapping noisy long reads. This is the same as the default setting. +.TP +.B sr +Mapping short single-end or paired-end reads +.RB ( -k21 +.B -w10 -U1000,2500 -g100 -r100 -p.5 -n3,2 -m40,25 --heap-sort=yes -K50m --frag --ref-bonus=1 +.BR --min-cov-blen=50 ). +Paired-end mapping is not supported. +.TP +.B asm +Mapping long contigs or high-quality CCS reads +.RB ( -k19 +.B -w10 -U10,100 -j.01 -g10k -r1k,150k -n5,5 -m1000,40 -K4g --max-lc-skip=50 --max-gc-skip=50 --min-cov-mapq=5 +.BR --min-cov-blen=100k ). +.TP +.B ggs +Incremental graph generation +.RB ( -xasm +.B -N0 +.BR --ggen=simple ). +.RE +.SS Miscellaneous options +.TP 10 +.B --no-kalloc +Use the libc default allocator instead of the kalloc thread-local allocator. +This debugging option is mostly used with Valgrind to detect invalid memory +accesses. Minigraph runs slower with this option, especially in the +multi-threading mode. +.SH OUTPUT FORMAT +.PP +Minigraph outputs mapping positions in the Graph mApping Format (GAF) by +default. GAF is a TAB-delimited text format with each line consisting of at +least 12 fields as are described in the following table: +.TS +center box; +cb | cb | cb +r | c | l . 
+Col Type Description +_ +1 string Query sequence name +2 int Query sequence length +3 int Query start coordinate (0-based; closed) +4 int Query end coordinate (0-based; open) +5 char `+' if query/path on the same strand; `-' if opposite +6 string Path matching /([><][^\\s><]+(:\\d+-\\d+)?)+|([^\\s><]+)/ +7 int Path sequence length +8 int Path start coordinate +9 int Path end coordinate +10 int Number of matching bases in the mapping +11 int Number of bases, including gaps, in the mapping +12 int Mapping quality (0-255 with 255 for missing) +.TE + +.PP +When alignment is available, column 11 gives the total number of sequence +matches, mismatches and gaps in the alignment; column 10 divided by column 11 +gives the BLAST-like alignment identity. When alignment is unavailable, +these two columns are approximate. GAF may optionally have additional fields in +the SAM-like typed key-value format. Minigraph may output the following tags: +.TS +center box; +cb | cb | cb +r | c | l . +Tag Type Description +_ +tp A Type of aln: P/primary and S/secondary +cm i Number of minimizers on the chain +s1 i Chaining score +s2 i Chaining score of the best secondary chain +dv f Approximate per-base sequence divergence +cf f Avg. segment breadth of coverage and link use freq +dc f Avg. segment depth of coverage and link use counts +cg Z CIGAR string +ql B,i Lengths of single-end reads +.TE + +.SH LIMITATIONS +.TP 2 +* +Minigraph needs to find strong colinear chains first. For a graph consisting of +many short segments (e.g. one generated from rare SNPs in large populations), +minigraph will fail to map query sequences. +.TP +* +When connecting colinear chains on graphs, minigraph doesn't always take full +advantage of base sequences and may miss the optimal alignments. +.TP +* +Minigraph only inserts segments contained in long graph chains. This +conservative strategy helps to build a relatively accurate graph, but may miss +more complex events. Other strategies may be explored in future. 
+.TP +* +Base alignment has only been evaluated for human. For more diverse genomes, +the performance may need to be improved. + +.SH SEE ALSO +.PP +minimap2(1), gfatools(1). diff --git a/minigraph.h b/minigraph.h new file mode 100644 index 0000000..2f67217 --- /dev/null +++ b/minigraph.h @@ -0,0 +1,175 @@ +#ifndef MINIGRAPH_H +#define MINIGRAPH_H + +#include +#include "gfa.h" + +#define MG_VERSION "0.19-r551" + +#define MG_M_SPLICE 0x10 +#define MG_M_SR 0x20 +#define MG_M_FRAG_MODE 0x40 +#define MG_M_FRAG_MERGE 0x80 +#define MG_M_FOR_ONLY 0x100 +#define MG_M_REV_ONLY 0x200 +#define MG_M_HEAP_SORT 0x400 +#define MG_M_VERTEX_COOR 0x800 +#define MG_M_ALL_CHAINS 0x1000 +#define MG_M_PRINT_2ND 0x2000 +#define MG_M_CAL_COV 0x4000 +#define MG_M_RMQ 0x8000 +#define MG_M_COPY_COMMENT 0x10000 +#define MG_M_INDEPEND_SEG 0x20000 +#define MG_M_NO_QUAL 0x40000 +#define MG_M_2_IO_THREADS 0x80000 +#define MG_M_SHOW_UNMAP 0x100000 +#define MG_M_NO_COMP_PATH 0x200000 +#define MG_M_NO_DIAG 0x400000 +#define MG_M_WRITE_LCHAIN 0x800000 +#define MG_M_WRITE_MZ 0x1000000 +#define MG_M_SKIP_GCHECK 0x2000000 +#define MG_M_CIGAR 0x4000000 + +#define MG_G_NONE 0 +#define MG_G_GGSIMPLE 1 + +#define MG_G_NO_QOVLP 0x1 +#define MG_G_CAL_COV 0x2 +#define MG_G_NO_INV 0x4 +#define MG_G_CALL 0x8 + +typedef struct { uint64_t x, y; } mg128_t; +typedef struct { size_t n, m; mg128_t *a; } mg128_v; +typedef struct { int32_t n, m; uint32_t *a; } mg32_v; + +typedef struct { + int w, k; + int bucket_bits; +} mg_idxopt_t; + +typedef struct { + uint64_t flag; + int64_t mini_batch_size; + int seed; + int max_qlen; + int pe_ori; + int occ_max1, occ_max1_cap; + float occ_max1_frac; + int bw, bw_long; + int rmq_size_cap; + int rmq_rescue_size; + float rmq_rescue_ratio; + int max_gap_pre, max_gap, max_gap_ref, max_frag_len; + float div; + float chn_pen_gap, chn_pen_skip; + int max_lc_skip, max_lc_iter, max_gc_skip; + int min_lc_cnt, min_lc_score; + int min_gc_cnt, min_gc_score; + int gdp_max_ed, lc_max_trim, 
lc_max_occ; + float mask_level; + int sub_diff; + int best_n; + float pri_ratio; + int ref_bonus; + int64_t cap_kalloc; + int min_cov_mapq, min_cov_blen; +} mg_mapopt_t; + +typedef struct { + uint64_t flag; + int algo; + int min_mapq; + int min_map_len, min_depth_len; + int min_var_len, match_pen; + // parameters specific to ggsimple/ggs + int ggs_shrink_pen; + int ggs_min_end_cnt; + float ggs_min_end_frac; + // scoring for SW check + float ggs_max_iden, ggs_min_inv_iden; +} mg_ggopt_t; + +typedef struct { + const gfa_t *g; + gfa_edseq_t *es; + int32_t b, w, k, flag, n_seg; + struct mg_idx_bucket_s *B; // index (hidden) +} mg_idx_t; + +typedef struct { + int32_t off, cnt:31, inner_pre:1; + uint32_t v; + int32_t rs, re, qs, qe; + int32_t score, dist_pre; + uint32_t hash_pre; +} mg_lchain_t; + +typedef struct { + int32_t off, cnt; + uint32_t v; + int32_t score; + int32_t ed; +} mg_llchain_t; + +typedef struct { + int32_t n_cigar, mlen, blen, aplen, ss, ee; // ss: start on the start vertex; ee: end on the end vertex + uint32_t cigar[]; +} mg_cigar_t; + +typedef struct { + int32_t id, parent; + int32_t off, cnt; + int32_t n_anchor, score; + int32_t qs, qe; + int32_t plen, ps, pe; + int32_t blen, mlen; + float div; + uint32_t hash; + int32_t subsc, n_sub; + uint32_t mapq:8, flt:1, dummy:23; + mg_cigar_t *p; +} mg_gchain_t; + +typedef struct { + void *km; + int32_t n_gc, n_lc, n_a, rep_len; + mg_gchain_t *gc; + mg_llchain_t *lc; + mg128_t *a; // minimizer positions; see comments above mg_update_anchors() for details +} mg_gchains_t; + +typedef struct mg_tbuf_s mg_tbuf_t; + +extern int mg_verbose, mg_dbg_flag; +extern double mg_realtime0; + +#ifdef __cplusplus +extern "C" { +#endif + +// options +int mg_opt_set(const char *preset, mg_idxopt_t *io, mg_mapopt_t *mo, mg_ggopt_t *go); +int mg_opt_check(const mg_idxopt_t *io, const mg_mapopt_t *mo, const mg_ggopt_t *go); +void mg_opt_update(const mg_idx_t *gi, mg_mapopt_t *mo, mg_ggopt_t *go); + +// index operations +mg_idx_t 
*mg_index(gfa_t *g, const mg_idxopt_t *io, int n_threads, mg_mapopt_t *mo); // combine mg_index_core() and mg_opt_update() +void mg_idx_destroy(mg_idx_t *gi); + +// mapping +mg_tbuf_t *mg_tbuf_init(void); +void mg_tbuf_destroy(mg_tbuf_t *b); +mg_gchains_t *mg_map(const mg_idx_t *gi, int qlen, const char *seq, mg_tbuf_t *b, const mg_mapopt_t *opt, const char *qname); +void mg_map_frag(const mg_idx_t *gi, int n_segs, const int *qlens, const char **seqs, mg_gchains_t **gcs, mg_tbuf_t *b, const mg_mapopt_t *opt, const char *qname); + +// high-level mapping APIs +int mg_map_files(gfa_t *g, int n_fn, const char **fn, const mg_idxopt_t *ipt, const mg_mapopt_t *opt0, int n_threads); + +// graph generation +int mg_ggen(gfa_t *g, int32_t n_fn, const char **fn, const mg_idxopt_t *ipt, const mg_mapopt_t *opt0, const mg_ggopt_t *go, int n_threads); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/miniwfa.c b/miniwfa.c new file mode 100644 index 0000000..6c5159a --- /dev/null +++ b/miniwfa.c @@ -0,0 +1,834 @@ +#include +#include +#include +#include +#include "miniwfa.h" +#include "kalloc.h" + +/* + * Default setting + */ +void mwf_opt_init(mwf_opt_t *opt) +{ + memset(opt, 0, sizeof(*opt)); + opt->x = 4; // corresponding SW score: m=1, x=3, o1=4, e1=3/2, o2=15, e2=1/2 + opt->o1 = 4, opt->e1 = 2; + opt->o2 = 15, opt->e2 = 1; + opt->kmer = 13, opt->max_occ = 2, opt->min_len = 30; +} + +/* + * Structs and simple functions for traceback + */ +typedef struct { + int32_t lo, hi; + uint8_t *x; +} wf_tb1_t; + +typedef struct { + int32_t m, n; + wf_tb1_t *a; +} wf_tb_t; + +static wf_tb1_t *wf_tb_add(void *km, wf_tb_t *tb, int32_t lo, int32_t hi) +{ + wf_tb1_t *p; + if (tb->n == tb->m) { + tb->m += (tb->m>>1) + 4; + tb->a = Krealloc(km, wf_tb1_t, tb->a, tb->m); + } + p = &tb->a[tb->n++]; + p->lo = lo, p->hi = hi; + p->x = Kcalloc(km, uint8_t, hi - lo + 1); + return p; +} + +typedef struct { + int32_t m, n; + uint32_t *cigar; +} wf_cigar_t; + +static void wf_cigar_push1(void *km, 
wf_cigar_t *c, int32_t op, int32_t len) +{ + if (c->n && op == (c->cigar[c->n-1]&0xf)) { + c->cigar[c->n-1] += len<<4; + } else { + if (c->n == c->m) { + c->m = c->m + (c->m>>1) + 4; + c->cigar = Krealloc(km, uint32_t, c->cigar, c->m); + } + c->cigar[c->n++] = len<<4 | op; + } +} + +/* + * The stripe data structure + */ +#define WF_NEG_INF (-0x40000000) + +typedef struct { + int32_t lo, hi; + int32_t *mem, *H, *E1, *E2, *F1, *F2; +} wf_slice_t; + +typedef struct { + int32_t s, top, n, max_pen, lo, hi; + wf_slice_t *a; +} wf_stripe_t; + +void wf_stripe_add(void *km, wf_stripe_t *wf, int32_t lo, int32_t hi) +{ + int32_t i, n, m1 = wf->max_pen + 1, m2 = m1 * 2; + wf_slice_t *f; + ++wf->s; + ++wf->top; + if (wf->top == wf->n) wf->top = 0; + f = &wf->a[wf->top]; + f->lo = lo, f->hi = hi; + n = hi - lo + 1; + kfree(km, f->mem); + f->mem = Kmalloc(km, int32_t, 5 * (n + m2)); + f->H = f->mem + m1; + f->E1 = f->H + n + m2; + f->F1 = f->E1 + n + m2; + f->E2 = f->F1 + n + m2; + f->F2 = f->E2 + n + m2; + for (i = -m1; i < 0; ++i) + f->H[i] = f->E1[i] = f->E2[i] = f->F1[i] = f->F2[i] = WF_NEG_INF; + for (i = n; i < n + m1; ++i) + f->H[i] = f->E1[i] = f->E2[i] = f->F1[i] = f->F2[i] = WF_NEG_INF; + f->H -= lo, f->E1 -= lo, f->E2 -= lo, f->F1 -= lo, f->F2 -= lo; // such that f->H[lo] points to 0 +} + +static wf_stripe_t *wf_stripe_init(void *km, int32_t max_pen) +{ + int32_t i; + wf_stripe_t *wf; + wf = Kcalloc(km, wf_stripe_t, 1); + wf->max_pen = max_pen; + wf->n = max_pen + 1; + wf->a = Kcalloc(km, wf_slice_t, wf->n); + wf->lo = wf->hi = 0; + for (i = 0; i < wf->n; ++i) { + wf_slice_t *f; + wf_stripe_add(km, wf, 0, 0); + f = &wf->a[wf->top]; + f->H[0] = f->E1[0] = f->E2[0] = f->F1[0] = f->F2[0] = WF_NEG_INF; + } + wf->s = 0; + wf->a[wf->top].H[0] = -1; + return wf; +} + +static void wf_stripe_destroy(void *km, wf_stripe_t *wf) +{ + int32_t i; + for (i = 0; i < wf->n; ++i) + kfree(km, wf->a[i].mem); + kfree(km, wf->a); + kfree(km, wf); +} + +static inline wf_slice_t 
*wf_stripe_get(const wf_stripe_t *wf, int32_t x) +{ + int32_t y = wf->top - x; + if (y < 0) y += wf->n; + return &wf->a[y]; +} + +static inline int good_diag(int32_t d, int32_t k, int32_t tl, int32_t ql) // check if (d,k) falls within the DP matrix +{ + return ((k >= -1 && k < tl) && (d + k >= -1 && d + k < ql)); +} + +static void wf_stripe_shrink(wf_stripe_t *wf, int32_t tl, int32_t ql) +{ + int32_t j, d; + for (d = wf->lo; d <= wf->hi; ++d) { + for (j = 0; j < wf->n; ++j) { + wf_slice_t *p = &wf->a[(wf->top + 1 + j) % wf->n]; + if (d < p->lo || d > p->hi) continue; + if (good_diag(d, p->H[d], tl, ql)) break; + if (good_diag(d, p->E1[d], tl, ql) || good_diag(d, p->F1[d], tl, ql)) break; + if (good_diag(d, p->E2[d], tl, ql) || good_diag(d, p->F2[d], tl, ql)) break; + } + if (j < wf->n) break; // stop when we see a "good diagonal" in the stripe + } + assert(d <= wf->hi); // should never happen + wf->lo = d; + for (d = wf->hi; d >= wf->lo; --d) { + for (j = 0; j < wf->n; ++j) { + wf_slice_t *p = &wf->a[(wf->top + 1 + j) % wf->n]; + if (d < p->lo || d > p->hi) continue; + if (good_diag(d, p->H[d], tl, ql)) break; + if (good_diag(d, p->E1[d], tl, ql) || good_diag(d, p->F1[d], tl, ql)) break; + if (good_diag(d, p->E2[d], tl, ql) || good_diag(d, p->F2[d], tl, ql)) break; + } + if (j < wf->n) break; + } + assert(d >= wf->lo); + wf->hi = d; +} + +typedef struct { + int32_t s, d; +} wf_chkpt_t; + +/* + * Extend a diagonal along exact matches + */ + +// pad strings with distinct characters +static void wf_pad_str(void *km, int32_t tl, const char *ts, int32_t ql, const char *qs, char **pts, char **pqs) +{ + uint8_t t[256]; + int32_t i, c1 = -1, c2 = -1; + char *s1, *s2; + *pts = *pqs = 0; + // collect all used characters + memset(t, 0, 256); + for (i = 0; i < tl; ++i) + if (t[(uint8_t)ts[i]] == 0) + t[(uint8_t)ts[i]] = 1; + for (i = 0; i < ql; ++i) + if (t[(uint8_t)qs[i]] == 0) + t[(uint8_t)qs[i]] = 1; + for (i = 0; i < 256; ++i) + if (t[i] == 0) { + if (c1 < 0) c1 = i; + 
else if (c2 < 0) c2 = i; + } + if (c1 < 0 || c2 < 0) return; // The two strings use >=255 characters. Unlikely for bio strings. + s1 = Kmalloc(km, char, tl + ql + 16); // the two strings are allocated together + s2 = s1 + tl + 8; + memcpy(s1, ts, tl); + for (i = tl; i < tl + 8; ++i) s1[i] = c1; // pad with c1 + memcpy(s2, qs, ql); + for (i = ql; i < ql + 8; ++i) s2[i] = c2; // pad with c2 + *pts = s1, *pqs = s2; +} + +// Extend a diagonal along exact matches. +static inline int32_t wf_extend1_padded(const char *ts, const char *qs, int32_t k, int32_t d) +{ + uint64_t cmp = 0; + const char *ts_ = ts + 1; + const char *qs_ = qs + d + 1; + while (1) { + uint64_t x = *(uint64_t*)(ts_ + k); // warning: unaligned memory access + uint64_t y = *(uint64_t*)(qs_ + k); + cmp = x ^ y; + if (cmp == 0) k += 8; + else break; + } + k += __builtin_ctzl(cmp) >> 3; + return k; +} + +/* + * Core wf_next() routines + */ + +// Force loop vectorization. Learned from WFA. +#if defined(__clang__) + #define PRAGMA_LOOP_VECTORIZE _Pragma("clang loop vectorize(enable)") +#elif defined(__GNUC__) + #define PRAGMA_LOOP_VECTORIZE _Pragma("GCC ivdep") +#else + #define PRAGMA_LOOP_VECTORIZE _Pragma("ivdep") +#endif + +#define wf_max(a, b) ((a) >= (b)? 
(a) : (b)) + +static void wf_next_prep(void *km, const mwf_opt_t *opt, wf_stripe_t *wf, int32_t lo, int32_t hi, + int32_t **H, int32_t **E1, int32_t **F1, int32_t **E2, int32_t **F2, + const int32_t **pHx, const int32_t **pHo1, const int32_t **pHo2, + const int32_t **pE1, const int32_t **pF1, const int32_t **pE2, const int32_t **pF2) +{ + const wf_slice_t *fx, *fo1, *fo2, *fe1, *fe2; + wf_slice_t *ft; + wf_stripe_add(km, wf, lo, hi); + ft = &wf->a[wf->top]; + fx = wf_stripe_get(wf, opt->x); + fo1 = wf_stripe_get(wf, opt->o1 + opt->e1); + fo2 = wf_stripe_get(wf, opt->o2 + opt->e2); + fe1 = wf_stripe_get(wf, opt->e1); + fe2 = wf_stripe_get(wf, opt->e2); + *pHx = fx->H, *pHo1 = fo1->H, *pHo2 = fo2->H, *pE1 = fe1->E1, *pE2 = fe2->E2, *pF1 = fe1->F1, *pF2 = fe2->F2; + *H = ft->H, *E1 = ft->E1, *E2 = ft->E2, *F1 = ft->F1, *F2 = ft->F2; +} + +static void wf_next_score(int32_t lo, int32_t hi, int32_t *H, int32_t *E1, int32_t *F1, int32_t *E2, int32_t *F2, + const int32_t *pHx, const int32_t *pHo1, const int32_t *pHo2, + const int32_t *pE1, const int32_t *pF1, const int32_t *pE2, const int32_t *pF2) +{ + int32_t d; + PRAGMA_LOOP_VECTORIZE + for (d = lo; d <= hi; ++d) { + int32_t h, f, e; + E1[d] = wf_max(pHo1[d-1], pE1[d-1]); + E2[d] = wf_max(pHo2[d-1], pE2[d-1]); + e = wf_max(E1[d], E2[d]); + F1[d] = wf_max(pHo1[d+1], pF1[d+1]) + 1; + F2[d] = wf_max(pHo2[d+1], pF2[d+1]) + 1; + f = wf_max(F1[d], F2[d]); + h = wf_max(e, f); + H[d] = wf_max(pHx[d] + 1, h); + // if (H[d] >= -1) fprintf(stderr, "s=%d, d=%d, k=%d, (%d,%d)\n", wf->s, d, H[d], E1[d], F1[d]); + } +} + +static void wf_next_tb(int32_t lo, int32_t hi, int32_t *H, int32_t *E1, int32_t *F1, int32_t *E2, int32_t *F2, uint8_t *ax, + const int32_t *pHx, const int32_t *pHo1, const int32_t *pHo2, + const int32_t *pE1, const int32_t *pF1, const int32_t *pE2, const int32_t *pF2) +{ + int32_t d; + PRAGMA_LOOP_VECTORIZE + for (d = lo; d <= hi; ++d) { + int32_t h, f, e; + uint8_t x = 0, ze, zf, z; + x |= pHo1[d-1] >= pE1[d-1]? 
0 : 0x08; + E1[d] = wf_max(pHo1[d-1], pE1[d-1]); + x |= pHo2[d-1] >= pE2[d-1]? 0 : 0x20; + E2[d] = wf_max(pHo2[d-1], pE2[d-1]); + ze = E1[d] >= E2[d]? 1 : 3; + e = wf_max(E1[d], E2[d]); + x |= pHo1[d+1] >= pF1[d+1]? 0 : 0x10; + F1[d] = wf_max(pHo1[d+1], pF1[d+1]) + 1; + x |= pHo2[d+1] >= pF2[d+1]? 0 : 0x40; + F2[d] = wf_max(pHo2[d+1], pF2[d+1]) + 1; + zf = F1[d] >= F2[d]? 2 : 4; + f = wf_max(F1[d], F2[d]); + z = e >= f? ze : zf; + h = wf_max(e, f); + z = pHx[d] + 1 >= h? 0 : z; + H[d] = wf_max(pHx[d] + 1, h); + ax[d] = x | z; + } +} + +/* + * Core algorithm + */ +static void wf_next_basic(void *km, void *km_tb, const mwf_opt_t *opt, wf_stripe_t *wf, wf_tb_t *tb, int32_t lo, int32_t hi) +{ + int32_t *H, *E1, *E2, *F1, *F2; + const int32_t *pHx, *pHo1, *pHo2, *pE1, *pE2, *pF1, *pF2; + wf_next_prep(km, opt, wf, lo, hi, &H, &E1, &F1, &E2, &F2, &pHx, &pHo1, &pHo2, &pE1, &pF1, &pE2, &pF2); + if (tb) { + uint8_t *ax; + ax = wf_tb_add(km_tb, tb, lo, hi)->x - lo; + wf_next_tb(lo, hi, H, E1, F1, E2, F2, ax, pHx, pHo1, pHo2, pE1, pF1, pE2, pF2); + } else { + wf_next_score(lo, hi, H, E1, F1, E2, F2, pHx, pHo1, pHo2, pE1, pF1, pE2, pF2); + } + if (H[lo] >= -1 || E1[lo] >= -1 || F1[lo] >= -1 || E2[lo] >= -1 || F2[lo] >= -1) wf->lo = lo; + if (H[hi] >= -1 || E1[hi] >= -1 || F1[hi] >= -1 || E2[hi] >= -1 || F2[hi] >= -1) wf->hi = hi; +} + +static uint32_t *wf_traceback(void *km, const mwf_opt_t *opt, wf_tb_t *tb, int32_t t_end, const char *ts, int32_t q_end, const char *qs, int32_t last, int32_t *n_cigar) +{ + wf_cigar_t cigar = {0,0,0}; + int32_t i = q_end, k = t_end, s = tb->n - 1; + while (i >= 0 && k >= 0) { + int32_t k0 = k, j, x, state, ext; + if (last == 0) { // if the previous state is 0, check exact matches + while (i >= 0 && k >= 0 && qs[i] == ts[k]) + --i, --k; + if (k0 - k > 0) + wf_cigar_push1(km, &cigar, 7, k0 - k); + if (i < 0 || k < 0) break; + } + assert(s >= 0); + j = i - k - tb->a[s].lo; + assert(j <= tb->a[s].hi - tb->a[s].lo); + x = tb->a[s].x[j]; + state = 
last == 0? x&7 : last; + ext = state > 0? x>>(state+2)&1 : 0; // whether an extension + //fprintf(stderr, "s=%d, %d->%d, ext=%d%d%d%d, i=%d, k=%d\n", s, last, state, x>>3&1, x>>4&1, x>>5&1, x>>6&1, i, k); + if (state == 0) { + wf_cigar_push1(km, &cigar, 8, 1); + --i, --k, s -= opt->x; + } else if (state == 1) { + wf_cigar_push1(km, &cigar, 1, 1); + --i, s -= ext? opt->e1 : opt->o1 + opt->e1; + } else if (state == 3) { + wf_cigar_push1(km, &cigar, 1, 1); + --i, s -= ext? opt->e2 : opt->o2 + opt->e2; + } else if (state == 2) { + wf_cigar_push1(km, &cigar, 2, 1); + --k, s -= ext? opt->e1 : opt->o1 + opt->e1; + } else if (state == 4) { + wf_cigar_push1(km, &cigar, 2, 1); + --k, s -= ext? opt->e2 : opt->o2 + opt->e2; + } else abort(); + last = state > 0 && ext? state : 0; + } + if (opt->flag&MWF_F_DEBUG) fprintf(stderr, "s0=%d, s=%d, i=%d, k=%d\n", tb->n-1, s, i, k); + if (i >= 0) wf_cigar_push1(km, &cigar, 1, i + 1); + else if (k >= 0) wf_cigar_push1(km, &cigar, 2, k + 1); + for (i = 0; i < cigar.n>>1; ++i) { // reverse to the input order + uint32_t t = cigar.cigar[i]; + cigar.cigar[i] = cigar.cigar[cigar.n - i - 1]; + cigar.cigar[cigar.n - i - 1] = t; + } + *n_cigar = cigar.n; + return cigar.cigar; +} + +// pts and pqs MUST BE padded with wf_pad_str() +static void mwf_wfa_core(void *km, const mwf_opt_t *opt, int32_t tl, const char *pts, int32_t ql, const char *pqs, int32_t n_seg, wf_chkpt_t *seg, mwf_rst_t *r) +{ + int32_t max_pen, sid, is_tb = !!(opt->flag&MWF_F_CIGAR), last_state = 0, stopped = 0; + wf_stripe_t *wf; + wf_tb_t tb = {0,0,0}; + void *km_tb, *km_st; + + memset(r, 0, sizeof(*r)); + km_tb = is_tb && !(opt->flag&MWF_F_NO_KALLOC)? km_init2(km, 0) : 0; + km_st = !(opt->flag&MWF_F_NO_KALLOC)? km_init2(km, 0) : 0; + max_pen = opt->x; + max_pen = max_pen > opt->o1 + opt->e1? max_pen : opt->o1 + opt->e1; + max_pen = max_pen > opt->o2 + opt->e2? 
max_pen : opt->o2 + opt->e2; + wf = wf_stripe_init(km_st, max_pen); + assert(pts); + + sid = 0; + while (1) { + wf_slice_t *p = &wf->a[wf->top]; + int32_t d, lo, hi, *H = p->H; + for (d = p->lo; d <= p->hi; ++d) { + int32_t k = 0; + if (H[d] < -1 || d + H[d] < -1 || H[d] >= tl || d + H[d] >= ql) continue; + k = wf_extend1_padded(pts, pqs, H[d], d); + //fprintf(stderr, "[s=%d] [%d,%d]:%d %d->%d,%d,%d,%d,%d\n", wf->s, p->lo, p->hi, d, H[d], k, wf->a[wf->top].E1[d], wf->a[wf->top].F1[d], wf->a[wf->top].E2[d], wf->a[wf->top].F2[d]); + if (k == tl - 1 && d + k == ql - 1) { + if (k == H[d] && is_tb) + last_state = tb.a[tb.n-1].x[d - tb.a[tb.n-1].lo] & 7; + break; + } + H[d] = k; + } + if (d <= p->hi) break; + if (is_tb && seg && sid < n_seg && seg[sid].s == wf->s) { + assert(seg[sid].d >= wf->lo && seg[sid].d <= wf->hi); + wf->lo = wf->hi = seg[sid++].d; + } + lo = wf->lo > -tl? wf->lo - 1 : -tl; + hi = wf->hi < ql? wf->hi + 1 : ql; + wf_next_basic(km_st, km_tb, opt, wf, is_tb? &tb : 0, lo, hi); + if ((wf->s&0xff) == 0) wf_stripe_shrink(wf, tl, ql); + r->n_iter += hi - lo + 1; + if ((opt->max_iter > 0 && r->n_iter > opt->max_iter) || (opt->max_s > 0 && wf->s > opt->max_s)) { + stopped = 1; + break; + } + } + r->s = stopped? 
-1 : wf->s; + if (is_tb && !stopped) + r->cigar = wf_traceback(km, opt, &tb, tl-1, pts, ql-1, pqs, last_state, &r->n_cigar); + if (km_st == 0) wf_stripe_destroy(km_st, wf); + else km_destroy(km_st); + km_destroy(km_tb); + if (is_tb && !stopped) + r->cigar = (uint32_t*)krelocate(km, r->cigar, r->n_cigar * sizeof(*r->cigar)); +} + +/* + * Low-memory mode + */ +typedef struct { + int32_t n, n_intv, max_s; + int32_t *x; + uint64_t *intv; +} wf_ss_t; // snapshot + +typedef struct { + int32_t n, m; + wf_ss_t *a; +} wf_sss_t; + +static void wf_snapshot1(void *km, wf_stripe_t *sf, wf_ss_t *ss) +{ + int32_t j, k, t; + ss->n = 0, ss->max_s = sf->s; + for (j = 0; j < sf->n; ++j) + ss->n += 5 * (sf->a[j].hi - sf->a[j].lo + 1); + ss->x = Kmalloc(km, int32_t, ss->n); + ss->n_intv = sf->n; + ss->intv = Kmalloc(km, uint64_t, ss->n_intv); + for (j = 0, t = 0; j < sf->n; ++j) { + wf_slice_t *p; + k = (sf->top + 1 + j) % sf->n; + p = &sf->a[k]; + ss->intv[j] = (uint64_t)p->lo << 32 | (p->hi - p->lo + 1) * 5; + for (k = p->lo; k <= p->hi; ++k) { + ss->x[t] = p->H[k], p->H[k] = t++; + ss->x[t] = p->E1[k], p->E1[k] = t++; + ss->x[t] = p->F1[k], p->F1[k] = t++; + ss->x[t] = p->E2[k], p->E2[k] = t++; + ss->x[t] = p->F2[k], p->F2[k] = t++; + } + } + assert(t == ss->n); +} + +static void wf_snapshot(void *km, wf_sss_t *sss, wf_stripe_t *sf) +{ + if (sss->n == sss->m) { + sss->m += (sss->m>>1) + 8; + sss->a = Krealloc(km, wf_ss_t, sss->a, sss->m); + } + wf_snapshot1(km, sf, &sss->a[sss->n++]); +} + +static void wf_snapshot_free(void *km, wf_sss_t *sss) +{ + int32_t j; + for (j = 0; j < sss->n; ++j) { + kfree(km, sss->a[j].x); + kfree(km, sss->a[j].intv); + } + kfree(km, sss->a); +} + +static void wf_next_seg(void *km, const mwf_opt_t *opt, uint8_t *xbuf, wf_stripe_t *wf, wf_stripe_t *sf, int32_t lo, int32_t hi) +{ + int32_t d, *H, *E1, *E2, *F1, *F2; + const int32_t *pHx, *pHo1, *pHo2, *pE1, *pE2, *pF1, *pF2; + uint8_t *ax = xbuf - lo; + + wf_next_prep(km, opt, wf, lo, hi, &H, &E1, &F1, &E2, 
&F2, &pHx, &pHo1, &pHo2, &pE1, &pF1, &pE2, &pF2); + wf_next_tb(lo, hi, H, E1, F1, E2, F2, ax, pHx, pHo1, pHo2, pE1, pF1, pE2, pF2); + wf_next_prep(km, opt, sf, lo, hi, &H, &E1, &F1, &E2, &F2, &pHx, &pHo1, &pHo2, &pE1, &pF1, &pE2, &pF2); + PRAGMA_LOOP_VECTORIZE + for (d = lo; d <= hi; ++d) { // FIXME: merge this loop into the loop in wf_next_tb(). I tried but couldn't make clang vectorize. + uint8_t x = ax[d]; + int32_t a, b, e1, f1, e2, f2, h; + a = pHo1[d-1], b = pE1[d-1]; + e1 = E1[d] = (x&0x08) == 0? a : b; + a = pHo1[d+1], b = pF1[d+1]; + f1 = F1[d] = (x&0x10) == 0? a : b; + a = pHo2[d-1], b = pE2[d-1]; + e2 = E2[d] = (x&0x20) == 0? a : b; + a = pHo2[d+1], b = pF2[d+1]; + f2 = F2[d] = (x&0x40) == 0? a : b; + x &= 7; + h = pHx[d]; + h = x == 1? e1 : h; + h = x == 2? f1 : h; + h = x == 3? e2 : h; + h = x == 4? f2 : h; + H[d] = h; + } + if (H[lo] >= -1 || E1[lo] >= -1 || F1[lo] >= -1 || E2[lo] >= -1 || F2[lo] >= -1) wf->lo = lo; + if (H[hi] >= -1 || E1[hi] >= -1 || F1[hi] >= -1 || E2[hi] >= -1 || F2[hi] >= -1) wf->hi = hi; +} + +static wf_chkpt_t *wf_traceback_seg(void *km, wf_sss_t *sss, int32_t last, int32_t *n_seg) +{ + int32_t j; + wf_chkpt_t *seg; + *n_seg = sss->n; + seg = Kmalloc(km, wf_chkpt_t, sss->n); + for (j = sss->n - 1; j >= 0; --j) { + int32_t k, m; + wf_ss_t *p = &sss->a[j]; + for (k = 0, m = 0; k < p->n_intv; ++k) { + if (last >= m && last < m + (int32_t)p->intv[k]) + break; + m += (int32_t)p->intv[k]; + } + assert(k < p->n_intv); + seg[j].s = p->max_s - (p->n_intv - k - 1); + seg[j].d = (int32_t)(p->intv[k]>>32) + (last - m) / 5; + last = p->x[last]; + } + assert(last == -1); + return seg; +} + +wf_chkpt_t *mwf_wfa_seg(void *km, const mwf_opt_t *opt, int32_t tl, const char *pts, int32_t ql, const char *pqs, int32_t *n_seg_) +{ + int32_t max_pen, last, n_seg; + wf_stripe_t *wf, *sf; + wf_sss_t sss = {0,0,0}; + uint8_t *xbuf; + wf_chkpt_t *seg; + void *km_st; + + km_st = !(opt->flag&MWF_F_NO_KALLOC)? 
km_init2(km, 0) : 0; + max_pen = opt->x; + max_pen = max_pen > opt->o1 + opt->e1? max_pen : opt->o1 + opt->e1; + max_pen = max_pen > opt->o2 + opt->e2? max_pen : opt->o2 + opt->e2; + xbuf = Kcalloc(km_st, uint8_t, tl + ql + 1); + wf = wf_stripe_init(km_st, max_pen); + sf = wf_stripe_init(km_st, max_pen); + assert(pts); + + while (1) { + wf_slice_t *p = &wf->a[wf->top]; + int32_t d, lo, hi, *H = p->H; + for (d = p->lo; d <= p->hi; ++d) { + int32_t k; + if (H[d] < -1 || d + H[d] < -1 || H[d] >= tl || d + H[d] >= ql) continue; + k = wf_extend1_padded(pts, pqs, H[d], d); + if (k == tl - 1 && d + k == ql - 1) { + last = sf->a[sf->top].H[d]; + break; + } + H[d] = k; + } + if (d <= p->hi) break; + lo = wf->lo > -tl? wf->lo - 1 : -tl; + hi = wf->hi < ql? wf->hi + 1 : ql; + if ((wf->s + 1) % opt->step == 0) + wf_snapshot(km_st, &sss, sf); + wf_next_seg(km_st, opt, xbuf, wf, sf, lo, hi); + if ((wf->s&0xff) == 0) wf_stripe_shrink(wf, tl, ql); + } + seg = wf_traceback_seg(km, &sss, last, &n_seg); + if (km_st == 0) { + wf_snapshot_free(km_st, &sss); + wf_stripe_destroy(km_st, wf); + wf_stripe_destroy(km_st, sf); + kfree(km_st, xbuf); + } else km_destroy(km_st); + + seg = (wf_chkpt_t*)krelocate(km, seg, n_seg * sizeof(*seg)); + *n_seg_ = n_seg; + return seg; +} + +void mwf_wfa_exact(void *km, const mwf_opt_t *opt, int32_t tl, const char *ts, int32_t ql, const char *qs, mwf_rst_t *r) +{ + int32_t n_seg = 0; + wf_chkpt_t *seg = 0; + char *pts, *pqs; + + wf_pad_str(km, tl, ts, ql, qs, &pts, &pqs); + if (opt->step > 0) + seg = mwf_wfa_seg(km, opt, tl, pts, ql, pqs, &n_seg); + mwf_wfa_core(km, opt, tl, pts, ql, pqs, n_seg, seg, r); + kfree(km, seg); + kfree(km, pts); +} + +/* + * Heuristics + */ +static int32_t mg_lis_64(void *km, int32_t n, const uint64_t *a, int32_t *b) +{ + int32_t i, k, L = 0, *M, *P = b; + KMALLOC(km, M, n+1); + for (i = 0; i < n; ++i) { + int32_t lo = 1, hi = L, newL; + while (lo <= hi) { + int32_t mid = (lo + hi + 1) >> 1; + if (a[M[mid]] < a[i]) lo = mid + 1; 
+ else hi = mid - 1; + } + newL = lo, P[i] = M[newL - 1], M[newL] = i; + if (newL > L) L = newL; + } + k = M[L]; + memcpy(M, P, n * sizeof(int32_t)); + for (i = L - 1; i >= 0; --i) b[i] = k, k = M[k]; + kfree(km, M); + return L; +} + +extern void radix_sort_gfa64(uint64_t*, uint64_t*); +extern unsigned char seq_nt4_table[256]; + +static int32_t mg_fc_kmer(int32_t len, const char *seq, int32_t rid, int32_t k, uint64_t *a) +{ + int32_t i, l, n; + uint64_t x, mask = (1ULL<= k) a[n++] = (x<<1|rid) << 32 | i; + } else l = 0, x = 0; + } + return n; +} + +static uint64_t *mg_chain(void *km, int32_t l1, const char *s1, int32_t l2, const char *s2, int32_t k, int32_t max_occ, int32_t *n_lis_) +{ + int32_t i, n_a, n_b, m_b, i0, n_lis, *lis; + uint64_t *a, *b; + + *n_lis_ = 0; + if (l1 < k || l2 < k) return 0; + assert(k >= 2 && k <= 15); + + // collect k-mers + KMALLOC(km, a, l1 + l2); + n_a = mg_fc_kmer(l1, s1, 0, k, a); + n_a += mg_fc_kmer(l2, s2, 1, k, &a[n_a]); + radix_sort_gfa64(a, a + n_a); + + // collect k-mer matches + n_b = m_b = 0, b = 0; + for (i0 = 0, i = 1; i <= n_a; ++i) { + if (i == n_a || a[i0]>>33 != a[i]>>33) { + if (i - i0 >= 2) { + int32_t j, s, t; + for (j = i0; j < i && (a[j]>>32&1) == 0; ++j) {} + if (j > i0 && j < i && j - i0 <= max_occ && i - j <= max_occ) { + for (s = i0; s < j; ++s) + for (t = j; t < i; ++t) { + if (n_b == m_b) KEXPAND(km, b, m_b); + b[n_b++] = a[s]<<32 | (uint32_t)a[t]; + } + } + } + i0 = i; + } + } + kfree(km, a); + + // find co-linear chain with LIS + radix_sort_gfa64(b, b + n_b); + for (i = 0; i < n_b; ++i) + b[i] = b[i]>>32 | b[i]<<32; + KMALLOC(km, lis, n_b); + n_lis = mg_lis_64(km, n_b, b, lis); + a = Kmalloc(km, uint64_t, n_lis); + for (i = 0; i < n_lis; ++i) a[i] = b[lis[i]]; + kfree(km, lis); + kfree(km, b); + b = Kmalloc(km, uint64_t, n_lis); + memcpy(b, a, sizeof(uint64_t) * n_lis); + kfree(km, a); + *n_lis_ = n_lis; + for (i = 0; i < n_lis; ++i) // switch back, such that seq1 on the high bits + b[i] = b[i]>>32 | 
b[i]<<32; + return b; +} + +static double mwf_ksim(void *km, int32_t l1, const char *s1, int32_t l2, const char *s2, int32_t k) +{ + int32_t i, i0, j, n_a, n1 = 0, n2 = 0, t1 = 0, t2 = 0; + double p1, p2; + uint64_t *a; + if (l1 < k || l2 < k) return 0; + assert(k >= 2 && k <= 15); + KMALLOC(km, a, l1 + l2); + n_a = mg_fc_kmer(l1, s1, 0, k, a); + n_a += mg_fc_kmer(l2, s2, 1, k, &a[n_a]); + radix_sort_gfa64(a, a + n_a); + for (i0 = 0, i = 1; i <= n_a; ++i) { + if (i == n_a || a[i0]>>33 != a[i]>>33) { + int32_t m1, m2, min; + for (j = i0; j < i && (a[j]>>32&1) == 0; ++j) {} + m1 = j - i0, m2 = i - j; + min = m1 < m2? m1 : m2; + n1 += m1, n2 += m2; + if (m1 > 0 && m2 > 0) + t1 += min, t2 += min; + i0 = i; + } + } + kfree(km, a); + p1 = (double)t1 / n1, p2 = (double)t2 / n2; + return p1 > p2? p1 : p2; +} + +#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) + +static void wf_cigar_push(void *km, wf_cigar_t *c, int32_t n_cigar, const uint32_t *cigar) +{ + if (n_cigar == 0) return; + wf_cigar_push1(km, c, cigar[0]&0xf, cigar[0]>>4); + if (c->n + n_cigar - 1 > c->m) { + c->m = c->n + n_cigar - 1; + kroundup32(c->m); + c->cigar = Krealloc(km, uint32_t, c->cigar, c->m); + } + memcpy(&c->cigar[c->n], &cigar[1], sizeof(*cigar) * (n_cigar - 1)); + c->n += n_cigar - 1; +} + +static int32_t wf_anchor_filter(int32_t n, uint64_t *a, int32_t tl, int32_t ql, int32_t k, int32_t min_l) +{ + int32_t i, st, x0, y0, x1, y1, j, l, m; + for (i = 0, x0 = y0 = x1 = y1 = 0, st = -1, l = 0; i <= n; ++i) { + int32_t x, y; + if (i == n) x = tl, y = ql; + else x = (int32_t)(a[i]>>32) + 1, y = (int32_t)a[i] + 1; + if (x - x0 != y - y0) { + //fprintf(stderr, "X\t%d\t(%d,%d) -> (%d,%d)\n", l, x0, y0, x, y); + if (l < min_l) + for (j = st > 0? 
st : 0; j < i; ++j) + a[j] = 0; + x0 = x, y0 = y, st = i, l = k; + } else l += x - x1; + x1 = x, y1 = y; + } + for (i = 0, m = 0; i < n; ++i) + if (a[i] != 0) a[m++] = a[i]; + return m; +} + +void mwf_wfa_chain(void *km, const mwf_opt_t *opt, int32_t tl, const char *ts, int32_t ql, const char *qs, mwf_rst_t *r) +{ + int32_t n_a, i, x0, y0; + uint64_t *a; + void *km_wfa; + wf_cigar_t c = {0,0,0}; + + km_wfa = !(opt->flag&MWF_F_NO_KALLOC)? km_init2(km, 0) : 0; + a = mg_chain(km_wfa, tl, ts, ql, qs, opt->kmer, opt->max_occ, &n_a); + n_a = wf_anchor_filter(n_a, a, tl, ql, opt->kmer, opt->min_len); + r->s = 0; + for (i = 0, x0 = y0 = 0; i <= n_a; ++i) { + int32_t x1, y1; + if (i == n_a) x1 = tl, y1 = ql; + else x1 = (int32_t)(a[i]>>32) + 1, y1 = (int32_t)a[i] + 1; + if (i < n_a && x1 - x0 == y1 - y0 && x1 - x0 <= opt->kmer) { + if (opt->flag&MWF_F_CIGAR) + wf_cigar_push1(km, &c, 7, x1 - x0); + } else if (x0 < x1 && y0 < y1) { + if (x1 - x0 >= 10000 && y1 - y0 >= 10000 && mwf_ksim(km, x1 - x0, &ts[x0], y1 - y0, &qs[y0], opt->kmer) < 0.02) { + if (opt->flag&MWF_F_CIGAR) { + wf_cigar_push1(km, &c, 2, x1 - x0); + wf_cigar_push1(km, &c, 1, y1 - y0); + } + r->s += opt->o2 * 2 + opt->e2 * ((x1 - x0) + (y1 - y0)); + } else { + mwf_rst_t q; + mwf_wfa_exact(km_wfa, opt, x1 - x0, &ts[x0], y1 - y0, &qs[y0], &q); + if (opt->flag&MWF_F_CIGAR) + wf_cigar_push(km, &c, q.n_cigar, q.cigar); + r->s += q.s; + kfree(km_wfa, q.cigar); + } + } else if (x0 < x1) { + wf_cigar_push1(km, &c, 2, x1 - x0); + r->s += opt->o2 + (x1 - x0) * opt->e2 < opt->o1 + (x1 - x0) * opt->e1? opt->o2 + (x1 - x0) * opt->e2 : opt->o1 + (x1 - x0) * opt->e1; + } else if (y0 < y1) { + wf_cigar_push1(km, &c, 1, y1 - y0); + r->s += opt->o2 + (y1 - y0) * opt->e2 < opt->o1 + (y1 - y0) * opt->e1? 
opt->o2 + (y1 - y0) * opt->e2 : opt->o1 + (y1 - y0) * opt->e1; + } + x0 = x1, y0 = y1; + } + if (km_wfa == 0) kfree(km_wfa, a); + km_destroy(km_wfa); + r->n_cigar = c.n, r->cigar = c.cigar; + r->cigar = (uint32_t*)krelocate(km, r->cigar, r->n_cigar * sizeof(*r->cigar)); +} + +void mwf_wfa_auto(void *km, const mwf_opt_t *opt0, int32_t tl, const char *ts, int32_t ql, const char *qs, mwf_rst_t *r) +{ + mwf_opt_t opt = *opt0; + opt.step = 0, opt.max_iter = 100000000; + mwf_wfa_exact(km, &opt, tl, ts, ql, qs, r); + if (r->s < 0) { + if (opt.flag & MWF_F_CIGAR) opt.step = 5000; + opt.max_iter = -1; + mwf_wfa_chain(km, &opt, tl, ts, ql, qs, r); + } +} diff --git a/miniwfa.h b/miniwfa.h new file mode 100644 index 0000000..903da95 --- /dev/null +++ b/miniwfa.h @@ -0,0 +1,95 @@ +/* + The MIT License + + Copyright (c) 2022- Dana-Farber Cancer Institute + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. 
+*/ + +#ifndef MINIWFA_H +#define MINIWFA_H + +#include + +#define MWF_F_CIGAR 0x1 +#define MWF_F_NO_KALLOC 0x2 +#define MWF_F_DEBUG 0x10000 + +typedef struct { + int32_t flag; // bit flag; see MWF_F_* macros + int32_t x, o1, e1, o2, e2; // scoring + int32_t step; // distance between checkpoints in the low-memory mode + int32_t max_s; // stop the alignment if score is higher than this + int64_t max_iter; + // chaining heuristics + int32_t max_occ, kmer, min_len; +} mwf_opt_t; + +typedef struct { + int32_t s; // score + int32_t n_cigar; // number of CIGAR operators + int64_t n_iter; + uint32_t *cigar; // CIGAR in the htslib packing: len<<4|op +} mwf_rst_t; + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Set default parameters + * + * @param opt (out) options + */ +void mwf_opt_init(mwf_opt_t *opt); + +/** + * Align two sequences with WFA + * + * mwf_wfa_exact() finds the optimal alignment without heuristics. + * + * mwf_wfa_chain() does chaining and closes gaps in the chain. This is a + * heuristic algorithm and may miss the optimal alignment. + * + * mwf_wfa_auto() calls mwf_wfa_exact() for penalty up to 5000. If fails, + * it invokes mwf_wfa_chain() with a step size of 5000. + * + * @param km kalloc handler. Set to NULL to use malloc. + * @param opt parameters + * @param tl target sequence length + * @param ts target sequence + * @param ql query sequence length + * @param qs query sequence + * @param r (out) results + */ +void mwf_wfa_exact(void *km, const mwf_opt_t *opt, int32_t tl, const char *ts, int32_t ql, const char *qs, mwf_rst_t *r); +void mwf_wfa_chain(void *km, const mwf_opt_t *opt, int32_t tl, const char *ts, int32_t ql, const char *qs, mwf_rst_t *r); +void mwf_wfa_auto(void *km, const mwf_opt_t *opt, int32_t tl, const char *ts, int32_t ql, const char *qs, mwf_rst_t *r); + +// These functions are in "mwf-dbg.c". For debugging only. 
+int32_t mwf_cigar2score(const mwf_opt_t *opt, int32_t n_cigar, const uint32_t *cigar, int32_t *tl, int32_t *ql); +void mwf_assert_cigar(const mwf_opt_t *opt, int32_t n_cigar, const uint32_t *cigar, int32_t tl0, int32_t ql0, int32_t s0); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/misc.c b/misc.c new file mode 100644 index 0000000..44bc0b2 --- /dev/null +++ b/misc.c @@ -0,0 +1,12 @@ +#include +#include "mgpriv.h" +#include "ksort.h" + +int mg_verbose = 1; +int mg_dbg_flag = 0; +double mg_realtime0; + +#define sort_key_128x(a) ((a).x) +KRADIX_SORT_INIT(128x, mg128_t, sort_key_128x, 8) + +KSORT_INIT_GENERIC(uint32_t) diff --git a/misc/mgutils.js b/misc/mgutils.js new file mode 100755 index 0000000..de6d65d --- /dev/null +++ b/misc/mgutils.js @@ -0,0 +1,1451 @@ +#!/usr/bin/env k8 + +/******************************* + * Command line option parsing * + *******************************/ + +var getopt = function(args, ostr) { + var oli; // option letter list index + if (typeof(getopt.place) == 'undefined') + getopt.ind = 0, getopt.arg = null, getopt.place = -1; + if (getopt.place == -1) { // update scanning pointer + if (getopt.ind >= args.length || args[getopt.ind].charAt(getopt.place = 0) != '-') { + getopt.place = -1; + return null; + } + if (getopt.place + 1 < args[getopt.ind].length && args[getopt.ind].charAt(++getopt.place) == '-') { // found "--" + ++getopt.ind; + getopt.place = -1; + return null; + } + } + var optopt = args[getopt.ind].charAt(getopt.place++); // character checked for validity + if (optopt == ':' || (oli = ostr.indexOf(optopt)) < 0) { + if (optopt == '-') return null; // if the user didn't specify '-' as an option, assume it means null. 
+ if (getopt.place < 0) ++getopt.ind; + return '?'; + } + if (oli+1 >= ostr.length || ostr.charAt(++oli) != ':') { // don't need argument + getopt.arg = null; + if (getopt.place < 0 || getopt.place >= args[getopt.ind].length) ++getopt.ind, getopt.place = -1; + } else { // need an argument + if (getopt.place >= 0 && getopt.place < args[getopt.ind].length) + getopt.arg = args[getopt.ind].substr(getopt.place); + else if (args.length <= ++getopt.ind) { // no arg + getopt.place = -1; + if (ostr.length > 0 && ostr.charAt(0) == ':') return ':'; + return '?'; + } else getopt.arg = args[getopt.ind]; // white space + getopt.place = -1; + ++getopt.ind; + } + return optopt; +} + +function it_index(a) { + if (a.length == 0) return -1; + a.sort(function(x, y) { return x[0] - y[0] }); + var last, last_i; + for (var i = 0; i < a.length; i += 2) last = a[i][2] = a[i][1], last_i = i; + for (var k = 1; 1<>k&1? last_i - (1<<(k-1)) : last_i + (1<<(k-1)); + if (last_i < a.length) last = last > a[last_i][2]? last : a[last_i][2]; + } + return k - 1; +} + +function it_overlap(a, st, en) { + if (a == null) return []; + var h, stack = [], b = []; + for (h = 0; 1<> h << h, i1 = i0 + (1<<(h+1)) - 1; + if (i1 >= a.length) i1 = a.length; + for (var i = i0; i < i1; ++i) + if (a[i][0] < en && st < a[i][1]) + b.push(a[i]); + } else if (w == 0) { // if left child not processed + stack.push([x, h, 1]); + var y = x - (1<<(h-1)); + if (y >= a.length || a[y][2] > st) + stack.push([y, h - 1, 0]); + } else if (x < a.length && a[x][0] < en) { + if (st < a[x][1]) b.push(a[x]); + stack.push([x + (1<<(h-1)), h - 1, 0]); + } + } + return b; +} + +function it_contained(a, st, en) { + if (a == null) return false; + var b = it_overlap(a, st, en); + var c = false; + for (var i = 0; i < b.length; ++i) { + if (b[i][0] <= st && en <= b[i][1]) + c = true; + } + return c; +} + +/**************************** + ***** mgutils commands ***** + ****************************/ + +function mg_cmd_renamefa(args) +{ + var c, sep 
= '#'; + while ((c = getopt(args, "d:")) != null) + if (c == 'd') sep = getopt.arg; + if (args.length - getopt.ind < 2) { + print("Usage: mgutils.js renamefa [-d delimitor] "); + return; + } + var prefix = args[getopt.ind]; + var file = new File(args[getopt.ind+1]); + var buf = new Bytes(); + while (file.readline(buf) >= 0) { + if (buf[0] != 62) { + print(buf); + } else { + var m, s = buf.toString(); + if ((m = /^>(.*)/.exec(s)) != null) { + var name = m[1].replace(/^\S+#/, ""); + print(">" + prefix + sep + name); + } else throw Error("Wrong FASTA format!"); + } + } + file.close(); + buf.destroy(); +} + +function mg_cmd_joinfa(args) +{ + var c, len_n = 20, min_len = 150, name = "decoy-cat"; + while ((c = getopt(args, "n:l:s:")) != null) { + if (c == 'l') min_len = parseInt(getopt.arg); + else if (c == 'n') len_n = parseInt(getopt.arg); + else if (c == 's') name = getopt.arg; + } + if (args.length - getopt.ind < 1) { + print("Usage: mgutils.js joinfa [options] "); + return; + } + var seq = new Bytes(), seq1 = new Bytes(), lineno = 0, nn = new Bytes(); + for (var i = 0; i < len_n; ++i) nn.set(78); + var buf = new Bytes(); + var file = new File(args[getopt.ind]); + while (file.readline(buf) >= 0) { + ++lineno; + if (buf[0] == 62) { + if (seq1.length >= min_len) { + if (seq.length > 0) seq.set(nn); + seq.set(seq1); + } + seq1.length = 0; + } else seq1.set(buf); + } + if (seq1.length >= min_len) { + if (seq.length > 0) seq.set(nn); + seq.set(seq1); + } + print(">" + name); + print(seq); + file.close(); + buf.destroy(); + seq.destroy(); + seq1.destroy(); +} + +function mg_cmd_anno(args) +{ + var c, min_rm_div = 0.2, min_rm_sc = 300, micro_cap = 6, min_feat_len = 30, min_centro_len = 200, mobile = false, max_mobile_div = 2.0, min_segdup_frac = 0.2; + var fn_rmout = null, fn_etrf = null, fn_dust = null, fn_gap = null, fn_paf = null, fn_centro = null, fn_bb = null, fn_sd = null; + while ((c = getopt(args, "e:p:g:d:r:c:l:S:b:s:m")) != null) { + if (c == 'l') min_feat_len = 
parseInt(getopt.arg); + else if (c == 'S') min_segdup_frac = parseFloat(getopt.arg); + else if (c == 'm') mobile = true; + else if (c == 'e') fn_etrf = getopt.arg; + else if (c == 'p') fn_paf = getopt.arg; + else if (c == 'g') fn_gap = getopt.arg; + else if (c == 'd') fn_dust = getopt.arg; + else if (c == 'r') fn_rmout = getopt.arg; + else if (c == 'c') fn_centro = getopt.arg; + else if (c == 'b') fn_bb = getopt.arg; + else if (c == 's') fn_sd = getopt.arg; + } + + if (args.length - getopt.ind < 1) { + print("Usage: anno.js [options] "); + print("Options:"); + print(" -l INT min feature length [" + min_feat_len + "]"); + print(" -S FLOAT min segdup length [" + min_segdup_frac + "]"); + print(" -r FILE RepeatMasker .out [null]"); + print(" -g FILE seqtk gap output for stretches of Ns [null]"); + print(" -d FILE minimap2/sdust output for LCRs [null]"); + print(" -e FILE etrf output [null]"); + print(" -p FILE PAF alignment against reference [null]"); + print(" -c FILE dna-brnn centromere results [null]"); + print(" -b FILE bubble file [null]"); + print(" -s FILE segdup file (paste gfa2bed bedcov) [null]"); + print(" -m annotate AluY and L1HS separately"); + exit(1); + } + + var file, buf = new Bytes(); + + var bb = {}, bba = [], seg = {}; + + file = new File(args[getopt.ind]); + while (file.readline(buf) >= 0) { + var t = buf.toString().split("\t"); + if (t.length < 4) continue; + var key = t[0] + "_" + t[1] + "_" + t[2]; + var len = parseInt(t[3]); + if (len < parseInt(t[2]) - parseInt(t[1])) + throw Error("ERROR: event length smaller than interval length"); + bb[key] = [len, {}]; + bba.push(key); + } + file.close(); + + if (fn_bb) { + if (fn_sd) { // generated by "paste <(gfatools gfa2bed) <(bedtk cov segdup.bed gfa2bed.bed) | cut -f1-5,9,10" + file = new File(fn_sd); + while (file.readline(buf) >= 0) { + var t = buf.toString().split("\t"); + seg[t[3]] = [parseInt(t[4]), parseInt(t[2]) - parseInt(t[1]), parseInt(t[6])]; + } + file.close(); + } + file = new 
File(fn_bb); // parse "gfatools bubble" output + while (file.readline(buf) >= 0) { + var t = buf.toString().split("\t"); + var key = t[0] + "_" + t[1] + "_" + t[2]; + if (key in bb) { + bb[key].push(t[3], t[4], t[5], t[6], t[7], t[8], t[9], t[10]); + var s = t[11].split(","), tot_len = 0, tot_sd = 0, ref_len = 0; + var dup = {}; + for (var i = 1; i < s.length - 1; ++i) { + if (seg[s[i]] == null) continue; + if (dup[s[i]]) continue; + dup[s[i]] = 1; + tot_len += seg[s[i]][1], tot_sd += seg[s[i]][2]; + if (seg[s[i]][0] == 0) + ref_len += seg[s[i]][1]; + } + bb[key][7] = tot_len; + bb[key][8] = tot_sd; + bb[key][9] = ref_len; + } + } + file.close(); + } + + if (fn_rmout) { // parse RepeastMasker output + var motif0 = "GGAAT", motif_hash = {}, motif_mut_hash = {}; + { // dealing with possible (GGAAT)n rotations and mutations + var comp_tbl = { 'A':'T', 'T':'A', 'C':'G', 'G':'C' }; + var motif = [motif0], motif_alt = []; + + // reverse complement + for (var i = 0; i < motif.length; ++i) { + var x = motif[i], y = ""; + for (var j = x.length - 1; j >= 0; --j) { + y += comp_tbl[x[j]]; + } + motif_alt.push(y); + } + for (var i = 0; i < motif_alt.length; ++i) + motif.push(motif_alt[i]); + + // rotate + motif_alt = []; + for (var i = 0; i < motif.length; ++i) { + var x = motif[i]; + for (var j = 1; j < x.length; ++j) + motif_alt.push(x.substr(j) + x.substr(0, j)); + } + for (var i = 0; i < motif_alt.length; ++i) + motif.push(motif_alt[i]); + + for (var i = 0; i < motif.length; ++i) motif_hash[motif[i]] = i; + + // mutate + var bases = [ 'A', 'C', 'G', 'T' ]; + for (var x in motif_hash) { + var y = x; + for (var i = 0; i < x.length; ++i) { + for (var j = 0; j < bases.length; ++j) { + var a = x.split(""); + if (a[i] == bases[j]) continue; + a[i] = bases[j]; + motif_mut_hash[a.join("")] = 1; + } + } + } + } + + function process_rm_line(bb, lines) { + var h = {}; + if (lines.length == 0) return; + var key = lines[0][4]; + if (bb[key] == null) throw Error("ERROR: missing key: " + 
key); + var h = bb[key][1]; + for (var i = 0; i < lines.length; ++i) { + var t = lines[i]; + var st = parseInt(t[5]) - 1, en = parseInt(t[6]); + if (h[t[10]] == null) h[t[10]] = []; + h[t[10]].push([st, en]); + } + } + + file = new File(fn_rmout); + var lines = []; + while (file.readline(buf) >= 0) { + var line = buf.toString(); + var l2 = line.replace(/^\s+/, ""); + var m4, t = l2.split(/\s+/); + if (t.length < 15) continue; + if (t[9] == "ALR/Alpha") t[10] = "alpha"; + else if (t[9] == "HSATII") t[10] = "hsat2/3"; + else if (/^LTR\/ERV/.test(t[10])) t[10] = 'LTR/ERV'; + else if (/^LTR/.test(t[10])) t[10] = 'LTR/misc'; + else if (/^DNA/.test(t[10])) t[10] = 'DNA/misc'; + else if (/rRNA|scRNA|snRNA|srpRNA/.test(t[10])) t[10] = 'RNAmisc'; + else if (/^LINE/.test(t[10]) && t[10] != "LINE/L1") t[10] = 'LINE/misc'; + else if ((t[10] == "Simple_repeat" || t[10] == "Satellite") && ((m4 = /^\(([ACGT]+)\)n/.exec(t[9])) != null)) { + if (motif_hash[m4[1]] != null) { + t[10] = "hsat2/3"; + } else if (m4[1].length % motif0.length == 0) { + var c = 0, c_mut = 0; + for (var j = 0; j < m4[1].length; j += motif0.length) { + var s = m4[1].substr(j, j + motif0.length); + if (motif_hash[s] != null) + ++c; + else if (motif_mut_hash[s] != null) + ++c_mut; + } + if (c > 0 && (c + c_mut) * motif0.length == m4[1].length) + t[10] = "hsat2/3"; + } + } + + if (mobile) { + if (t[10] == "LINE/L1" && t[9] == "L1HS" && parseFloat(t[1]) < max_mobile_div) t[10] = "LINE/L1HS"; + if (t[10] == "SINE/Alu" && /^AluY/.test(t[9]) && parseFloat(t[1]) < max_mobile_div) t[10] = "SINE/AluY"; + } + if (t[10] == 'Simple_repeat' || t[10] == 'Low_complexity') t[10] = 'LCR'; + if (t[10] != 'LCR') { + // if (parseInt(t[0]) < min_rm_sc) continue; + // if (parseInt(t[1])/100 > min_rm_div) continue; + } + if (lines.length > 0 && lines[0][4] != t[4]) { + process_rm_line(bb, lines); + lines = []; + } + lines.push(t); + } + if (lines.length > 0) process_rm_line(bb, lines); + file.close(); + + for (var i = 0; i < 
bba.length; ++i) { + var h = bb[bba[i]][1], a = [], b = [], c_alu = [], c_l1 = []; + for (var key in h) { + if (/^(DNA|SINE|LINE|Retroposon|LTR)/.test(key)) + for (var j = 0; j < h[key].length; ++j) + a.push(h[key][j]); + if (/^(Satellite|hsat2\/3|alpha)/.test(key)) + for (var j = 0; j < h[key].length; ++j) + b.push(h[key][j]); + if (/^(SINE\/Alu)/.test(key)) + for (var j = 0; j < h[key].length; ++j) + c_alu.push(h[key][j]); + if (/^(LINE\/L1)/.test(key)) + for (var j = 0; j < h[key].length; ++j) + c_l1.push(h[key][j]); + } + if (a.length) h['_inter'] = a; + if (b.length) h['_sat'] = b; + if (c_alu.length) h['_alu'] = c_alu; + if (c_l1.length) h['_l1'] = c_l1; + } + } + + if (fn_etrf) { // parse etrf output + file = new File(fn_etrf); + while (file.readline(buf) >= 0) { + var t = buf.toString().split("\t"); + var l = parseInt(t[4]); + if (l == 1) continue; + var anno = l <= micro_cap? 'micro' : 'mini'; + if (bb[t[0]][1][anno] == null) + bb[t[0]][1][anno] = []; + var st = parseInt(t[1]), en = parseInt(t[2]); + bb[t[0]][1][anno].push([st, en]); + if (bb[t[0]][1]['LCR'] == null) + bb[t[0]][1]['LCR'] = []; + bb[t[0]][1]['LCR'].push([st, en]); + } + file.close(); + } + + if (fn_dust) { // parse minimap2/sdust output + file = new File(fn_dust); + while (file.readline(buf) >= 0) { + var t = buf.toString().split("\t"); + var anno = 'LCR'; + if (bb[t[0]][1][anno] == null) + bb[t[0]][1][anno] = []; + bb[t[0]][1][anno].push([parseInt(t[1]), parseInt(t[2])]); + } + file.close(); + } + + if (fn_paf) { // parse bubble-to-reference PAF for self alignment + file = new File(fn_paf); + while (file.readline(buf) >= 0) { + var t = buf.toString().split("\t"); + var anno = 'self'; + if (bb[t[0]][1][anno] == null) + bb[t[0]][1][anno] = []; + bb[t[0]][1][anno].push([parseInt(t[2]), parseInt(t[3])]); + } + file.close(); + } + + if (fn_gap) { // parse assembly gaps, generated by "seqtk gap" + file = new File(fn_gap); + while (file.readline(buf) >= 0) { + var t = buf.toString().split("\t"); 
+ var anno = 'gap'; + if (bb[t[0]][1][anno] == null) + bb[t[0]][1][anno] = []; + bb[t[0]][1][anno].push([parseInt(t[1]), parseInt(t[2])]); + } + file.close(); + } + + if (fn_centro) { + file = new File(fn_centro); + while (file.readline(buf) >= 0) { + var t = buf.toString().split("\t"); + var anno = t[3] == '1'? 'hsat2/3' : 'alpha'; + if (bb[t[0]][1][anno] == null) + bb[t[0]][1][anno] = []; + var st = parseInt(t[1]), en = parseInt(t[2]); + if (en - st >= min_centro_len) + bb[t[0]][1][anno].push([st, en]); + } + file.close(); + } + + for (var i = 0; i < bba.length; ++i) { + var m, key = bba[i], h = bb[key][1], len = bb[key][0]; + if ((m = /^(\S+)_(\d+)_(\d+)/.exec(key)) == null) + throw("Bug!"); + var x = {}, t = [m[1], m[2], m[3]]; + if (fn_bb) t.push(bb[key][2], bb[key][3], bb[key][4], bb[key][5], bb[key][6], bb[key][7], bb[key][8], bb[key][9]); + else t.push(len); + for (var c in h) { // calculated the merged length of each feature + var s, st = 0, en = 0, cov = 0; + s = h[c].sort(function(a, b) { return a[0] - b[0]; }); + for (var j = 0; j < s.length; ++j) { + if (s[j][0] > en) { + cov += en - st; + st = s[j][0], en = s[j][1]; + } else en = en > s[j][1]? en : s[j][1]; + } + cov += en - st; + if (cov >= min_feat_len) + x[c] = cov; + } + var type = "none"; + var max = 0, max2 = 0, max_c2 = null, max_c = null, sum = 0, sum_misc = 0; + var lcr = x['LCR'] == null? 0 : x['LCR']; + var self_len = x['self'] == null? 0 : x['self']; + for (var c in x) { + if (c == 'LCR' || c == 'self') continue; + if (c[0] == '_') continue; + sum += x[c]; + if (c != 'mini' && c != 'micro') sum_misc += x[c]; + if (max < x[c]) max2 = max, max_c2 = max_c, max = x[c], max_c = c; + else if (max2 < x[c]) max2 = x[c], max_c2 = c; + } + if (max >= len * 0.7) { + type = max_c; + } else if (lcr >= len * 0.7) { + type = 'lcr'; + if (max_c == 'mini' || max_c == 'micro') { + var y = x['mini'] == null? 0 : x['mini']; + y += x['micro'] == null? 
0 : x['micro']; + if (max >= y * 0.7) type = max_c; + } + } else if ((max_c == 'mini' || max_c == 'micro') && max2 < max * 0.1) { + type = max_c; + } else if (x['_alu'] != null && x['_alu'] >= len * 0.7) { + type = 'SINE/Alu'; + } else if (x['_l1'] != null && x['_l1'] >= len * 0.7) { + type = 'LINE/L1'; + } else if (x['_inter'] != null && x['_inter'] >= len * 0.7) { + type = 'inter'; + } else if (x['_sat'] != null && x['_sat'] >= len * 0.5) { + type = 'Satellite'; + } else if (sum_misc + lcr >= len * 0.7) { + type = 'mixed'; + } else if (sum + lcr > len * 0.05) { + type = 'partial'; + } else if (self_len >= len * 0.5) { + type = 'self'; + } + if ((type == 'partial' || type == 'self' || type == 'none' || type == 'mixed') && fn_bb && t[8] >= 1000 && t[9] >= t[8] * min_segdup_frac) + type = 'segdup'; + t.push(type); + for (var c in x) + t.push(c + ':' + x[c]); + print(t.join("\t")); + } + + buf.destroy(); +} + +function mg_classify_repeat(anno) { + var type; + if (anno == "mini") type = "11_VNTR"; + else if (anno == "micro") type = "12_STR"; + else if (anno == "lcr") type = "13_Other-LCR"; + else if (anno == "LINE/L1" || anno == "LINE/L1HS") type = "02_L1"; + else if (anno == "SINE/Alu" || anno == "SINE/AluY") type = "01_Alu"; + else if (anno == "Retroposon/SVA") type = "03_SVA"; + else if (anno == "LTR/ERV") type = "04_ERV"; + else if (anno == "inter" || /^(DNA|LINE|SINE|LTR)/.test(anno)) type = "05_Other-TE"; + else if (/^Satellite/.test(anno) || anno == "alpha" || anno == "hsat2/3" || anno == "_sat") type = "10_Satellite"; + else if (anno == "self" || anno == "none") type = "30_Low-repeat"; + else if (anno == "mixed") type = "20_Other-repeat"; + else if (anno == "segdup") type = "21_SegDup"; + else if (anno == "partial") type = "30_Low-repeat"; + else type = "20_Other-repeat"; + return type; +} + +function mg_cmd_anno2tbl(args) +{ + var segdup_ratio = 0.7; + var buf = new Bytes(); + var file = args.length == 0? 
new File() : new File(args[0]); + var h = {}; + while (file.readline(buf) >= 0) { + var t = buf.toString().split("\t"); + for (var i = 1; i <= 7; ++i) t[i] = parseInt(t[i]); + //if (t[5]) continue; + if (t[11] == "gap") continue; + if (/chrUn|_random/.test(t[0])) continue; + var na = t[4] < 4? t[4] : 4; + var key = mg_classify_repeat(t[11]); + if (h[key] == null) h[key] = [0, null, 0, 0, 0, 0, 0, 0, 0, 0, 0]; + ++h[key][na]; + h[key][na+3] += t[7]; + if (t[8] >= 0 && t[10] >= 0) h[key][na+6] += t[8] - t[10]; + } + + file.close(); + buf.destroy(); + + for (var key in h) { + var label = key.replace(/^[0-9]+_/, ""); + print(key, label, h[key].slice(2).join("\t")); + } +} + +function mg_cmd_paf2bl(args) +{ + var c, min_de = 0.01, max_de = 0.1, sub_de = 0.002, min_mapq = 5, min_len = 500, is_sub = false; + while ((c = getopt(args, "d:s")) != null) { + if (c == 'd') min_de = parseFloat(getopt.arg); + else if (c == 's') is_sub = true; + } + if (args.length - getopt.ind < 1) { + print("Usage: mgutils.js paf2bl "); + print("Note: bedtk sub <(mgutils.js paf2bl ins.paf; cat bl100.bed) <(../mgutils.js paf2bl -s ins.paf) | bedtk merge"); + return; + } + var file = new File(args[getopt.ind]); + var buf = new Bytes(); + while (file.readline(buf) >= 0) { + var line = buf.toString(); + var m, t = line.split("\t"); + if (/\ttp:A:[SI]/.test(line)) continue; + if (parseInt(t[11]) < min_mapq) continue; + if (parseInt(t[10]) < min_len) continue; + if ((m = /\tde:f:(\S+)/.exec(line)) == null) continue; + var de = parseFloat(m[1]); + if (is_sub) { + if (de > sub_de) continue; + } else { + if (de < min_de || de > max_de) continue; + } + print(t[5], t[7], t[8]); + //print(line); + } + buf.destroy(); + file.close(); +} + +function mg_cmd_stableGaf(args) +{ + var c; + while ((c = getopt(args, "")) != null) { + } + if (args.length - getopt.ind < 1) { + print("Usage: mgutils.js stableGaf "); + return; + } + + var re = /\t(LN|SN|SO|SR):[Zi]:(\S+)/g; + var file, buf = new Bytes(); + + var pri_len 
= {}, segh = {}; + file = new File(args[getopt.ind]); + while (file.readline(buf) >= 0) { + var m, line = buf.toString(); + if ((m = /^S\t(\S+)\t(\S+)(\t.*)/.exec(line)) == null) continue; + var seg = m[1], len = m[2] == '*'? 0 : m[2].length, tags = m[3]; + var sn = null, so = -1, sr = -1; + while ((m = re.exec(tags)) != null) { + if (m[1] == "LN") len = parseInt(m[2]); + else if (m[1] == "SN") sn = m[2]; + else if (m[1] == "SO") so = parseInt(m[2]); + else if (m[1] == "SR") sr = parseInt(m[2]); + } + if (sn == null || so < 0 || sr < 0 || len <= 0) + throw Error("failed to parse tags '" + tags + "'"); + segh[seg] = [sn, so, so + len, sr]; + if (sr == 0) { + if (pri_len[sn] == null) pri_len[sn] = 0; + pri_len[sn] = pri_len[sn] > so + len? pri_len[sn] : so + len; + } + } + file.close(); + + re = /([><])([^\s><]+)/g; + file = args.length - getopt.ind < 2? new File() : new File(args[getopt.ind+1]); + while (file.readline(buf) >= 0) { + var m, line = buf.toString(); + if ((m = /^(\S+)\t(\d+\t\d+\t\d+)\t([+-])\t(\S+)\t(\d+)\t(\d+)\t(\d+)\t(.*)/.exec(line)) == null) + continue; + var s, a = []; + while ((s = re.exec(m[4])) != null) { + if (segh[s[2]] == null) + throw Error("failed to find segment '" + s[2] + "'"); + var h = segh[s[2]], add_new = true; + if (a.length) { + var b = a[a.length - 1]; + if (b[0] == s[1] && h[3] == b[4] && h[0] == b[1]) { + if (b[0] == '>') { + if (h[1] == b[3]) b[3] = h[2], add_new = false; + } else { + if (h[2] == b[2]) b[2] = h[1], add_new = false; + } + } + } + if (add_new) a.push([s[1], h[0], h[1], h[2], h[3]]); + } + var path_len = 0, path = ""; + for (var i = 0; i < a.length; ++i) + path_len += a[i][3] - a[i][2]; + if (path_len != parseInt(m[5])) + throw Error("inconsistent path length for '" + m[1] + "': " + path_len + "!=" + m[5]); + if (a.length == 1 && pri_len[a[0][1]] != null) { + m[6] = parseInt(m[6]); + m[7] = parseInt(m[7]); + if (a[0][0] == '>') { + m[6] += a[0][2], m[7] += a[0][2]; + } else { + m[3] = m[3] == '+'? 
'-' : '+'; + var st = a[0][2] + (path_len - 1 - m[7]); + var en = a[0][2] + (path_len - 1 - m[6]); + m[6] = st, m[7] = en; + } + path_len = pri_len[a[0][1]]; + path = a[0][1]; + } else { + var b = []; + for (var i = 0; i < a.length; ++i) + b.push(a[i][0] + a[i][1] + ':' + a[i][2] + '-' + a[i][3]); + path = b.join(""); + } + print(m[1], m[2], m[3], path, path_len, m[6], m[7], m[8]); + } + file.close(); + buf.destroy(); +} + +function mg_cmd_subgaf(args) // FIXME: this is BUGGY!!! +{ + if (args.length < 2) { + print("Usage: mgutils.js subgaf "); + exit(1); + } + + var m, ctg, st, en; + if ((m = /^(\S+):(\S+)-(\S+)/.exec(args[1])) != null) + ctg = m[1], st = parseInt(m[2]), en = parseInt(m[3]); + + var buf = new Bytes(); + var file = new File(args[0]); + var re = /([><])([^\s><]+):(\d+)-(\d+)/g; + + while (file.readline(buf) >= 0) { + var t = buf.toString().split("\t"); + var l = parseInt(t[6]), s = parseInt(t[7]), e = parseInt(t[8]); + var regs = []; + if (t[5][0] == '>' || t[5][0] == '<') { + var m, x = 0; + //print(buf); + while ((m = re.exec(t[5])) != null) { + var a = parseInt(m[3]), b = parseInt(m[4]), c = b - a; + if (x == 0) { + if (b - a <= s) throw Error("Inconsistent!"); + a += s; + } + if (x + c == l) b -= l - e; + //print(m[2], a, b); + regs.push([m[2], a, b]); + x += c; + } + } else { + regs.push([t[5], s, e]); + } + var hit = false; + for (var i = 0; i < regs.length; ++i) { + if (regs[i][0] == ctg && regs[i][2] > st && en > regs[i][1]) + hit = true; + } + if (hit) print(buf); + } + + file.close(); + buf.destroy(); +} + +function mg_cmd_sveval(args) +{ + var c, flank = 100, min_var_len = 100, min_test_len = 50, min_sc = 20.0, non_chr = false, out_err = false, flt_vcf = false; + while ((c = getopt(args, "f:v:t:s:aeF")) != null) { + if (c == 'f') flank = parseInt(getopt.arg); + else if (c == 'v') min_var_len = parseInt(getopt.arg); + else if (c == 't') min_test_len = parseInt(getopt.arg); + else if (c == 's') min_sc = parseFloat(getopt.arg); + else if (c 
== 'a') non_chr = true; + else if (c == 'e') out_err = true; + else if (c == 'F') flt_vcf = true; + } + if (args.length - getopt.ind < 3) { + print("Usage: mgutils.js sveval "); + print("Options:"); + print(" -f INT length of flanking regions [" + flank + "]"); + print(" -v INT min INDEL length [" + min_var_len + "]"); + print(" -t INT min true INDEL length [" + min_test_len + "]"); + print(" -s INT min called score [" + min_sc + "]"); + print(" -e print errors"); + exit(1); + } + + var file, buf = new Bytes(); + + // parse true.bed + warn("Reading confident regions..."); + var bed = {} + file = new File(args[getopt.ind + 1]); + while (file.readline(buf) >= 0) { + var t = buf.toString().split("\t"); + if (t.length < 3) continue; + if (!non_chr && /^(chr)?[XY]$/.test(t[0])) continue; + if (bed[t[0]] == null) bed[t[0]] = []; + bed[t[0]].push([parseInt(t[1]), parseInt(t[2])]); + } + file.close(); + for (var ctg in bed) it_index(bed[ctg]); + + // parse true.vcf + warn("Reading baseline variants..."); + var vcf = {}, n_vcf = 0; + file = new File(args[getopt.ind]); + while (file.readline(buf) >= 0) { + var t = buf.toString().split("\t"); + if (t[0][0] == '#') continue; + if (t.length < 10) continue; + var flt = (t[6] != '.' && t[6] != 'PASS'); + if (flt_vcf && flt) continue; + if (bed[t[0]] == null) continue; + var ref = t[3]; + var st = parseInt(t[1]) - 1; + var en = st + ref.length; + var max_diff = 0; + var al = t[4].split(","); + al.unshift(ref); + for (var i = 1; i < al.length; ++i) { + var l = al[i].length - ref.length; + if (l < 0) l = -l; + if (max_diff < l) max_diff = l; + } + if (max_diff < min_test_len) continue; + var s = t[9].split(':'); + if (s.length == 0) continue; + var gt = s[0].split(/[|\/]/); + if (gt == 0) continue; + var max_ev = 0; + max_diff = 0; + for (var i = 0; i < gt.length; ++i) { + if (gt[i] == '.') continue; + var x = parseInt(gt[i]); + var l = al[x].length - ref.length; + var x = l > 0? 
l : -l; + if (max_diff < x) max_diff = x, max_ev = l; + } + if (max_diff < min_test_len) continue; + if (vcf[t[0]] == null) vcf[t[0]] = []; + vcf[t[0]].push([st, en, -1, max_diff, max_ev, flt, s[0]]); + } + file.close(); + for (var ctg in vcf) it_index(vcf[ctg]); + + // parse rst.txt + warn("Reading gt results..."); + var rst = {}; + file = new File(args[getopt.ind + 2]); + while (file.readline(buf) >= 0) { + var t = buf.toString().split("\t"); + if (parseFloat(t[3]) < min_sc) continue; + if (bed[t[0]] == null) continue; + if (rst[t[0]] == null) rst[t[0]] = []; + var ref_len = t[7] == '*'? 0 : t[7].length; + var max_diff = 0, max_ev = 0; + for (var i = 8; i < t.length; ++i) { + var alt_len = t[i] == '*'? 0 : t[8].length; + var l = alt_len - ref_len; + var x = l > 0? l : -l; + if (max_diff < x) max_diff = x, max_ev = l; + } + var st = parseInt(t[1]), en = parseInt(t[2]); + rst[t[0]].push([st, en, -1, max_diff, max_ev]); + } + file.close(); + for (var ctg in rst) it_index(rst[ctg]); + + // sensitivity + var n_vcf = [0, 0, 0], fn = [0, 0, 0]; + for (var ctg in vcf) { + for (var i = 0; i < vcf[ctg].length; ++i) { + var v = vcf[ctg][i]; + if (v[3] < min_var_len) continue; + if (v[5]) continue; + var st = v[0] - flank, en = v[1] + flank; + if (st < 0) st = 0; + if (!it_contained(bed[ctg], st, en)) continue; + var sub = v[4] < 0? 1 : 2; + ++n_vcf[0], ++n_vcf[sub]; + var b = it_overlap(rst[ctg], st, en); + if (b.length == 0) { + if (out_err) print("FN", ctg, v[0], v[1], v[4], v[6]); + ++fn[0], ++fn[sub]; + } + } + } + + // specificity + var n_rst = [0, 0, 0], fp = [0, 0, 0]; + for (var ctg in rst) { + for (var i = 0; i < rst[ctg].length; ++i) { + var v = rst[ctg][i]; + if (v[3] < min_var_len) continue; + var st = v[0] - flank, en = v[1] + flank; + if (st < 0) st = 0; + if (!it_contained(bed[ctg], st, en)) continue; + var sub = v[4] < 0? 
1 : 2; + ++n_rst[0], ++n_rst[sub]; + var b = it_overlap(vcf[ctg], st, en); + if (b.length == 0) { + if (out_err) print("FP", ctg, v[0], v[1], v[4]); + ++fp[0], ++fp[sub]; + } + } + } + + print("NA", fn[0], n_vcf[0], (fn[0]/n_vcf[0]).toFixed(4)); + print("ND", fn[1], n_vcf[1], (fn[1]/n_vcf[1]).toFixed(4)); + print("NI", fn[2], n_vcf[2], (fn[2]/n_vcf[2]).toFixed(4)); + print("PA", fp[0], n_rst[0], (fp[0]/n_rst[0]).toFixed(4)); + print("PD", fp[1], n_rst[1], (fp[1]/n_rst[1]).toFixed(4)); + print("PI", fp[2], n_rst[2], (fp[2]/n_rst[2]).toFixed(4)); +} + +function mg_cmd_extractseg(args) +{ + function process(ctg, first, last, is_end) { + if (ctg == null || first[0] == null || first[1] == null) return; + if (first[0][7] == first[1][7]) return; + if (first[0][7] < first[1][7]) { + if (last[0][7] >= first[1][7]) return; + if (is_end) print(ctg, last[0][8], first[1][7], '*', 0, '+'); + else print(ctg, last[0][7], first[1][8], '*', 0, '+'); + } else { + if (last[1][7] >= first[0][7]) return; + if (is_end) print(ctg, last[1][8], first[0][7], '*', 0, '-'); + else print(ctg, last[1][7], first[0][8], '*', 0, '-'); + } + } + + var c, min_len = 100000, is_end = false; + while ((c = getopt(args, "el:")) != null) { + if (c == 'l') min_len = parseInt(getopt.arg); + else if (c == 'e') is_end = true; + } + if (args.length - getopt.ind < 3) { + print("Usage: mgutils.js extractseg [...]"); + return; + } + + var seg = [args[getopt.ind], args[getopt.ind+1]]; + var buf = new Bytes(); + for (var i = getopt.ind + 2; i < args.length; ++i) { + var file = new File(args[i]); + var flt = false; + var first = [null, null], last = [null, null], ctg = null; + while (file.readline(buf) >= 0) { + var t = buf.toString().split("\t"); + if (t[0] != "*") { + process(ctg, first, last, is_end); + flt = (parseInt(t[3]) - parseInt(t[2]) < min_len || parseInt(t[8]) - parseInt(t[7]) < min_len); + first = [null, null]; + last = [null, null]; + ctg = t[0]; + } else if (!flt) { + var s = t[1].substr(1); + t[7] = 
parseInt(t[7]), t[8] = parseInt(t[8]); + if (s == seg[0] && t[3] != '0') { + if (first[0] == null) first[0] = t.slice(0); + last[0] = t.slice(0); + } else if (s == seg[1] && t[3] != '0') { + if (first[1] == null) first[1] = t.slice(0); + last[1] = t.slice(0); + } + } + } + process(ctg, first, last, is_end); + file.close(); + } + buf.destroy(); +}
+
+function mg_cmd_bed2sql(args) // emit SQL INSERT statements for the "call" and "bwalk" tables from pasted per-sample call BED
+{
+    var c;
+    while ((c = getopt(args, "")) != null) {
+    }
+    if (args.length - getopt.ind == 0) {
+        print("Usage: paste *.bed | mgutils.js bed2sql | sqlite3 rGFA.db");
+        return;
+    }
+
+    var file, buf = new Bytes();
+
+    var sample = [];
+    file = new File(args[getopt.ind]); // first argument: one sample per line, name in column 1
+    while (file.readline(buf) >= 0) {
+        var t = buf.toString().split("\t");
+        sample.push(t[0]);
+    }
+    file.close();
+
+    file = args.length - getopt.ind >= 2 && args[getopt.ind+1] != "-"? new File(args[getopt.ind+1]) : new File(); // second argument absent or "-": default File()
+    print("DROP INDEX IF EXISTS idx_bwalk;");
+    print("DROP INDEX IF EXISTS idx_cst;");
+    print("DROP INDEX IF EXISTS idx_cen;");
+    print("BEGIN TRANSACTION;");
+    var wid = 0, bid = 0, ins_walk = []; // wid: running walk id; bid: one id per input line
+    while (file.readline(buf) >= 0) {
+        var t = buf.toString().split("\t");
+        if (t.length != sample.length * 6) // every sample contributes six columns
+            throw Error("Different number of samples");
+        var h = {}, w = [], j = 0; // h: walk string -> index into w, reset for every line
+        for (var i = 5; i < t.length; i += 6, ++j) { // sixth column of each sample; j is the sample index
+            if (t[i] == ".") continue;
+            var s = t[i].split(":"); // fields as used below: walk, len, strand, ctg, start, end
+            if (!(s[0] in h)) { // first time this walk is seen on this line
+                h[s[0]] = w.length;
+                ins_walk.push([wid, bid, s[1], s[0]]);
+                w.push([s[0], s[1], wid++]);
+            }
+            var v = [], x = w[h[s[0]]];
+            v.push("'" + bid + "'", "'" + sample[j] + "'", "'" + x[2] + "'", "'" + s[3] + "'"); // NOTE(review): values are wrapped in bare quotes; a name containing ' would break the statement
+            v.push("'" + s[4] + "'", "'" + s[5] + "'", "'" + (s[2] == '+'? 1 : -1) + "'");
+            print("INSERT INTO call (bid,sample,wid,ctg,start,end,strand) VALUES (" + v.join(",") + ");");
+        }
+        ++bid;
+    }
+    for (var i = 0; i < ins_walk.length; ++i) { // walk rows are written after all call rows
+        var w = ins_walk[i], v = [];
+        for (var j = 0; j < w.length; ++j)
+            v.push("'" + w[j] + "'");
+        print("INSERT INTO bwalk (wid,bid,len,walk) VALUES (" + v.join(",") + ");");
+    }
+    print("END TRANSACTION;");
+    print("CREATE INDEX IF NOT EXISTS idx_bwalk ON bwalk (bid);");
+    print("CREATE INDEX IF NOT EXISTS idx_cst ON call (ctg, start);");
+    print("CREATE INDEX IF NOT EXISTS idx_cen ON call (ctg, end);");
+    file.close();
+
+    buf.destroy();
+}
+
+function mg_cmd_merge(args) // merge pasted per-sample call BED into one line per record with per-sample allele indices
+{
+    var c, fn_anno = null, fn_sample = null;
+    while ((c = getopt(args, "a:s:")) != null) {
+        if (c == 'a') fn_anno = getopt.arg;
+        else if (c == 's') fn_sample = getopt.arg;
+    }
+    if (args.length - getopt.ind == 0) {
+        print("Usage: paste *.bed | mgutils.js merge -");
+        print("Options:");
+        print(" -a FILE annotation [null]");
+        print(" -s FILE list of samples [null]");
+        return;
+    }
+
+    var file, buf = new Bytes();
+    var anno = {}; // "ctg_start_end" -> column 12 of the annotation file
+    if (fn_anno) {
+        file = new File(fn_anno);
+        while (file.readline(buf) >= 0) {
+            var t = buf.toString().split("\t");
+            var key = [t[0], t[1], t[2]].join("_");
+            anno[key] = t[11];
+        }
+        file.close();
+    }
+    var hdr = ["#CHROM", "START", "END", "INFO", "FORMAT"];
+    if (fn_sample) { // sample names become the trailing header columns
+        file = new File(fn_sample);
+        while (file.readline(buf) >= 0) {
+            var t = buf.toString().split(/\s+/);
+            hdr.push(t[0]);
+        }
+        file.close();
+    }
+    file = args[getopt.ind] == "-"? new File() : new File(args[getopt.ind]);
+    print('##INFO='); // NOTE(review): this and the following header lines have empty values; the definitions look truncated
+    print('##INFO=');
+    print('##INFO=');
+    print('##INFO=');
+    print('##INFO=');
+    print('##INFO=');
+    print('##INFO=');
+    print('##INFO=');
+    print('##FORMAT=');
+    print('##FORMAT=');
+    print('##FORMAT=');
+    print('##FORMAT=');
+    print('##FORMAT=');
+    print(hdr.join("\t"));
+    while (file.readline(buf) >= 0) {
+        var t = buf.toString().split("\t");
+        var a = [t[0], t[1], t[2], "", "GT:CSTRAND:CTG:CS:CE"]; // a[3] (INFO) is filled in below
+        var ah = {}, aa = [], b = [], ns = 0; // ah: walk -> allele slot; aa: alleles; b: per-sample fields; ns: samples with a call
+        for (var j = 5; j < t.length; j += 6) { // sixth column of each six-column sample group
+            if (t[j] == ".") {
+                b.push(["."]);
+                continue;
+            }
+            ++ns;
+            var s = t[j].split(":");
+            if (ah[s[0]] == null) {
+                ah[s[0]] = aa.length;
+                aa.push({walk:s[0], len:s[1], cnt:0});
+            }
+            var k = ah[s[0]];
+            ++aa[k].cnt;
+            s[0] = k; // replace the walk string by its provisional allele slot
+            b.push(s);
+        }
+        for (var i = 0; i < aa.length; ++i)
+            aa[i].i = i; // remember the provisional slot before sorting
+        aa.sort(function(a,b) { return b.cnt - a.cnt }); // most frequent allele first
+        var i2a = [], alen = [], awalk = [], ac = []; // i2a: provisional slot -> rank after sorting
+        for (var i = 0; i < aa.length; ++i) {
+            i2a[aa[i].i] = i;
+            alen[i] = aa[i].len;
+            awalk[i] = aa[i].walk;
+            ac[i] = aa[i].cnt;
+        }
+        for (var j = 0; j < b.length; ++j) {
+            if (b[j][0] != ".") {
+                var i = b[j].shift(); // drop the provisional slot ...
+                b[j][0] = i2a[i]; // ... and overwrite the length field with the final allele index
+                a.push(b[j].join(":"));
+            } else a.push(".");
+        }
+        var info = ["NS="+ns, "NA="+aa.length, "ALEN="+alen.join(","), "AC="+ac.join(",")];
+        var key = [t[0], t[1], t[2]].join("_");
+        if (anno[key] != null) info.push("ANNO="+anno[key]);
+        info.push("VS="+t[3], "VE="+t[4], "AWALK="+awalk.join(","));
+        a[3] = info.join(";");
+        print(a.join("\t"));
+    }
+    buf.destroy();
+    file.close();
+}
+
+function mg_cmd_merge2vcf(args) { // rewrite "merge" output as VCF-style lines
+    var buf = new Bytes();
+    var file = args.length == 0? new File() : new File(args[0]);
+    print("##fileformat=VCFv4.2");
+    print('##ALT='); // NOTE(review): empty value; the definition looks truncated
+    print('##FORMAT='); // NOTE(review): empty value; the definition looks truncated
+    while (file.readline(buf) >= 0) {
+        var line = buf.toString();
+        if (/^##/.test(line)) { // meta lines are passed through unchanged
+            print(line);
+            continue;
+        }
+        var a, t = line.split("\t");
+        if (line[0] == "#") { // column header: fixed VCF columns, then the sample names
+            a = ["#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO", "FORMAT"];
+            for (var i = 5; i < t.length; ++i)
+                a.push(t[i]);
+        } else {
+            a = [t[0], t[1], ".", "N", "", 30, "PASS", t[3] + ";END=" + t[2], "GT:GT0"]; // NOTE(review): ALT is written as an empty string; confirm against the ##ALT line
+            for (var i = 5; i < t.length; ++i) {
+                var s = t[i].split(":");
+                if (s[0] == ".") a.push(s[0]);
+                else if (s[0] == "0") a.push("0:0");
+                else a.push("1:" + s[0]); // any non-zero allele becomes GT 1; the original index is kept as GT0
+            }
+        }
+        print(a.join("\t"));
+    }
+    file.close();
+    buf.destroy();
+}
+
+function mg_cmd_segfreq(args) { // print per-segment allele counts taken from "merge" output; per-category length totals go to warn()
+    var c, min_af = 0.05;
+    while ((c = getopt(args, "f:")) != null) {
+        if (c == 'f') min_af = parseFloat(getopt.arg);
+    }
+    if (args.length - getopt.ind < 2) {
+        print("Usage: mgutils.js segfreq [-f minFreq=0.05] [bubble.bed]");
+        return 1;
+    }
+    var file, buf = new Bytes();
+
+    file = new File(args[getopt.ind]); // first argument: one segment per line, name in column 4
+    var h = {}, a = []; // h: segment name -> index into a
+    while (file.readline(buf) >= 0) {
+        var t = buf.toString().split("\t");
+        h[t[3]] = a.length;
+        a.push([t[0], t[1], t[2], t[3], parseInt(t[4]), 0, 0, "N/A", "N/A", 0]); // slots 5..9 are filled from the second file
+    }
+    file.close();
+
+    var re_info = /([^\s=;]+)=([^\s=;]+)/g;
+    var re_walk = /([><])([^\s><]+)/g;
+    var bb = {}; // "ctg_start_end" -> annotation
+    file = new File(args[getopt.ind+1]); // second argument: output of the merge command
+    while (file.readline(buf) >= 0) {
+        var m, t = buf.toString().split("\t", 4);
+        if (t[0][0] == "#") continue;
+        var anno = null, ac = null, walk = null;
+        while ((m = re_info.exec(t[3])) != null) { // pick ANNO, AWALK and AC out of the INFO column
+            if (m[1] == "ANNO") anno = m[2];
+            else if (m[1] == "AWALK") walk = m[2].split(",");
+            else if (m[1] == "AC") {
+                ac = m[2].split(",");
+                for (var i = 0; i < ac.length; ++i)
+                    ac[i] = parseInt(ac[i]);
+            }
+        }
+        if (ac == null || walk == null) throw Error("Missing AC or AWALK");
+        if (ac.length != walk.length) throw Error("Inconsistent AC or AWALK");
+        if (anno == null) anno = "N/A";
+        bb[t[0]+"_"+t[1]+"_"+t[2]] = anno;
+        var ns = 0; // sum of all allele counts on this line
+        for (var i = 0; i < walk.length; ++i)
+            ns += ac[i];
+        var dup = {}; // NOTE(review): shared by all alleles of the line, so a segment is credited only with the AC of the first allele containing it; confirm this is intended
+        for (var i = 0; i < walk.length; ++i) {
+            if (walk[i] == "*") continue;
+            while ((m = re_walk.exec(walk[i])) != null) {
+                var s = m[2];
+                if (h[s] == null) throw Error("Missing segment " + s);
+                if (dup[s]) continue;
+                dup[s] = 1;
+                var b = a[h[s]];
+                b[5] = ns;
+                b[6] += ac[i];
+                b[7] = anno;
+                b[8] = mg_classify_repeat(anno);
+                b[9] = walk.length; // number of alleles at the record
+            }
+        }
+    }
+    file.close();
+
+    if (args.length - getopt.ind >= 3) { // optional third argument: bubble file, segment list in column 12
+        file = new File(args[getopt.ind+2]);
+        while (file.readline(buf) >= 0) {
+            var t = buf.toString().split("\t");
+            var s = t[11].split(",");
+            var anno = bb[t[0]+"_"+t[1]+"_"+t[2]];
+            if (anno == null) throw Error("Missing bubble");
+            for (var i = 1; i < s.length - 1; ++i) { // first and last list entries are skipped
+                if (h[s[i]] == null) throw Error("Inconsistent bubble file");
+                var b = a[h[s[i]]];
+                b[10] = t[0], b[11] = t[1], b[12] = t[2];
+                b[7] = anno;
+                b[8] = mg_classify_repeat(anno);
+            }
+        }
+        file.close();
+    }
+
+    buf.destroy();
+
+    var replen = {}; // category -> summed length for records with 2, 3 and more than 3 alleles
+    for (var i = 0; i < a.length; ++i) {
+        print(a[i].join("\t"));
+        var anno = a[i][8], len = parseInt(a[i][2]) - parseInt(a[i][1]);
+        if (a[i][4] > 0 && a[i][5] > 0 && a[i][6] >= a[i][5] * min_af) { // segment count must reach min_af of the line total
+            if (replen[anno] == null) replen[anno] = [0, 0, 0];
+            if (a[i][9] == 2) replen[anno][0] += len;
+            else if (a[i][9] == 3) replen[anno][1] += len;
+            else if (a[i][9] > 3) replen[anno][2] += len;
+        }
+    }
+    for (var x in replen) {
+        var y = x.replace(/^\d+_/, "");
+        warn(x, y, replen[x].join("\t"));
+    }
+}
+
+function mg_cmd_genecopy(args) +{ + var c, opt = { min_cov:0.8, min_rel_cov:0.85, max_prev_ovlp:0.5, mm:4, gapo:5 }; + while ((c = getopt(args, "c:r:")) != null) { + if (c == 'c') opt.min_cov = parseFloat(getopt.arg); + else if (c == 'r') opt.min_rel_cov = parseFloat(getopt.arg); + } + if (args.length - getopt.ind < 2) { + print("Usage: mgutils.js genecopy [options] "); +
print("Options:"); + print(" -c FLOAT min coverage [" + opt.min_cov + "]"); + print(" -r FLOAT min relative coverage [" + opt.min_rel_cov + "]"); + return; + } + var re_cg = /(\d+)([MIDNSHP=X])/g; + var re_walk = /([><])([^\s><]+):(\d+)-(\d+)/g; + var file, buf = new Bytes(); + + var src = {}; + file = new File(args[getopt.ind+1]); + while (file.readline(buf) >= 0) { + var t = buf.toString().split("\t"); + src[t[3]] = [t[0], parseInt(t[1]), parseInt(t[2]), t[5] == '+'? 1 : -1]; + } + file.close(); + + file = new File(args[getopt.ind]); + var gene = {}, reg = {}; + while (file.readline(buf) >= 0) { + var t = buf.toString().split("\t"); + + // check coverage + if (/\|([A-Z]+\d*\.\d+|ENSG\d+)$/.test(t[0])) continue; + for (var i = 1; i <= 3; ++i) t[i] = parseInt(t[i]); + for (var i = 6; i <= 11; ++i) t[i] = parseInt(t[i]); + if (t[3] - t[2] < t[1] * opt.min_cov) continue; + if (gene[t[0]] != null) { + var g0 = gene[t[0]][0]; + if (t[3] - t[2] < (g0[2] - g0[1]) * opt.min_rel_cov) + continue; + } + + // compute de + var m, cg = null; + for (var i = 12; i < t.length; ++i) { + if (t[i].substr(0, 4) == "cg:Z") + cg = t[i].substr(5); + } + if (cg == null) throw Error("no cg"); + var blen = 0, mlen = 0, sc = 0; + while ((m = re_cg.exec(cg)) != null) { + var len = parseInt(m[1]); + if (m[2] == '=') mlen += len, blen += len, sc += len; + else { + ++blen; + if (m[2] == '*') sc -= opt.mm; + else sc -= opt.gapo + len; + } + } + var de = (blen - mlen) / blen; + + // find intervals + var intv = []; + if (t[5][0] == '>' || t[5][0] == '<') { + var len = 0; + while ((m = re_walk.exec(t[5])) != null) { + var st = parseInt(m[3]), en = parseInt(m[4]); + var ss = st, ee = en; + if (t[7] >= len && t[7] < len + en - st) { + if (m[1] == '>') ss = st + t[7]; + else ee = en - t[7]; + } else if (t[8] >= len && t[8] < len + en - st) { + if (m[1] == '>') ee = st + t[8] - len; + else ss = st + t[6] - t[8]; + } + intv.push([m[2], ss, ee, m[1] == '>'? 
1 : -1]); + len += en - st; + } + } else intv.push([t[5], t[7], t[8], t[4] == '+'? 1 : -1]); + + // save + if (gene[t[0]] == null) gene[t[0]] = []; + for (var j = 0; j < intv.length; ++j) { + var x = intv[j], pass = true; + if (reg[x[0]] == null) reg[x[0]] = []; + if (src[t[0]] != null) { + var y = src[t[0]]; + if (y[0] == x[0] && y[1] < x[2] && x[1] < y[2]) { + var l = (x[2] < y[2]? x[2] : y[2]) - (x[1] > y[1]? x[1] : y[1]); + if (l > (x[2] - x[1]) * 0.99) pass = false; + } + } + reg[x[0]].push([x[1], x[2], 0, t[0], gene[t[0]].length, pass, x[3]]); + } + gene[t[0]].push([t[1], t[2], t[3], sc, de, intv]); + } + file.close(); + buf.destroy(); + + // preparation + var a = []; + for (var g in gene) { + var x = gene[g]; + for (var i = 0; i < x.length; ++i) + a.push([x[i][3], g, i]); + } + a.sort(function(x,y) { return y[0]-x[0] }); + for (var x in reg) it_index(reg[x]); + + // select + var good_hit = []; + for (var i = 0; i < a.length; ++i) { + var x = a[i]; + var h = gene[x[1]][x[2]]; + var intv = h[5], cov_tot = 0, len_tot = 0, ovlp_gene = {}; + for (var j = 0; j < intv.length; ++j) { + var y = intv[j]; + len_tot += y[2] - y[1]; + if (reg[y[0]] == null) continue; + var st0 = y[1], en0 = y[2]; + var b = it_overlap(reg[y[0]], st0, en0); + var cov_st = 0, cov_en = 0, cov = 0; + for (var k = 0; k < b.length; ++k) { + if (b[k][5] || b[k][6] != y[3]) continue; + ovlp_gene[b[k][3]] = 1; + var st1 = b[k][0] > st0? b[k][0] : st0; + var en1 = b[k][1] < en0? b[k][1] : en0; + if (st1 > cov_en) { + cov += cov_en - cov_st; + cov_st = st1, cov_en = en1; + } else cov_en = cov_en > en1? 
cov_en : en1; + } + cov += cov_en - cov_st; + cov_tot += cov; + } + var ovlp_gene_arr = []; + for (var y in ovlp_gene) ovlp_gene_arr.push(y); + if (ovlp_gene_arr.length > 0) + print("OG", x[1], x[2], cov_tot, len_tot, ovlp_gene_arr); + if (cov_tot < len_tot * opt.max_prev_ovlp) { + good_hit.push([x[1], x[2]]); + for (var j = 0; j < intv.length; ++j) { + var y = intv[j]; + if (reg[y[0]] == null) continue; + var b = it_overlap(reg[y[0]], y[1], y[2]); + for (var k = 0; k < b.length; ++k) + if (b[k][3] == x[1] && b[k][4] == x[2]) + b[k][5] = false; + } + } + } + + // count good_hit + var out = {}; + for (var g in gene) out[g] = [gene[g].length, 0]; + for (var i = 0; i < good_hit.length; ++i) { + print("GH", good_hit[i][0], gene[good_hit[i][0]][good_hit[i][1]].join("\t")); + ++out[good_hit[i][0]][1]; + } + for (var g in out) + print("GC", g, out[g].join("\t")); +} + +/************************* + ***** main function ***** + *************************/ + +function main(args) +{ + if (args.length == 0) { + print("Usage: mgutils.js [arguments]"); + print("Commands:"); + print(" stableGaf convert unstable GAF to stable GAF"); + print(" renamefa add a prefix to sequence names in FASTA"); + print(" paf2bl blacklist regions from insert-to-ref alignment"); + print(" anno annotate short sequences"); + print(" anno2tbl summarize anno output"); + print(" extractseg extract a segment from GAF"); + print(" merge merge per-sample --call BED"); + print(" merge2vcf convert merge BED output to VCF"); + print(" segfreq compute node frequency from merged calls"); + print(" genecopy gene copy analysis"); + print(" bed2sql generate SQL from --call BED"); + //print(" subgaf extract GAF overlapping with a region (BUGGY)"); + //print(" sveval evaluate SV accuracy"); + exit(1); + } + + var cmd = args.shift(); + if (cmd == 'renamefa') mg_cmd_renamefa(args); + else if (cmd == 'paf2bl') mg_cmd_paf2bl(args); + else if (cmd == 'anno') mg_cmd_anno(args); + else if (cmd == 'anno2tbl') 
mg_cmd_anno2tbl(args); + else if (cmd == 'subgaf') mg_cmd_subgaf(args); + else if (cmd == 'sveval') mg_cmd_sveval(args); + else if (cmd == 'joinfa') mg_cmd_joinfa(args); + else if (cmd == 'stableGaf') mg_cmd_stableGaf(args); + else if (cmd == 'bed2sql') mg_cmd_bed2sql(args); + else if (cmd == 'extractseg') mg_cmd_extractseg(args); + else if (cmd == 'merge') mg_cmd_merge(args); + else if (cmd == 'merge2vcf') mg_cmd_merge2vcf(args); + else if (cmd == 'segfreq') mg_cmd_segfreq(args); + else if (cmd == 'genecopy') mg_cmd_genecopy(args); + else throw Error("unrecognized command: " + cmd); +} + +main(arguments); diff --git a/options.c b/options.c new file mode 100644 index 0000000..ea73149 --- /dev/null +++ b/options.c @@ -0,0 +1,134 @@ +#include +#include "mgpriv.h" +#include "sys.h" + +void mg_idxopt_init(mg_idxopt_t *io) +{ + memset(io, 0, sizeof(mg_idxopt_t)); + io->k = 17; + io->w = 11; + io->bucket_bits = 14; +} + +void mg_mapopt_init(mg_mapopt_t *mo) +{ + memset(mo, 0, sizeof(mg_mapopt_t)); + mo->seed = 11; + mo->occ_max1 = 50, mo->occ_max1_cap = 250; + mo->occ_max1_frac = 2e-4f; + mo->max_gap = 5000; + mo->max_gap_ref = -1; + mo->max_gap_pre = 1000; + mo->max_lc_skip = 25, mo->max_gc_skip = 25; + mo->max_lc_iter = 5000; + mo->bw = 500, mo->bw_long = 20000; + mo->rmq_size_cap = 100000; + mo->rmq_rescue_size = 1000; + mo->rmq_rescue_ratio = 0.1f; + mo->mini_batch_size = 500000000; + mo->div = 0.1f; + mo->chn_pen_gap = 1.0f, mo->chn_pen_skip = 0.05f; + mo->min_lc_cnt = 5, mo->min_lc_score = 40; + mo->min_gc_cnt = 5, mo->min_gc_score = 50; + mo->gdp_max_ed = 10000; + mo->lc_max_trim = 50; + mo->lc_max_occ = 2; + mo->mask_level = 0.5f; + mo->sub_diff = 6; + mo->best_n = 5; + mo->pri_ratio = 0.8f; + mo->ref_bonus = 0; + mo->pe_ori = 0; // FF + mo->min_cov_mapq = 20; + mo->min_cov_blen = 1000; + mo->cap_kalloc = 1000000000; +} + +void mg_ggopt_init(mg_ggopt_t *go) +{ + memset(go, 0, sizeof(mg_ggopt_t)); + go->algo = MG_G_NONE; + go->flag |= MG_G_NO_QOVLP; + 
go->min_map_len = 100000; + go->min_depth_len = 20000; + go->min_mapq = 5; + go->min_var_len = 50; + go->match_pen = 10; + // for ggs + go->ggs_shrink_pen = 9; + go->ggs_min_end_cnt = 10; + go->ggs_min_end_frac = 0.1f; + go->ggs_max_iden = 0.80f; + go->ggs_min_inv_iden = 0.95f; +} + +int mg_opt_set(const char *preset, mg_idxopt_t *io, mg_mapopt_t *mo, mg_ggopt_t *go) +{ + if (preset == 0) { + mg_idxopt_init(io); + mg_mapopt_init(mo); + mg_ggopt_init(go); + } else if (strcmp(preset, "lr") == 0) { // this is the default + } else if (strcmp(preset, "asm") == 0 || strcmp(preset, "ggs") == 0) { + io->k = 19, io->w = 10; + mo->flag |= MG_M_RMQ; + mo->occ_max1 = 10, mo->occ_max1_cap = 100; + mo->bw = 1000, mo->bw_long = 150000; + mo->max_gap = 10000, mo->max_gap_pre = 1000; + mo->min_lc_cnt = 5, mo->min_lc_score = 40; + mo->min_gc_cnt = 5, mo->min_gc_score = 1000; + mo->min_cov_mapq = 5; + mo->min_cov_blen = 100000; + mo->max_lc_skip = mo->max_gc_skip = 50; + mo->div = 0.01f; + mo->mini_batch_size = 4000000000LL; + if (strcmp(preset, "ggs") == 0) + go->algo = MG_G_GGSIMPLE, mo->best_n = 0; + } else if (strcmp(preset, "se") == 0 || strcmp(preset, "sr") == 0) { + io->k = 21, io->w = 10; + mo->flag |= MG_M_SR | MG_M_HEAP_SORT | MG_M_2_IO_THREADS; + mo->occ_max1 = 1000; + mo->occ_max1_cap = 2500; + mo->max_gap = 100; + mo->bw = mo->bw_long = 100; + mo->max_frag_len = 800; + mo->pri_ratio = 0.5f; + mo->min_lc_cnt = 2, mo->min_lc_score = 25; + mo->min_gc_cnt = 3, mo->min_gc_score = 40; + mo->mini_batch_size = 50000000; + mo->min_cov_blen = 50; + mo->chn_pen_gap = 0.2f; + mo->ref_bonus = 1; + if (strcmp(preset, "sr") == 0) { + mo->flag |= MG_M_FRAG_MODE | MG_M_FRAG_MERGE; + mo->pe_ori = 0<<1|1; // FR + } + } else return -1; + return 0; +} + +int mg_opt_check(const mg_idxopt_t *io, const mg_mapopt_t *mo, const mg_ggopt_t *go) +{ + if ((mo->flag & MG_M_FRAG_MODE) && !(mo->flag & MG_M_FRAG_MERGE)) { + if (mg_verbose >= 1) + fprintf(stderr, "[ERROR]\033[1;31m the 
fragment-without-merge mode is not implemented\033[0m\n"); + return -1; + } + return 0; +} + +void mg_opt_update(const mg_idx_t *gi, mg_mapopt_t *mo, mg_ggopt_t *go) +{ + float f[2]; + int32_t q[2]; + f[0] = 0.1f, f[1] = mo->occ_max1_frac; + mg_idx_cal_quantile(gi, 2, f, q); + if (q[0] > mo->lc_max_occ) mo->lc_max_occ = q[0]; + if (mo->lc_max_occ > mo->occ_max1_cap) mo->lc_max_occ = mo->occ_max1_cap; + if (q[1] > mo->occ_max1) mo->occ_max1 = q[1]; + if (mo->occ_max1 > mo->occ_max1_cap) mo->occ_max1 = mo->occ_max1_cap; + if (mo->bw_long < mo->bw) mo->bw_long = mo->bw; + if (mg_verbose >= 3) + fprintf(stderr, "[M::%s::%.3f*%.2f] occ_max1=%d; lc_max_occ=%d\n", __func__, + realtime() - mg_realtime0, cputime() / (realtime() - mg_realtime0), mo->occ_max1, mo->lc_max_occ); +} diff --git a/shortk.c b/shortk.c new file mode 100644 index 0000000..e4ec1eb --- /dev/null +++ b/shortk.c @@ -0,0 +1,251 @@ +#include "mgpriv.h" +#include "ksort.h" +#include "kavl.h" +#include "algo.h" +#include "khashl.h" + +typedef struct sp_node_s { + uint64_t di; // dist<<32 | unique_id + uint32_t v; + int32_t pre; + uint32_t hash; + int32_t is_0; + KAVL_HEAD(struct sp_node_s) head; +} sp_node_t, *sp_node_p; + +#define sp_node_cmp(a, b) (((a)->di > (b)->di) - ((a)->di < (b)->di)) +KAVL_INIT(sp, sp_node_t, head, sp_node_cmp) + +#define sp_node_lt(a, b) ((a)->di < (b)->di) +KSORT_INIT(sp, sp_node_p, sp_node_lt) + +typedef struct { + int32_t k; + int32_t qs, qe; + sp_node_t *p[MG_MAX_SHORT_K]; // this forms a max-heap +} sp_topk_t; + +KHASHL_MAP_INIT(KH_LOCAL, kh_sp_t, sp, uint32_t, sp_topk_t, kh_hash_uint32, kh_eq_generic) +KHASHL_MAP_INIT(KH_LOCAL, kh_sp2_t, sp2, uint32_t, uint64_t, kh_hash_uint32, kh_eq_generic) + +#define MG_SHORT_K_EXT 1000 + +static inline sp_node_t *gen_sp_node(void *km, const gfa_t *g, uint32_t v, int32_t d, int32_t id) +{ + sp_node_t *p; + KMALLOC(km, p, 1); + p->v = v, p->di = (uint64_t)d<<32 | id, p->pre = -1, p->is_0 = 1; + return p; +} + +mg_pathv_t *mg_shortest_k(void 
*km0, const gfa_t *g, uint32_t src, int32_t n_dst, mg_path_dst_t *dst, int32_t max_dist, int32_t max_k, int32_t *n_pathv) +{ + sp_node_t *p, *root = 0, **out; + sp_topk_t *q; + kh_sp_t *h; + kh_sp2_t *h2; + void *km; + khint_t k; + int absent; + int32_t i, j, n_done, n_found; + uint32_t id, n_out, m_out; + int8_t *dst_done; + mg_pathv_t *ret = 0; + uint64_t *dst_group, *seeds = 0; + void *h_seeds = 0; + mg128_v mini = {0,0,0}; + + if (n_pathv) *n_pathv = 0; + if (n_dst <= 0) return 0; + for (i = 0; i < n_dst; ++i) { // initialize + mg_path_dst_t *t = &dst[i]; + if (t->inner) + t->dist = 0, t->n_path = 1, t->path_end = -1; + else + t->dist = -1, t->n_path = 0, t->path_end = -1; + } + if (max_k > MG_MAX_SHORT_K) max_k = MG_MAX_SHORT_K; + km = (mg_dbg_flag&MG_DBG_NO_KALLOC) && (mg_dbg_flag&MG_DBG_SHORTK)? 0 : km_init2(km0, 0x4000); + + KCALLOC(km, dst_done, n_dst); + KMALLOC(km, dst_group, n_dst); + for (i = 0; i < n_dst; ++i) // multiple dst[] may have the same dst[].v. We need to group them first. 
+ dst_group[i] = (uint64_t)dst[i].v<<32 | i; + radix_sort_gfa64(dst_group, dst_group + n_dst); + + h2 = sp2_init2(km); // this hash table keeps all destinations + sp2_resize(h2, n_dst * 2); + for (i = 1, j = 0; i <= n_dst; ++i) { + if (i == n_dst || dst_group[i]>>32 != dst_group[j]>>32) { + k = sp2_put(h2, dst_group[j]>>32, &absent); + kh_val(h2, k) = (uint64_t)j << 32 | (i - j); + assert(absent); + j = i; + } + } + + h = sp_init2(km); // this hash table keeps visited vertices + sp_resize(h, 16); + m_out = 16, n_out = 0; + KMALLOC(km, out, m_out); + + id = 0; + p = gen_sp_node(km, g, src, 0, id++); + p->hash = kh_hash_uint32(src); + kavl_insert(sp, &root, p, 0); + k = sp_put(h, src, &absent); + q = &kh_val(h, k); + q->k = 1, q->p[0] = p, q->qs = q->qe = -1; + + n_done = 0; + while (kavl_size(head, root) > 0) { + int32_t i, nv; + gfa_arc_t *av; + sp_node_t *r; + + r = kavl_erase_first(sp, &root); // take out the closest vertex in the heap (as a binary tree) + //fprintf(stderr, "XX\t%d\t%d\t%d\t%c%s[%d]\t%d\n", n_out, kavl_size(head, root), n_finished, "><"[(r->v&1)^1], g->seg[r->v>>1].name, r->v, (int32_t)(r->di>>32)); + if (n_out == m_out) KEXPAND(km, out, m_out); + r->di = r->di>>32<<32 | n_out; // lower 32 bits now for position in the out[] array + out[n_out++] = r; + + k = sp2_get(h2, r->v); + if (k != kh_end(h2)) { // we have reached one dst vertex + int32_t j, dist = r->di>>32, off = kh_val(h2, k) >> 32, cnt = (int32_t)kh_val(h2, k); + for (j = 0; j < cnt; ++j) { + mg_path_dst_t *t = &dst[(int32_t)dst_group[off + j]]; + int32_t done = 0; + if (t->inner) { + done = 1; + } else { + int32_t copy = 0; + //if (mg_dbg_flag & MG_DBG_GC1) fprintf(stderr, " src=%c%s[%d],qlen=%d\tdst=%c%s[%d]\ttarget_distx=%d,target_hash=%x\tdistx=%d,hash=%x\n", "><"[src&1], g->seg[src>>1].name, src, ql, "><"[t->v&1], g->seg[t->v>>1].name, t->v, t->target_dist - g->seg[src>>1].len, t->target_hash, dist - g->seg[src>>1].len, r->hash); + if (t->n_path == 0) { // keep the shortest path + 
copy = 1; + } else if (t->target_dist >= 0) { // we have a target distance; choose the closest + if (dist == t->target_dist && t->check_hash && r->hash == t->target_hash) { // we found the target path + copy = 1, done = 1; + } else { + int32_t d0 = t->dist, d1 = dist; + d0 = d0 > t->target_dist? d0 - t->target_dist : t->target_dist - d0; + d1 = d1 > t->target_dist? d1 - t->target_dist : t->target_dist - d1; + if (d1 < d0) copy = 1; + } + } + if (copy) { + t->path_end = n_out - 1, t->dist = dist, t->hash = r->hash, t->is_0 = r->is_0; + if (t->target_dist >= 0) { + if (dist == t->target_dist && t->check_hash && r->hash == t->target_hash) done = 1; + else if (dist > t->target_dist + MG_SHORT_K_EXT) done = 1; + } + } + ++t->n_path; + if (t->n_path >= max_k) done = 1; + } + if (dst_done[off + j] == 0 && done) + dst_done[off + j] = 1, ++n_done; + } + if (n_done == n_dst) break; + } + + nv = gfa_arc_n(g, r->v); + av = gfa_arc_a(g, r->v); + for (i = 0; i < nv; ++i) { // visit all neighbors + gfa_arc_t *ai = &av[i]; + int32_t d = (r->di>>32) + (uint32_t)ai->v_lv; + if (d > max_dist) continue; // don't probe vertices too far away + k = sp_put(h, ai->w, &absent); + q = &kh_val(h, k); + if (absent) { // a new vertex visited + q->k = 0, q->qs = q->qe = -1; + //if (ql && qs) fprintf(stderr, "ql=%d,src=%d\tv=%c%s[%d]\n", ql, src, "><"[ai->w&1], g->seg[ai->w>>1].name, ai->w); + } + if (q->k < max_k) { // enough room: add to the heap + p = gen_sp_node(km, g, ai->w, d, id++); + p->pre = n_out - 1; + p->hash = r->hash + kh_hash_uint32(ai->w); + p->is_0 = r->is_0; + if (ai->rank > 0) p->is_0 = 0; + kavl_insert(sp, &root, p, 0); + q->p[q->k++] = p; + ks_heapup_sp(q->k, q->p); + } else if (q->p[0]->di>>32 > d) { // shorter than the longest path so far: replace the longest + p = kavl_erase(sp, &root, q->p[0], 0); + if (p) { + p->di = (uint64_t)d<<32 | (id++); + p->pre = n_out - 1; + p->hash = r->hash + kh_hash_uint32(ai->w); + p->is_0 = r->is_0; + if (ai->rank > 0) p->is_0 = 0; + 
kavl_insert(sp, &root, p, 0); + ks_heapdown_sp(0, q->k, q->p); + } else { + fprintf(stderr, "Warning: logical bug in gfa_shortest_k(): q->k=%d,q->p[0]->{d,i}={%d,%d},d=%d,src=%u,max_dist=%d,n_dst=%d\n", q->k, (int32_t)(q->p[0]->di>>32), (int32_t)q->p[0]->di, d, src, max_dist, n_dst); + km_destroy(km); + return 0; + } + } // else: the path is longer than all the existing paths ended at ai->w + } + } + + kfree(km, dst_group); + kfree(km, dst_done); + sp_destroy(h); + mg_idx_hfree(h_seeds); + kfree(km, seeds); + kfree(km, mini.a); + // NB: AVL nodes are not deallocated. When km==0, they are memory leaks. + + for (i = 0, n_found = 0; i < n_dst; ++i) + if (dst[i].n_path > 0) ++n_found; + + if (n_found > 0 && n_pathv) { // then generate the backtrack array + int32_t n, *trans; + KCALLOC(km, trans, n_out); // used to squeeze unused elements in out[] + for (i = 0; i < n_dst; ++i) { // mark dst vertices with a target distance + mg_path_dst_t *t = &dst[i]; + if (t->n_path > 0 && t->target_dist >= 0 && t->path_end >= 0) + trans[(int32_t)out[t->path_end]->di] = 1; + } + for (i = 0; i < n_out; ++i) { // mark dst vertices without a target distance + k = sp2_get(h2, out[i]->v); + if (k != kh_end(h2)) { // TODO: check if this is correct! + int32_t off = kh_val(h2, k)>>32, cnt = (int32_t)kh_val(h2, k); + for (j = off; j < off + cnt; ++j) + if (dst[j].target_dist < 0) + trans[i] = 1; + } + } + for (i = n_out - 1; i >= 0; --i) // mark all predecessors + if (trans[i] && out[i]->pre >= 0) + trans[out[i]->pre] = 1; + for (i = n = 0; i < n_out; ++i) // generate coordinate translations + if (trans[i]) trans[i] = n++; + else trans[i] = -1; + + *n_pathv = n; + KMALLOC(km0, ret, n); + for (i = 0; i < n_out; ++i) { // generate the backtrack array + mg_pathv_t *p; + if (trans[i] < 0) continue; + p = &ret[trans[i]]; + p->v = out[i]->v, p->d = out[i]->di >> 32; + p->pre = out[i]->pre < 0? 
out[i]->pre : trans[out[i]->pre]; + } + for (i = 0; i < n_dst; ++i) // translate "path_end" + if (dst[i].path_end >= 0) + dst[i].path_end = trans[dst[i].path_end]; + } + + km_destroy(km); + return ret; +} + +void mg_sub_print_path(FILE *fp, const gfa_t *g, int32_t n, mg_pathv_t *path) +{ + int32_t i; + for (i = 0; i < n; ++i) { + mg_pathv_t *p = &path[i]; + fprintf(fp, "[%d]\t%d\t%s\t%d\t%d\n", i, p->v, g->seg[p->v>>1].name, p->d, p->pre); + } +} diff --git a/sketch.c b/sketch.c new file mode 100644 index 0000000..65fd15f --- /dev/null +++ b/sketch.c @@ -0,0 +1,109 @@ +#include +#include +#include +#include +#define __STDC_LIMIT_MACROS +#include "kvec-km.h" +#include "mgpriv.h" + +unsigned char seq_nt4_table[256] = { + 0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 +}; + +static inline uint64_t hash64(uint64_t key, uint64_t mask) +{ + key = (~key + (key << 21)) & mask; // key = (key << 21) - key - 1; + key = key ^ key >> 24; + key = ((key + (key << 3)) + (key << 8)) & mask; // key * 265 + key = key ^ key >> 14; + key = ((key + (key << 2)) + (key << 4)) & mask; // key * 21 + key = key ^ key >> 28; + key = (key + (key << 31)) & mask; + return key; +} + +/** + * Find symmetric (w,k)-minimizers on a DNA sequence + * + * @param km 
thread-local memory pool; using NULL falls back to malloc() + * @param str DNA sequence + * @param len length of $str + * @param w find a minimizer for every $w consecutive k-mers + * @param k k-mer size + * @param rid reference ID; will be copied to the output $p array + * @param p minimizers + * p->a[i].x = kMer<<8 | kmerSpan + * p->a[i].y = rid<<32 | lastPos<<1 | strand + * where lastPos is the position of the last base of the i-th minimizer, + * and strand indicates whether the minimizer comes from the top or the bottom strand. + * Callers may want to set "p->n = 0"; otherwise results are appended to p + */ +void mg_sketch(void *km, const char *str, int len, int w, int k, uint32_t rid, mg128_v *p) +{ + uint64_t shift1 = 2 * (k - 1), mask = (1ULL<<2*k) - 1, kmer[2] = {0,0}; + int i, j, l, buf_pos, min_pos, kmer_span = 0; + mg128_t buf[256], min = { UINT64_MAX, UINT64_MAX }; + + assert(len > 0 && (w > 0 && w < 256) && (k > 0 && k <= 28)); // 56 bits for k-mer; could use long k-mers, but 28 enough in practice + memset(buf, 0xff, w * 16); + kv_resize(mg128_t, km, *p, p->n + len/w); + + for (i = l = buf_pos = min_pos = 0; i < len; ++i) { + int c = seq_nt4_table[(uint8_t)str[i]]; + mg128_t info = { UINT64_MAX, UINT64_MAX }; + if (c < 4) { // not an ambiguous base + int z; + kmer_span = l + 1 < k? l + 1 : k; + kmer[0] = (kmer[0] << 2 | c) & mask; // forward k-mer + kmer[1] = (kmer[1] >> 2) | (3ULL^c) << shift1; // reverse k-mer + if (kmer[0] == kmer[1]) continue; // skip "symmetric k-mers" as we don't know it strand + z = kmer[0] < kmer[1]? 
0 : 1; // strand + ++l; + if (l >= k && kmer_span < 256) { + info.x = hash64(kmer[z], mask) << 8 | kmer_span; + info.y = (uint64_t)rid<<32 | (uint32_t)i<<1 | z; + } + } else l = 0, kmer_span = 0; + buf[buf_pos] = info; // need to do this here as appropriate buf_pos and buf[buf_pos] are needed below + if (l == w + k - 1 && min.x != UINT64_MAX) { // special case for the first window - because identical k-mers are not stored yet + for (j = buf_pos + 1; j < w; ++j) + if (min.x == buf[j].x && buf[j].y != min.y) kv_push(mg128_t, km, *p, buf[j]); + for (j = 0; j < buf_pos; ++j) + if (min.x == buf[j].x && buf[j].y != min.y) kv_push(mg128_t, km, *p, buf[j]); + } + if (info.x <= min.x) { // a new minimum; then write the old min + if (l >= w + k && min.x != UINT64_MAX) kv_push(mg128_t, km, *p, min); + min = info, min_pos = buf_pos; + } else if (buf_pos == min_pos) { // old min has moved outside the window + if (l >= w + k - 1 && min.x != UINT64_MAX) kv_push(mg128_t, km, *p, min); + for (j = buf_pos + 1, min.x = UINT64_MAX; j < w; ++j) // the two loops are necessary when there are identical k-mers + if (min.x >= buf[j].x) min = buf[j], min_pos = j; // >= is important s.t. 
min is always the closest k-mer + for (j = 0; j <= buf_pos; ++j) + if (min.x >= buf[j].x) min = buf[j], min_pos = j; + if (l >= w + k - 1 && min.x != UINT64_MAX) { // write identical k-mers + for (j = buf_pos + 1; j < w; ++j) // these two loops make sure the output is sorted + if (min.x == buf[j].x && min.y != buf[j].y) kv_push(mg128_t, km, *p, buf[j]); + for (j = 0; j <= buf_pos; ++j) + if (min.x == buf[j].x && min.y != buf[j].y) kv_push(mg128_t, km, *p, buf[j]); + } + } + if (++buf_pos == w) buf_pos = 0; + } + if (min.x != UINT64_MAX) + kv_push(mg128_t, km, *p, min); +} diff --git a/sys.c b/sys.c new file mode 100644 index 0000000..df9a351 --- /dev/null +++ b/sys.c @@ -0,0 +1,147 @@ +#include +#include "sys.h" + +#if defined(WIN32) || defined(_WIN32) +#include + +struct timezone +{ + __int32 tz_minuteswest; /* minutes W of Greenwich */ + int tz_dsttime; /* type of dst correction */ +}; + +/* + * gettimeofday.c + * Win32 gettimeofday() replacement + * taken from PostgreSQL, according to + * https://stackoverflow.com/questions/1676036/what-should-i-use-to-replace-gettimeofday-on-windows + * + * src/port/gettimeofday.c + * + * Copyright (c) 2003 SRA, Inc. + * Copyright (c) 2003 SKC, Inc. + * + * Permission to use, copy, modify, and distribute this software and + * its documentation for any purpose, without fee, and without a + * written agreement is hereby granted, provided that the above + * copyright notice and this paragraph and the following two + * paragraphs appear in all copies. + * + * IN NO EVENT SHALL THE AUTHOR BE LIABLE TO ANY PARTY FOR DIRECT, + * INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING + * LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS + * DOCUMENTATION, EVEN IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * THE AUTHOR SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS + * IS" BASIS, AND THE AUTHOR HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE, + * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + */ + +/* FILETIME of Jan 1 1970 00:00:00. */ +static const unsigned __int64 epoch = ((unsigned __int64) 116444736000000000ULL); + +/* + * timezone information is stored outside the kernel so tzp isn't used anymore. + * + * Note: this function is not for Win32 high precision timing purpose. See + * elapsed_time(). + */ +int gettimeofday(struct timeval * tp, struct timezone *tzp) +{ + FILETIME file_time; + SYSTEMTIME system_time; + ULARGE_INTEGER ularge; + + GetSystemTime(&system_time); + SystemTimeToFileTime(&system_time, &file_time); + ularge.LowPart = file_time.dwLowDateTime; + ularge.HighPart = file_time.dwHighDateTime; + + tp->tv_sec = (long) ((ularge.QuadPart - epoch) / 10000000L); + tp->tv_usec = (long) (system_time.wMilliseconds * 1000); + + return 0; +} + +// taken from https://stackoverflow.com/questions/5272470/c-get-cpu-usage-on-linux-and-windows +double cputime(void) +{ + HANDLE hProcess = GetCurrentProcess(); + FILETIME ftCreation, ftExit, ftKernel, ftUser; + SYSTEMTIME stKernel; + SYSTEMTIME stUser; + + GetProcessTimes(hProcess, &ftCreation, &ftExit, &ftKernel, &ftUser); + FileTimeToSystemTime(&ftKernel, &stKernel); + FileTimeToSystemTime(&ftUser, &stUser); + /* Kernel/user times are durations; decoded as a date they read 1601-01-01 plus the elapsed time, so (wDay - 1) is whole days (valid below 31 days). Hours must be scaled to seconds as (h*60 + m)*60; the previous form counted each hour as 60 seconds. */ + double kernelModeTime = ((((stKernel.wDay - 1) * 24. + stKernel.wHour) * 60. + stKernel.wMinute) * 60.) + stKernel.wSecond * 1. + stKernel.wMilliseconds / 1000.; + double userModeTime = ((((stUser.wDay - 1) * 24. + stUser.wHour) * 60. + stUser.wMinute) * 60.) + stUser.wSecond * 1. 
+ stUser.wMilliseconds / 1000.; + + return kernelModeTime + userModeTime; +} + +long peakrss(void) { return 0; } +#else +#include +#include + +double cputime(void) +{ + struct rusage r; + getrusage(RUSAGE_SELF, &r); + return r.ru_utime.tv_sec + r.ru_stime.tv_sec + 1e-6 * (r.ru_utime.tv_usec + r.ru_stime.tv_usec); +} + +long peakrss(void) +{ + struct rusage r; + getrusage(RUSAGE_SELF, &r); +#ifdef __linux__ + return r.ru_maxrss * 1024; +#else + return r.ru_maxrss; +#endif +} + +#endif /* WIN32 || _WIN32 */ + +double realtime(void) +{ + struct timeval tp; + gettimeofday(&tp, NULL); + return tp.tv_sec + tp.tv_usec * 1e-6; +} + +void mg_err_fputs(const char *str, FILE *fp) +{ + int ret; + ret = fputs(str, fp); + if (ret == EOF) { + fprintf(stderr, "[ERROR] failed to write the results\n"); + exit(EXIT_FAILURE); + } +} + +void mg_err_fwrite(const void *p, size_t size, size_t nitems, FILE *fp) +{ + int ret; + ret = fwrite(p, size, nitems, fp); + if (ret == EOF) { + fprintf(stderr, "[ERROR] failed to write data\n"); + exit(EXIT_FAILURE); + } +} + +void mg_err_fread(void *p, size_t size, size_t nitems, FILE *fp) +{ + int ret; + ret = fread(p, size, nitems, fp); + if (ret == EOF) { + fprintf(stderr, "[ERROR] failed to read data\n"); + exit(EXIT_FAILURE); + } +} diff --git a/sys.h b/sys.h new file mode 100644 index 0000000..78b6cd6 --- /dev/null +++ b/sys.h @@ -0,0 +1,20 @@ +#ifndef MG_SYS_H +#define MG_SYS_H + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +void mg_err_fputs(const char *str, FILE *fp); + +double realtime(void); +double cputime(void); +long peakrss(void); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/test/MT-chimp.fa b/test/MT-chimp.fa new file mode 100644 index 0000000..9ce567f --- /dev/null +++ b/test/MT-chimp.fa @@ -0,0 +1,277 @@ +>PT#NC_001643.1X +GATCACAGGTCTATCACCCTATTAACCAGTCACGGGAGCCTTCCATGCATTTGGTATTTT +CGTCTGGGGGGTGTGCACGCGATAGCATTGCGAAACGCTGGCCCCGGAGCACCCTATGTC +GCAGTATCTGTCTTTGATTCCTGCCCCATTGTATTATTTATCGCACCTACGTTCAATATT 
+ACGACCTAGCATACCTACTAAAGTGTGTTGATTAATTAATGCTTGCAGGACATAACAACA +GCAGCAAAATGCTCACATAACTGCTTTCCACACCAACATCATAACAAAAAATTCCCACAA +ACCCCCCCTTCCCCCCGGCCACAGCACTCAAACAAATCTCTGCCAAACCCCAAAAACAAA +GAACCCAGACGCCAGCCTAGCCAGACTTCAAATTTCATCTTTAGGCGGTATGCACTTTTA +ACAGTCACCCCTCAATTAACATGCCCTCCCCCCTCAACTCCCATTCTACTAGCCCCAGCA +ACGTAACCCCCTACTCACCCTACTCAACACATATACCGCTGCTAACCCCATACCCTGAAC +CAACCAAACCCCAAAGACACCCCGTTTATGTAGCTTACCCCCTCAAAGCAATACACTGAA +AATGTTTCGACGGGTTTACATCACCCCATAAACAAACAGGTTTGGTCCTAGCCTTTCTAT +TAGCTCTTAGTAAGATTACACATGCAAGCATCCCCGCCCCGTGAGTCACCCTCTAAATCG +CCATGATCAAAAGGAACAAGTATCAAGCACGCAGCAATGCAGCTCAAAACGCTTAGCCTA +GCCACACCCCCACGGGAGACAGCAGTGATAAACCTTTAGCAATAAACGAAAGTTTAACTA +AGCCATACTAACCTCAGGGTTGGTCAATTTCGTGCTAGCCACCGCGGTCATACGATTAAC +CCAAGTCAATAGAAACCGGCGTAAAGAGTGTTTTAGATCACCCCCCCATAAAGCTAAAAT +TCACCTGAGTTGTAAAAAACTCCAGCTGATACAAAATAAACTACGAAAGTGGCTTTAACA +CATCTGAATACACAATAGCTAAGACCCAAACTGGGATTAGATACCCCACTATGCTTAGCC +CTAAACTTCAACAGTTAAATTAACAAAACTGCTCGCCAGAACACTACGAGCCACAGCTTA +AAACTCAAAGGACCTGGCGGTGCTTCATATCCCTCTAGAGGAGCCTGTTCTGTAATCGAT +AAACCCCGATCAACCTCACCGCCTCTTGCTCAGCCTATATACCGCCATCTTCAGCAAACC +CTGATGAAGGTTACAAAGTAAGCACAAGTACCCACGTAAAGACGTTAGGTCAAGGTGTAG +CCTATGAGGTGGCAAGAAATGGGCTACATTTTCTACCCCAGAAAATTACGATAACCCTTA +TGAAACCTAAGGGTCAAAGGTGGATTTAGCAGTAAACTAAGAGTAGAGTGCTTAGTTGAA +CAGGGCCCTGAAGCGCGTACACACCGCCCGTCACCCTCCTCAAGTATACTTCAAAGGATA +CTTAACTTAAACCCCCTACGTATTTATATAGAGGAGATAAGTCGTAACATGGTAAGTGTA +CTGGAAAGTGCACTTGGACGAACCAGAGTGTAGCTTAACATAAAGCACCCAACTTACACT +TAGGAGATTTCAACTCAACTTGACCACTCTGAGCCAAACCTAGCCCCAAACCCCCTCCAC +CCTACTACCAAACAACCTTAACCAAACCATTTACCCAAATAAAGTATAGGCGATAGAAAT +TGTAAACCGGCGCAATAGACATAGTACCGCAAGGGAAAGATGAAAAATTATACCCAAGCA +TAATACAGCAAGGACTAACCCCTGTACCTTTTGCATAATGAATTAACTAGAAATAACTTT +GCAAAGAGAACCAAAGCTAAGACCCCCGAAACCAGACGAGCTACCTAAGAACAGCTAAAA +GAGCACACCCGTCTATGTAGCAAAATAGTGGGAAGATTTATAGGTAGAGGCGACAAACCT +ACCGAGCCTGGTGATAGCTGGTTGTCCAAGATAGAATCTTAGTTCAACTTTAAATTTACC +TACAGAACCCTCTAAATCCCCTTGTAAACTTAACTGTTAGTCCAAAGAGGAACAGCTCTT 
+TAGACACTAGGAAAAAACCTTGTAAAGAGAGTAAAAAATTTAACACCCATAGTAGGCCTA +AAAGCAGCCACCAATTAAGAAAGCGTTCAAGCTCAACACCCACAACCTTAAAGATCCCAA +ACATACAACCGAACTCCTTACACCCAATTGGACCAATCTATTACCCCATAGAAGAACTAA +TGTTAGTATAAGTAACATGAAAACATTCTCCTCCGCATAAGCCTACATCAGACCAAAATA +TTAAACTGACAATTAACAGCCTAATATCTACAATCAACCAACAAGCCATTATTACCCCCG +CTGTTAACCCAACACAGGCATGCCCACAAGGAAAGGTTAAAAAAAGTAAAAGGAACTCGG +CAAATCTTACCCCGCCTGTTTACCAAAAACATCACCTCTAGCATTACCAGTATTAGAGGC +ACCGCCTGCCCGGTGACATATGTTTAACGGCCGCGGTACCCTAACCGTGCAAAGGTAGCA +TAATCACTTGTTCCTTAAATAGGGACTTGTATGAATGGCTCCACGAGGGTTTAGCTGTCT +CTTACTTTCAACCAGTGAAATTGACCTACCCGTGAAGAGGCGGGCATAACATAACAAGAC +GAGAAGACCCTATGGAGCTTTAATTCATTAATGCAAACAATACTTAACAAACCTACAGGT +CCTAAACTATTAAACCTGCATTAAAAATTTCGGTTGGGGCGACCTCGGAGCACAACCCAA +CCTCCGAGCAATACATGCTAAGACCTCACCAGTCAAAGCGAATTACTACATCCAATTGAT +CCAATGACTTGACCAACGGAACAAGTTACCCTAGGGATAACAGCGCAATCCTATTCCAGA +GTCCATATCAACAATAGGGTTTACGACCTCGATGTTGGATCAGGACATCCCGATGGTGCA +GCCGCTATTAAAGGTTCGTTTGTTCAACGATTAAAGTCCTACGTGATCTGAGTTCAGACC +GGAGTAATCCAGGTCGGTTTCTATCTGTTCTAAATTTCTCCCTGTACGAAAGGACAAGAG +AAATGAGGCCTACTTCACAAAGCGCCTTCCCCAATAAATGATATTATCTCAATTTAGCGC +CATGCCAACACCCACTCAAGAACAGAGTTTGTTAAGATGGCAGAGCCCGGTAATTGCATA +AAACTTAAAACTTTACAATCAGAGGTTCAATTCCTCTTCTTGACAACACACCCATGACCA +ACCTCCTACTCCTCATTGTACCCATCCTAATCGCAATAGCATTCCTAATGCTAACCGAAC +GAAAAATTCTAGGCTACATACAACTACGCAAAGGTCCCAACATTGTAGGTCCTTACGGGC +TATTACAGCCCTTCGCTGACGCCATAAAACTCTTCACTAAAGAACCCTTAAAACCCTCCA +CTTCAACCATTACCCTCTACATCACCGCCCCAACCCTAGCCCTCACCATTGCCCTCTTAC +TATGAACCCCCCTCCCCATACCCAACCCCCTAGTCAATCTTAACTTAGGCCTCCTATTTA +TTCTAGCCACCTCCAGCCTAGCCGTTTACTCAATCCTCTGATCAGGGTGAGCATCAAACT +CGAACTACGCCTTAATCGGTGCACTACGAGCAGTAGCCCAAACAATCTCATACGAAGTCA +CTCTAGCCATTATCCTACTGTCAACGCTACTAATAAGTGGCTCCTTCAATCTCTCTACCC +TTGTCACAACACAAGAGCACCTCTGACTAATCCTGCCAACATGACCCCTGGCCATAATAT +GATTTATCTCTACACTAGCAGAGACCAACCGAACTCCCTTCGACCTTACTGAAGGAGAAT +CTGAACTAGTCTCAGGCTTTAATATCGAGTATGCCGCAGGCCCCTTTGCCCTATTTTTCA +TAGCCGAATACATAAACATTATTATAATAAACACCCTCACTGCTACAATCTTCCTAGGAG 
+CAACATACAATACTCACTCCCCTGAACTCTACACGACATATTTTGTCACCAAAGCTCTAC +TTCTAACCTCCCTGTTCCTATGAATTCGAACAGCATATCCCCGATTTCGCTACGACCAGC +TCATACACCTCCTATGAAAAAACTTCCTACCACTCACCCTAGCATCACTCATGTGATATA +TCTCCATACCCACTACAATCTCCAGCATCCCCCCTCAAACCTAAGAAATATGTCTGATAA +AAGAATTACTTTGATAGAGTAAATAATAGGAGTTCAAATCCCCTTATTTCTAGGACTATA +AGAATCGAACTCATCCCTGAGAATCCAAAATTCTCCGTGCCACCTATCACACCCCATCCT +AAAGTAAGGTCAGCTAAATAAGCTATCGGGCCCATACCCCGAAAATGTTGGTTACACCCT +TCCCGTACTAATTAATCCCCTAGCCCAACCCATCATCTACTCTACCATCCTTACAGGCAC +GCTCATTACAGCGCTAAGCTCACACTGATTTTTCACCTGAGTAGGCCTAGAAATAAATAT +ACTAGCTTTTATCCCAATCCTAACCAAAAAAATAAGCCCCCGCTCCACAGAAGCCGCCAT +CAAATACTTTCTCACACAAGCAACTGCGTCCATAATTCTCCTGATAGCTATCCTCTCCAA +CAGCATACTCTCCGGACAATGAACCATAACCAATACTACCAATCAATACTCATCATTAAT +AATTATAATAGCAATGGCAATAAAACTAGGAATAGCCCCCTTTCACTTTTGAGTTCCAGA +AGTTACCCAAGGCACCCCCCTAATATCCGGCCTACTCCTCCTCACATGACAAAAATTAGC +CCCTATTTCAATTATATACCAAATCTCCTCATCACTGAACGTAAACCTTCTCCTCACCCT +TTCAATCTTGTCCATTATAGCAGGCAGCTGAGGCGGACTAAACCAAACCCAACTACGCAA +AATCCTAGCATACTCCTCAATCACCCACATAGGCTGAATAATAGCAGTCCTACCATATAA +CCCTAACATAACCATTCTTAATTTAACCATTTACATCATCCTAACTACTACCGCATTTCT +GCTACTCAACTTAAACTCCAGCACCACAACCCTACTACTATCTCGCACCTGAAACAAGCT +AACATGATTAACTCCCCTAATTCCATCCACCCTCCTCTCCCTAGGAGGCCTACCCCCACT +AACTGGCTTCTTACCCAAATGAGTTATCATCGAAGAATTCACAAAAAATAATAGCCTCAT +CATCCCCACCATCATAGCCATCATCACTCTCCTTAACCTCTATTTCTACCTACGCCTAAT +CTACTCCACCTCAATTACACTACTTCCCATATCTAATAACGTAAAAATAAAATGACAATT +CGAACATACAAAACCCACCCCCTTCCTCCCTACACTCATCACCCTTACCACACTGCTTCT +ACCCATCTCCCCCTTCATACTAATAATCTTATAGAAATTTAGGTTAAGCACAGACCAAGA +GCCTTCAAAGCCCTCAGCAAGTTACAATACTTAATTTCTGCAACAACTAAGGACTGCAAA +ACCCCACTCTGCATCAACTGAACGCAAATCAGCCACTTTAATTAAGCTAAGCCCTTACTA +GATTAATGGGACTTAAACCCACAAACATTTAGTTAACAGCTAAACACCCTAATCAACTGG +CTTCAATCTACTTCTCCCGCCGCAAGAAAAAAAGGCGGGAGAAGCCCCGGCAGGTTTGAA +GCTGCTTCTTCGAATTTGCAATTCAATATGAAAATCACCTCAGAGCTGGTAAAAAGAGGC +TTAACCCCTGTCTTTAGATTTACAGTCCAATGCTTCACTCAGCCATTTTACCCCACCCTA +CTGATGTTCACCGACCGCTGACTATTCTCTACAAACCACAAAGATATTGGAACACTATAC 
+CTACTATTCGGTGCATGAGCTGGAGTCCTGGGCACAGCCCTAAGTCTCCTTATTCGGGCT +GAACTAGGCCAACCAGGCAACCTCCTAGGTAATGACCACATCTACAATGTCATCGTCACA +GCCCATGCATTCGTAATAATCTTCTTCATAGTAATGCCTATTATAATCGGAGGCTTTGGC +AACTGGCTAGTTCCCTTGATAATTGGTGCCCCCGACATGGCATTCCCCCGCATAAACAAC +ATAAGCTTCTGGCTCCTGCCCCCTTCTCTCCTACTTCTACTTGCATCTGCCATAGTAGAA +GCCGGCGCGGGAACAGGTTGAACAGTCTACCCTCCCTTAGCGGGAAACTACTCGCATCCT +GGAGCCTCCGTAGACCTAACCATCTTCTCCTTACATCTGGCAGGCATCTCCTCTATCCTA +GGAGCCATTAACTTCATCACAACAATTATTAATATAAAACCTCCTGCCATGACCCAATAC +CAAACACCCCTCTTCGTCTGATCCGTCCTAATCACAGCAGTCTTACTTCTCCTATCCCTC +CCAGTCCTAGCTGCTGGCATCACCATACTATTGACAGATCGTAACCTCAACACTACCTTC +TTCGACCCAGCCGGGGGAGGAGACCCTATTCTATATCAACACTTATTCTGATTTTTTGGC +CACCCCGAAGTTTATATTCTTATCCTACCAGGCTTCGGAATAATTTCCCACATTGTAACT +TATTACTCCGGAAAAAAAGAACCATTTGGATATATAGGCATGGTTTGAGCTATAATATCA +ATTGGCTTCCTAGGGTTTATCGTGTGAGCACACCATATATTTACAGTAGGGATAGACGTA +GACACCCGAGCCTATTTCACCTCCGCTACCATAATCATTGCTATTCCTACCGGCGTCAAA +GTATTCAGCTGACTCGCTACACTTCACGGAAGCAATATGAAATGATCTGCCGCAGTACTC +TGAGCCCTAGGGTTTATCTTTCTCTTCACCGTAGGTGGCCTAACCGGCATTGTACTAGCA +AACTCATCATTAGACATCGTGCTACACGACACATACTACGTCGTAGCCCACTTCCACTAC +GTTCTATCAATAGGAGCTGTATTCGCCATCATAGGAGGCTTCATTCACTGATTCCCCCTA +TTCTCAGGCTATACCCTAGACCAAACCTATGCCAAAATCCAATTTGCCATCATGTTCATT +GGCGTAAACCTAACCTTCTTCCCACAGCACTTCCTTGGCCTATCTGGGATGCCCCGACGT +TACTCGGACTACCCCGATGCATACACCACATGAAATGTCCTATCATCCGTAGGCTCATTT +ATCTCCCTGACAGCAGTAATATTAATAATTTTCATGATTTGAGAAGCCTTTGCTTCAAAA +CGAAAAGTCCTAATAGTAGAAGAGCCCTCCGCAAACCTGGAATGACTATATGGATGCCCC +CCACCCTACCACACATTCGAAGAACCCGTATACATAAAATCTAGACAAAAAAGGAAGGAA +TCGAACCCCCTAAAGCTGGTTTCAAGCCAACCCCATGACCTCCATGACTTTTTCAAAAAG +ATATTAGAAAAACTATTTCATAACTTTGTCAAAGTTAAATTACAGGTTAACCCCCGTATA +TCTTAATGGCACATGCAGCGCAAGTAGGTCTACAAGATGCTACTTCCCCTATCATAGAAG +AACTTATTATCTTTCACGACCATGCCCTCATAATTATCTTTCTCATCTGCTTTCTAGTCC +TATACGCCCTTTTCCTAACACTCACAACAAAACTAACTAATACTAGTATTTCAGACGCCC +AGGAAATAGAAACCGTCTGAACTATCCTGCCCGCCATCATCCTAGTCCTTATTGCCCTAC +CATCCCTGCGTATCCTTTACATAACAGACGAGGTCAACGACCCCTCCTTTACTATTAAAT 
+CAATCGGCCATCAATGATATTGAACCTACGAATACACCGACTACGGCGGGCTAATCTTCA +ACTCCTACATACTCCCCCCATTATTTCTAGAACCAGGTGATCTACGACTCCTTGACGTTG +ATAACCGAGTGGTCCTCCCAGTTGAAGCCCCCGTTCGTATAATAATTACATCACAAGATG +TTCTACACTCATGAGCTGTTCCCACATTAGGCCTAAAAACAGACGCAATTCCCGGACGCC +TAAACCAAACCACTTTCACCGCCACACGACCAGGAGTATACTACGGCCAATGCTCAGAAA +TCTGTGGAGCAAACCACAGTTTTATACCCATCGTCCTAGAATTAATCCCTCTAAAAATCT +TTGAAATAGGACCCGTATTCACTCTATAGCACCTTCTCTACCCCTCTCCAGAGCTCACTG +TAAAGCTAACCTAGCATTAACCTTTTAAGTTAAAGATTAAGAGGACCGACACCTCTTTAC +AGTGAAATGCCCCAACTAAATACCGCCGTATGACCCACCATAATTACCCCCATACTCCTG +ACACTATTTCTCGTCACCCAACTAAAAATATTAAATTCAAATTACCATCTACCCCCCTCA +CCAAAACCCATAAAAATAAAAAACTACAATAAACCCTGAGAACCAAAATGAACGAAAATC +TATTCGCTTCATTCGCTGCCCCCACAATCCTAGGCTTACCCGCCGCAGTACTAATCATTC +TATTCCCCCCTCTACTGGTCCCCACTTCTAAACATCTCATCAACAACCGACTAATTACCA +CCCAACAATGACTAATTCAACTGACCTCAAAACAAATAATAACTATACACAGCACTAAAG +GACGAACCTGATCTCTCATACTAGTATCCTTAATCATTTTTATTACCACAACCAATCTTC +TTGGGCTTCTACCCCACTCATTCACACCAACCACCCAACTATCTATAAACCTAGCCATGG +CTATCCCCCTATGAGCAGGCGCAGTAGTCATAGGCTTTCGCTTTAAGACTAAAAATGCCC +TAGCCCACTTCTTACCGCAAGGCACACCTACACCCCTTATCCCCATACTAGTTATCATCG +AAACTATTAGCCTACTCATTCAACCAATAGCCTTAGCCGTACGTCTAACCGCTAACATTA +CTGCAGGCCACCTACTCATGCACCTAATTGGAAGCGCCACACTAGCATTATCAACTATCA +ATCTACCCTATGCACTCATTATCTTCACAATTCTAATCCTACTGACTATTCTAGAGATCG +CCGTCGCCTTAATCCAAGCCTACGTTTTTACACTTCTAGTGAGCCTCTACCTGCACGACA +ACACATAATGACCCACCAATCACATGCCTACCACATAGTAAAACCCAGCCCATGACCCCT +AACAGGGGCCCTCTCGGCCCTCCTAATAACCTCCGGCCTGGCCATATGATTCCACTTCTA +CTCCACAACACTACTCACACTAGGCTTACTAACTAACACATTGACCATATATCAATGATG +ACGCGATGTTATACGAGAAGGCACATACCAAGGCCACCACACACCACCCGTCCAAAAAGG +TCTCCGATATGGGATAATTCTTTTTATTACCTCAGAAGTTTTTTTCTTTGCAGGATTTTT +TTGAGCTTTCTACCACTCCAGCCTAGCCCCTACCCCCCAGCTAGGAGGACACTGGCCCCC +AACAGGTATTACCCCACTAAATCCCCTAGAAGTCCCACTCCTAAACACATCTGTATTACT +CGCATCAGGAGTATCAATTACTTGAGCCCATCACAGCTTAATAGAAAATAACCGAAACCA +AATAATTCAAGCACTGCTTATTACGATTCTACTAGGTCTTTATTTTACCCTCCTACAAGC +CTCAGAATATTTCGAATCCCCTTTTACCATTTCCGATGGCATCTACGGCTCAACATTCTT 
+TGTAGCCACAGGCTTCCACGGACTCCACGTCATTATTGGATCAACTTTCCTCACTATCTG +CCTCATCCGCCAACTAATATTTCACTTCACATCCAAACATCACTTCGGCTTTCAAGCCGC +CGCCTGATACTGACACTTCGTAGATGTAGTCTGACTATTTCTATATGTCTCTATTTACTG +ATGAGGATCTTACTCTTTTAGTATAAGTAGTACCGTTAACTTCCAATTAACTAGTTTTGA +CAACATTCAAAAAAGAGTAATAAACTTCGTCCTAATTTTAATAACCAATACCCTTCTAGC +CCTACTACTGATAATTATCACATTCTGACTACCACAACTCAACAGCTACATAGAAAAATC +TACCCCTTACGAATGTGGCTTCGACCCTATATCCCCCGCCCGCGTCCCCTTCTCCATAAA +ATTTTTCCTAGTAGCCATCACCTTCCTATTATTTGACCTAGAAATTGCCCTCCTATTGCC +CTTACCTTGAGCCCTACAAACGGCCAACCTACCACTAATAGTCACATCATCCCTCTTATT +AATTACTATCCTAGCCCTAAGCCTCGCCTACGAATGATTACAAAAAGGGTTAGACTGAAC +CGAATTGGTATATAGTTTAAATAAAACGAATGATTTCGACTCATTAAATTATGATAATCA +TATTTACCAAATGCCCCTTATTTATATAAATATTATACTAGCATTTACCATCTCACTTCT +AGGAATACTAGTATATCGCTCACACCTAATATCTTCCCTACTATGCCTAGAAGGAATAAT +ACTATCACTGTTCATCATAGCCACCCTCATAACCCTCAATACTCACTCCCTCTTAGCCAA +TATTGTACCCATCACCATACTAGTCTTTGCTGCCTGCGAAGCAGCAGTAGGTCTAGCACT +ACTAGTTTCAATCTCTAACACATATGGCTTAGACTACGTACATAACCTAAACCTACTCCA +ATGCTAAAACTAATCATCCCGACAATTATATTACTACCACTAACATGATTCTCTAAAAAA +CGTATAATTTGAATCAACACAACCACTCACAGCCTAATTATCAGCACCATTCCCTTACTA +TTTTTTAACCAAATTAACAACAACCTATTCAGCTGTTCCCTGCCCTTCTCCTCCGACCCC +TTAACAACTCCCCTCCTAATATTAACTGCTTGACTTCTACCCCTCACAATCATAGCAAGC +CAGCGCCACCTATCCAACGAACCACTATCACGAAAAAAACTCTACCTCTCCATGCTAATT +TCCCTCCAAATCTCCTTAATTATAACATTCTCGGCCACAGAGCTAATTATATTTTATATC +TTCTTCGAAACCACACTTATCCCCACCCTGGCTATCATCACCCGATGGGGTAACCAACCA +GAACGCCTGAACGCAGGTACATACTTCCTATTCTATACCCTAGTAGGCTCCCTCCCCCTA +CTCATCGCACTAATCTATACCCACAACACCCTAGGCTCACTAAATATCCTATTACTCACT +CTTACAACCCAAGAACTATCAAACACCTGAGCCAACAACTTAATATGACTAGCGTACACG +ATGGCTTTCATGGTAAAAATACCCCTTTACGGACTCCACCTATGACTCCCTAAAGCCCAT +GTCGAAGCCCCTATTGCCGGGTCAATGGTACTTGCTGCAGTACTCTTAAAATTAGGTGGC +TATGGCATAATACGCCTCACACTCATCCTCAACCCCCTAACAAAACATATAGCCTATCCC +TTCCTCATGTTGTCCTTATGAGGTATAATCATAACAAGCTCCATCTGCCTGCGACAAACA +GACCTAAAATCGCTCATTGCATACCCTTCAGTCAGCCACATAGCCCTCGTAGTAACAGCC +ATTCTCATCCAAACCCCCTGAAGCTTCACCGGCGCAATTATCCTCATAATCGCCCACGGA 
+CTTACATCCTCATTATTATCCTGCCTAGCAAACTCAAATTATGAACGCACCCACAGTCGC +ATCATAATTCTCTCCCAAGGACTTCAAACTCTACTCCCACTAATAGCCTTTTGATGACTC +CTGGCAAGCCTCGCTAACCTCGCCCTACCCCCTACCATTAATCTCCTAGGGGAACTCTCC +GTGCTAGTAACCTCATTCTCCTGATCAAATACCACTCTCCTACTCACAGGATTCAACATA +CTAATCACAGCCCTGTACTCCCTCTACATGTTTACCACAACACAATGAGGCTCACTCACC +CACCACATTAATAGCATAAAGCCCTCATTCACACGAGAAAACACTCTCATATTTTTACAC +CTATCCCCCATCCTCCTTCTATCCCTCAATCCTGATATCATCACTGGATTCACCTCCTGT +AAATATAGTTTAACCAAAACATCAGATTGTGAATCTGACAACAGAGGCTCACGACCCCTT +ATTTACCGAGAAAGCTTATAAGAACTGCTAACTCGTATTCCCATGCCTAACAACATGGCT +TTCTCAACTTTTAAAGGATAACAGTTATCCATTGGTCTTAGGCCCCAAAAATTTTGGTGC +AACTCCAAATAAAAGTAATAACCATGTATGCTACCATAACCACCTTAGCCCTAACTTCCT +TAATTCCCCCCATCCTCGGCGCCCTCATTAACCCTAACAAAAAAAACTCATACCCCCATT +ACGTGAAATCCATTATCGCATCCACCTTTATCATTAGCCTTTTCCCCACAACAATATTCA +TATGCCTAGACCAAGAAACTATTATCTCGAACTGACACTGAGCAACAACCCAAACAACCC +AACTCTCCCTGAGCTTTAAACTAGACTATTTCTCCATAACATTTATCCCCGTAGCACTGT +TCGTTACATGATCCATCATAGAATTCTCACTATGATATATAGACTCAGACCCCAACATCA +ACCAATTCTTCAAATACTTACTTATCTTCCTAATTACTATACTAATCCTAGTCACCGCTA +ACAACCTATTCCAACTCTTCATCGGCTGAGAAGGCGTAGGAATTATATCCTTTCTACTCA +TTAGCTGATGGTACGCCCGAACAGATGCCAACACAGCAGCCATCCAAGCAATCCTATATA +ACCGTATCGGTGATATTGGTTTTGTCCTAGCCCTAGCATGATTTCTCCTACACTCCAACT +CATGAGATCCACAACAAATAATCCTCCTAAGTACTAATACAGACCTTACTCCACTACTAG +GCTTCCTCCTAGCAGCAGCAGGCAAATCAGCTCAACTAGGCCTTCACCCCTGACTCCCCT +CAGCCATAGAAGGCCCTACCCCTGTTTCAGCCCTACTCCACTCAAGCACCATAGTCGTAG +CAGGAATCTTCCTACTCATCCGCTTCTACCCCCTAGCAGAGAATAACCCACTAATCCAAA +CTCTCACGCTATGCCTAGGCGCTATCACCACCCTATTCGCAGCAGTCTGCGCCCTCACAC +AAAATGACATCAAAAAAATCGTGGCCTTCTCCACTTCAAGCCAACTAGGACTCATAATAG +TTACAATCGGTATCAACCAACCACACCTAGCATTCCTTCACATCTGCACCCACGCTTTCT +TCAAAGCCATACTATTCATATGCTCCGGATCCATTATTCACAACCTCAATAATGAGCAAG +ACATTCGAAAAATAGGAGGATTACTCAAAACCATACCCCTCACTTCAACCTCCCTCACCA +TTGGGAGCCTAGCATTAGCAGGAATACCCTTCCTCACAGGTTTCTACTCCAAAGACCTCA +TCATCGAAACCGCTAACATATCATACACAAACGCCTGAGCCCTATCTATTACTCTCATCG +CCACCTCTCTGACAAGCGCCTACAGCACCCGAATAATCCTCCTCACCCTAACAGGTCAAC 
+CTCGCTTCCCAACCCTCACCAACATTAACGAAAACAACCCCACTCTGTTAAATCCCATTA +AACGCCTAACCATTGGAAGCTTATTTGCAGGATTTCTCATTACCAACAACATTCTCCCCA +TATCTACTCCCCAAGTGACAATTCCCCTTTACTTAAAACTTACAGCCCTAGGCGTTACTT +CCCTAGGACTTCTAACAGCCCTAGACCTCAATTACCTAACCAGCAAGCTCAAAATAAAAT +CCCCACTATATACATTTCACTTCTCTAATATACTCGGATTCTACCCTAACATTATACACC +GCTCGATCCCCTATCTAGGCCTTCTTACAAGCCAAAACCTACCCCTACTTCTTCTAGACC +TGACCTGACTAGAGAAACTATTACCTAAAACAATTTCACAGTACCAAATCTCCGCTTCCA +TTACCACCTCAACCCAAAAAGGCATGATCAAACTTTATTTCCTCTCTTTTTTCTTCCCTC +TCATCTTAACCTTACTCCTAATCACATAACCTATTCCCCCGAGCAATCTCAATCACAATG +TATACACCAACAAACAATGTCCAACCAGTAACTACTACTAACCAACGCCCATAATCATAT +AAGGCCCCCGCACCAATAGGATCCTCCCGAATCAGCCCTGGCCCCTCCCCTTCATAAATT +ATTCAACTTCCCACGCTATTAAAATTTACCACAACCACCATCCCATCATACCCTTTTACC +CATAACACTAATCCTACCTCCATCGCCAGTCCTACTAAAACACTAACCAAAACCTCAACC +CCTGACCCCCATGCCTCAGGATACTCCTCAATAGCCATAGCCGTAGTATACCCAAAAACA +ACCATTATTCCCCCCAAATAAATTAAAAAAACCATTAAACCTATATAACCTCCCCCATAA +TTCAAAATGATGGCACACCCAACTACACCACTAACAATCAATACTAAACCCCCATAAATG +GGAGAAGGCTTAGAAGAAAACCCCACAAACCCTATCACTAAACTCACACTCAATAAAAAT +AAAGCATATGTCATTATTCTCGCACGGACTACAACCACGACCAATGATATGAAAAACCAT +CGTTGTATTTCAACTACAAGAACACCAATGACCCCGACACGCAAAATTAACCCACTAATA +AAATTAATTAATCACTCATTTATCGACCTCCCCACCCCATCCAACATTTCCGCATGATGG +AACTTCGGCTCACTTCTCGGCGCCTGCCTAATCCTTCAAATTACCACAGGATTATTCCTA +GCTATACACTACTCACCAGACGCCTCAACCGCCTTCTCGTCGATCGCCCACATCACCCGA +GACGTAAACTATGGTTGGATCATCCGCTACCTCCACGCTAACGGCGCCTCAATATTTTTT +ATCTGCCTCTTCCTACACATCGGCCGAGGTCTATATTACGGCTCATTTCTCTACCTAGAA +ACCTGAAACATTGGCATTATCCTCTTGCTCACAACCATAGCAACAGCCTTTATGGGCTAT +GTCCTCCCATGAGGCCAAATATCCTTCTGAGGAGCCACAGTAATTACAAACCTACTGTCC +GCTATCCCATACATCGGAACAGACCTGGTCCAGTGAGTCTGAGGAGGCTACTCAGTAGAC +AGCCCTACCCTTACACGATTCTTCACCTTCCACTTTATCTTACCCTTCATCATCACAGCC +CTAACAACACTTCATCTCCTATTCTTACACGAAACAGGATCAAATAACCCCCTAGGAATC +ACCTCCCACTCCGACAAAATTACCTTCCACCCCTACTACACAATCAAAGATATCCTTGGC +TTATTCCTTTTCCTCCTTATCCTAATGACATTAACACTATTCTCACCAGGCCTCCTAGGC +GATCCAGACAACTATACCCTAGCTAACCCCCTAAACACCCCACCCCACATTAAACCCGAG 
+TGATACTTTCTATTTGCCTACACAATCCTCCGATCCATCCCCAACAAACTAGGAGGCGTC +CTCGCCCTACTACTATCTATCCTAATCCTAACAGCAATCCCTGTCCTCCACACATCCAAA +CAACAAAGCATAATATTTCGCCCACTAAGCCAACTGCTTTACTGACTCCTAGCCACAGAC +CTCCTCATCCTAACCTGAATCGGAGGACAACCAGTAAGCTACCCCTTCATCACCATCGGA +CAAATAGCATCCGTATTATACTTCACAACAATCCTAATCCTAATACCAATCGCCTCTCTA +ATCGAAAACAAAATACTTGAATGAACCTGCCCTTGTAGTATAAACTAATACACCGGTCTT +GTAAACCGGAAACGAAAACTTTCTTCCAAGGACAAATCAGAGAAAAAGTAATTAACTTCA +CCATCAGCACCCAAAGCTAAGATTCTAATTTAAACTATTCTCTGTTCTTTCATGGGGAAG +CAAATTTAGGTACCACCTAAGTACTGGCTCATTCATTACAACCGCTATGTATTTCGTACA +TTACTGCCAGCCACCATGAATATCGTACAGTACCATATCACCCAACTACCTATAGTACAT +AAAATCCACTCCCACATCAAAACCTTCACTCCATGCTTACAAGCACGCACAACAATCAAC +TCCCAACTGTCGAACATAAAACACAATTCCAACGACACCCCTCCCCCACCCCGATACCAA +CAGACCTATCTCCCCTTGACAGAACATAGTACATACAACCATACACCGTACATAGCACAT +TACAGTCAAACCCCTCCTCGCCCCCACGGATGCTCCCCCTCAGATAGGAATCCCTTGGTC +ACCATCCTCCGTGAAATCAATATCCCGCACAAGAGTGACTCTCCTCGCTCCGGGCCCATA +ACATCTGGGGGTAGCTAAAGTGAACTGTATCCGACATCTGGTTCCTACCTCAGGGCCATG +AAGTTCAAAAGACTCCCACACGTTCCCCTTAAATAAGACATCACGATG diff --git a/test/MT-human.fa b/test/MT-human.fa new file mode 100644 index 0000000..fb5bad3 --- /dev/null +++ b/test/MT-human.fa @@ -0,0 +1,239 @@ +>HS#NC_012920.1 +GATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCATTTGGTATTTTCGTCTGGGGG +GTATGCACGCGATAGCATTGCGAGACGCTGGAGCCGGAGCACCCTATGTCGCAGTATCTGTCTTTGATTC +CTGCCTCATCCTATTATTTATCGCACCTACGTTCAATATTACAGGCGAACATACTTACTAAAGTGTGTTA +ATTAATTAATGCTTGTAGGACATAATAATAACAATTGAATGTCTGCACAGCCACTTTCCACACAGACATC +ATAACAAAAAATTTCCACCAAACCCCCCCTCCCCCGCTTCTGGCCACAGCACTTAAACACATCTCTGCCA +AACCCCAAAAACAAAGAACCCTAACACCAGCCTAACCAGATTTCAAATTTTATCTTTTGGCGGTATGCAC +TTTTAACAGTCACCCCCCAACTAACACATTATTTTCCCCTCCCACTCCCATACTACTAATCTCATCAATA +CAACCCCCGCCCATCCTACCCAGCACACACACACCGCTGCTAACCCCATACCCCGAACCAACCAAACCCC +AAAGACACCCCCCACAGTTTATGTAGCTTACCTCCTCAAAGCAATACACTGAAAATGTTTAGACGGGCTC +ACATCACCCCATAAACAAATAGGTTTGGTCCTAGCCTTTCTATTAGCTCTTAGTAAGATTACACATGCAA +GCATCCCCGTTCCAGTGAGTTCACCCTCTAAATCACCACGATCAAAAGGAACAAGCATCAAGCACGCAGC 
+AATGCAGCTCAAAACGCTTAGCCTAGCCACACCCCCACGGGAAACAGCAGTGATTAACCTTTAGCAATAA +ACGAAAGTTTAACTAAGCTATACTAACCCCAGGGTTGGTCAATTTCGTGCCAGCCACCGCGGTCACACGA +TTAACCCAAGTCAATAGAAGCCGGCGTAAAGAGTGTTTTAGATCACCCCCTCCCCAATAAAGCTAAAACT +CACCTGAGTTGTAAAAAACTCCAGTTGACACAAAATAGACTACGAAAGTGGCTTTAACATATCTGAACAC +ACAATAGCTAAGACCCAAACTGGGATTAGATACCCCACTATGCTTAGCCCTAAACCTCAACAGTTAAATC +AACAAAACTGCTCGCCAGAACACTACGAGCCACAGCTTAAAACTCAAAGGACCTGGCGGTGCTTCATATC +CCTCTAGAGGAGCCTGTTCTGTAATCGATAAACCCCGATCAACCTCACCACCTCTTGCTCAGCCTATATA +CCGCCATCTTCAGCAAACCCTGATGAAGGCTACAAAGTAAGCGCAAGTACCCACGTAAAGACGTTAGGTC +AAGGTGTAGCCCATGAGGTGGCAAGAAATGGGCTACATTTTCTACCCCAGAAAACTACGATAGCCCTTAT +GAAACTTAAGGGTCGAAGGTGGATTTAGCAGTAAACTAAGAGTAGAGTGCTTAGTTGAACAGGGCCCTGA +AGCGCGTACACACCGCCCGTCACCCTCCTCAAGTATACTTCAAAGGACATTTAACTAAAACCCCTACGCA +TTTATATAGAGGAGACAAGTCGTAACATGGTAAGTGTACTGGAAAGTGCACTTGGACGAACCAGAGTGTA +GCTTAACACAAAGCACCCAACTTACACTTAGGAGATTTCAACTTAACTTGACCGCTCTGAGCTAAACCTA +GCCCCAAACCCACTCCACCTTACTACCAGACAACCTTAGCCAAACCATTTACCCAAATAAAGTATAGGCG +ATAGAAATTGAAACCTGGCGCAATAGATATAGTACCGCAAGGGAAAGATGAAAAATTATAACCAAGCATA +ATATAGCAAGGACTAACCCCTATACCTTCTGCATAATGAATTAACTAGAAATAACTTTGCAAGGAGAGCC +AAAGCTAAGACCCCCGAAACCAGACGAGCTACCTAAGAACAGCTAAAAGAGCACACCCGTCTATGTAGCA +AAATAGTGGGAAGATTTATAGGTAGAGGCGACAAACCTACCGAGCCTGGTGATAGCTGGTTGTCCAAGAT +AGAATCTTAGTTCAACTTTAAATTTGCCCACAGAACCCTCTAAATCCCCTTGTAAATTTAACTGTTAGTC +CAAAGAGGAACAGCTCTTTGGACACTAGGAAAAAACCTTGTAGAGAGAGTAAAAAATTTAACACCCATAG +TAGGCCTAAAAGCAGCCACCAATTAAGAAAGCGTTCAAGCTCAACACCCACTACCTAAAAAATCCCAAAC +ATATAACTGAACTCCTCACACCCAATTGGACCAATCTATCACCCTATAGAAGAACTAATGTTAGTATAAG +TAACATGAAAACATTCTCCTCCGCATAAGCCTGCGTCAGATTAAAACACTGAACTGACAATTAACAGCCC +AATATCTACAATCAACCAACAAGTCATTATTACCCTCACTGTCAACCCAACACAGGCATGCTCATAAGGA +AAGGTTAAAAAAAGTAAAAGGAACTCGGCAAATCTTACCCCGCCTGTTTACCAAAAACATCACCTCTAGC +ATCACCAGTATTAGAGGCACCGCCTGCCCAGTGACACATGTTTAACGGCCGCGGTACCCTAACCGTGCAA +AGGTAGCATAATCACTTGTTCCTTAAATAGGGACCTGTATGAATGGCTCCACGAGGGTTCAGCTGTCTCT 
+TACTTTTAACCAGTGAAATTGACCTGCCCGTGAAGAGGCGGGCATAACACAGCAAGACGAGAAGACCCTA +TGGAGCTTTAATTTATTAATGCAAACAGTACCTAACAAACCCACAGGTCCTAAACTACCAAACCTGCATT +AAAAATTTCGGTTGGGGCGACCTCGGAGCAGAACCCAACCTCCGAGCAGTACATGCTAAGACTTCACCAG +TCAAAGCGAACTACTATACTCAATTGATCCAATAACTTGACCAACGGAACAAGTTACCCTAGGGATAACA +GCGCAATCCTATTCTAGAGTCCATATCAACAATAGGGTTTACGACCTCGATGTTGGATCAGGACATCCCG +ATGGTGCAGCCGCTATTAAAGGTTCGTTTGTTCAACGATTAAAGTCCTACGTGATCTGAGTTCAGACCGG +AGTAATCCAGGTCGGTTTCTATCTACNTTCAAATTCCTCCCTGTACGAAAGGACAAGAGAAATAAGGCCT +ACTTCACAAAGCGCCTTCCCCCGTAAATGATATCATCTCAACTTAGTATTATACCCACACCCACCCAAGA +ACAGGGTTTGTTAAGATGGCAGAGCCCGGTAATCGCATAAAACTTAAAACTTTACAGTCAGAGGTTCAAT +TCCTCTTCTTAACAACATACCCATGGCCAACCTCCTACTCCTCATTGTACCCATTCTAATCGCAATGGCA +TTCCTAATGCTTACCGAACGAAAAATTCTAGGCTATATACAACTACGCAAAGGCCCCAACGTTGTAGGCC +CCTACGGGCTACTACAACCCTTCGCTGACGCCATAAAACTCTTCACCAAAGAGCCCCTAAAACCCGCCAC +ATCTACCATCACCCTCTACATCACCGCCCCGACCTTAGCTCTCACCATCGCTCTTCTACTATGAACCCCC +CTCCCCATACCCAACCCCCTGGTCAACCTCAACCTAGGCCTCCTATTTATTCTAGCCACCTCTAGCCTAG +CCGTTTACTCAATCCTCTGATCAGGGTGAGCATCAAACTCAAACTACGCCCTGATCGGCGCACTGCGAGC +AGTAGCCCAAACAATCTCATATGAAGTCACCCTAGCCATCATTCTACTATCAACATTACTAATAAGTGGC +TCCTTTAACCTCTCCACCCTTATCACAACACAAGAACACCTCTGATTACTCCTGCCATCATGACCCTTGG +CCATAATATGATTTATCTCCACACTAGCAGAGACCAACCGAACCCCCTTCGACCTTGCCGAAGGGGAGTC +CGAACTAGTCTCAGGCTTCAACATCGAATACGCCGCAGGCCCCTTCGCCCTATTCTTCATAGCCGAATAC +ACAAACATTATTATAATAAACACCCTCACCACTACAATCTTCCTAGGAACAACATATGACGCACTCTCCC +CTGAACTCTACACAACATATTTTGTCACCAAGACCCTACTTCTAACCTCCCTGTTCTTATGAATTCGAAC +AGCATACCCCCGATTCCGCTACGACCAACTCATACACCTCCTATGAAAAAACTTCCTACCACTCACCCTA +GCATTACTTATATGATATGTCTCCATACCCATTACAATCTCCAGCATTCCCCCTCAAACCTAAGAAATAT +GTCTGATAAAAGAGTTACTTTGATAGAGTAAATAATAGGAGCTTAAACCCCCTTATTTCTAGGACTATGA +GAATCGAACCCATCCCTGAGAATCCAAAATTCTCCGTGCCACCTATCACACCCCATCCTAAAGTAAGGTC +AGCTAAATAAGCTATCGGGCCCATACCCCGAAAATGTTGGTTATACCCTTCCCGTACTAATTAATCCCCT +GGCCCAACCCGTCATCTACTCTACCATCTTTGCAGGCACACTCATCACAGCGCTAAGCTCGCACTGATTT 
+TTTACCTGAGTAGGCCTAGAAATAAACATGCTAGCTTTTATTCCAGTTCTAACCAAAAAAATAAACCCTC +GTTCCACAGAAGCTGCCATCAAGTATTTCCTCACGCAAGCAACCGCATCCATAATCCTTCTAATAGCTAT +CCTCTTCAACAATATACTCTCCGGACAATGAACCATAACCAATACTACCAATCAATACTCATCATTAATA +ATCATAATAGCTATAGCAATAAAACTAGGAATAGCCCCCTTTCACTTCTGAGTCCCAGAGGTTACCCAAG +GCACCCCTCTGACATCCGGCCTGCTTCTTCTCACATGACAAAAACTAGCCCCCATCTCAATCATATACCA +AATCTCTCCCTCACTAAACGTAAGCCTTCTCCTCACTCTCTCAATCTTATCCATCATAGCAGGCAGTTGA +GGTGGATTAAACCAAACCCAGCTACGCAAAATCTTAGCATACTCCTCAATTACCCACATAGGATGAATAA +TAGCAGTTCTACCGTACAACCCTAACATAACCATTCTTAATTTAACTATTTATATTATCCTAACTACTAC +CGCATTCCTACTACTCAACTTAAACTCCAGCACCACGACCCTACTACTATCTCGCACCTGAAACAAGCTA +ACATGACTAACACCCTTAATTCCATCCACCCTCCTCTCCCTAGGAGGCCTGCCCCCGCTAACCGGCTTTT +TGCCCAAATGGGCCATTATCGAAGAATTCACAAAAAACAATAGCCTCATCATCCCCACCATCATAGCCAC +CATCACCCTCCTTAACCTCTACTTCTACCTACGCCTAATCTACTCCACCTCAATCACACTACTCCCCATA +TCTAACAACGTAAAAATAAAATGACAGTTTGAACATACAAAACCCACCCCATTCCTCCCCACACTCATCG +CCCTTACCACGCTACTCCTACCTATCTCCCCTTTTATACTAATAATCTTATAGAAATTTAGGTTAAATAC +AGACCAAGAGCCTTCAAAGCCCTCAGTAAGTTGCAATACTTAATTTCTGTAACAGCTAAGGACTGCAAAA +CCCCACTCTGCATCAACTGAACGCAAATCAGCCACTTTAATTAAGCTAAGCCCTTACTAGACCAATGGGA +CTTAAACCCACAAACACTTAGTTAACAGCTAAGCACCCTAATCAACTGGCTTCAATCTACTTCTCCCGCC +GCCGGGAAAAAAGGCGGGAGAAGCCCCGGCAGGTTTGAAGCTGCTTCTTCGAATTTGCAATTCAATATGA +AAATCACCTCGGAGCTGGTAAAAAGAGGCCTAACCCCTGTCTTTAGATTTACAGTCCAATGCTTCACTCA +GCCATTTTACCTCACCCCCACTGATGTTCGCCGACCGTTGACTATTCTCTACAAACCACAAAGACATTGG +AACACTATACCTATTATTCGGCGCATGAGCTGGAGTCCTAGGCACAGCTCTAAGCCTCCTTATTCGAGCC +GAGCTGGGCCAGCCAGGCAACCTTCTAGGTAACGACCACATCTACAACGTTATCGTCACAGCCCATGCAT +TTGTAATAATCTTCTTCATAGTAATACCCATCATAATCGGAGGCTTTGGCAACTGACTAGTTCCCCTAAT +AATCGGTGCCCCCGATATGGCGTTTCCCCGCATAAACAACATAAGCTTCTGACTCTTACCTCCCTCTCTC +CTACTCCTGCTCGCATCTGCTATAGTGGAGGCCGGAGCAGGAACAGGTTGAACAGTCTACCCTCCCTTAG +CAGGGAACTACTCCCACCCTGGAGCCTCCGTAGACCTAACCATCTTCTCCTTACACCTAGCAGGTGTCTC +CTCTATCTTAGGGGCCATCAATTTCATCACAACAATTATCAATATAAAACCCCCTGCCATAACCCAATAC 
+CAAACGCCCCTCTTCGTCTGATCCGTCCTAATCACAGCAGTCCTACTTCTCCTATCTCTCCCAGTCCTAG +CTGCTGGCATCACTATACTACTAACAGACCGCAACCTCAACACCACCTTCTTCGACCCCGCCGGAGGAGG +AGACCCCATTCTATACCAACACCTATTCTGATTTTTCGGTCACCCTGAAGTTTATATTCTTATCCTACCA +GGCTTCGGAATAATCTCCCATATTGTAACTTACTACTCCGGAAAAAAAGAACCATTTGGATACATAGGTA +TGGTCTGAGCTATGATATCAATTGGCTTCCTAGGGTTTATCGTGTGAGCACACCATATATTTACAGTAGG +AATAGACGTAGACACACGAGCATATTTCACCTCCGCTACCATAATCATCGCTATCCCCACCGGCGTCAAA +GTATTTAGCTGACTCGCCACACTCCACGGAAGCAATATGAAATGATCTGCTGCAGTGCTCTGAGCCCTAG +GATTCATCTTTCTTTTCACCGTAGGTGGCCTGACTGGCATTGTATTAGCAAACTCATCACTAGACATCGT +ACTACACGACACGTACTACGTTGTAGCCCACTTCCACTATGTCCTATCAATAGGAGCTGTATTTGCCATC +ATAGGAGGCTTCATTCACTGATTTCCCCTATTCTCAGGCTACACCCTAGACCAAACCTACGCCAAAATCC +ATTTCACTATCATATTCATCGGCGTAAATCTAACTTTCTTCCCACAACACTTTCTCGGCCTATCCGGAAT +GCCCCGACGTTACTCGGACTACCCCGATGCATACACCACATGAAACATCCTATCATCTGTAGGCTCATTC +ATTTCTCTAACAGCAGTAATATTAATAATTTTCATGATTTGAGAAGCCTTCGCTTCGAAGCGAAAAGTCC +TAATAGTAGAAGAACCCTCCATAAACCTGGAGTGACTATATGGATGCCCCCCACCCTACCACACATTCGA +AGAACCCGTATACATAAAATCTAGACAAAAAAGGAAGGAATCGAACCCCCCAAAGCTGGTTTCAAGCCAA +CCCCATGGCCTCCATGACTTTTTCAAAAAGGTATTAGAAAAACCATTTCATAACTTTGTCAAAGTTAAAT +TATAGGCTAAATCCTATATATCTTAATGGCACATGCAGCGCAAGTAGGTCTACAAGACGCTACTTCCCCT +ATCATAGAAGAGCTTATCACCTTTCATGATCACGCCCTCATAATCATTTTCCTTATCTGCTTCCTAGTCC +TGTATGCCCTTTTCCTAACACTCACAACAAAACTAACTAATACTAACATCTCAGACGCTCAGGAAATAGA +AACCGTCTGAACTATCCTGCCCGCCATCATCCTAGTCCTCATCGCCCTCCCATCCCTACGCATCCTTTAC +ATAACAGACGAGGTCAACGATCCCTCCCTTACCATCAAATCAATTGGCCACCAATGGTACTGAACCTACG +AGTACACCGACTACGGCGGACTAATCTTCAACTCCTACATACTTCCCCCATTATTCCTAGAACCAGGCGA +CCTGCGACTCCTTGACGTTGACAATCGAGTAGTACTCCCGATTGAAGCCCCCATTCGTATAATAATTACA +TCACAAGACGTCTTGCACTCATGAGCTGTCCCCACATTAGGCTTAAAAACAGATGCAATTCCCGGACGTC +TAAACCAAACCACTTTCACCGCTACACGACCGGGGGTATACTACGGTCAATGCTCTGAAATCTGTGGAGC +AAACCACAGTTTCATGCCCATCGTCCTAGAATTAATTCCCCTAAAAATCTTTGAAATAGGGCCCGTATTT +ACCCTATAGCACCCCCTCTACCCCCTCTAGAGCCCACTGTAAAGCTAACTTAGCATTAACCTTTTAAGTT 
+AAAGATTAAGAGAACCAACACCTCTTTACAGTGAAATGCCCCAACTAAATACTACCGTATGGCCCACCAT +AATTACCCCCATACTCCTTACACTATTCCTCATCACCCAACTAAAAATATTAAACACAAACTACCACCTA +CCTCCCTCACCAAAGCCCATAAAAATAAAAAATTATAACAAACCCTGAGAACCAAAATGAACGAAAATCT +GTTCGCTTCATTCATTGCCCCCACAATCCTAGGCCTACCCGCCGCAGTACTGATCATTCTATTTCCCCCT +CTATTGATCCCCACCTCCAAATATCTCATCAACAACCGACTAATCACCACCCAACAATGACTAATCAAAC +TAACCTCAAAACAAATGATAACCATACACAACACTAAAGGACGAACCTGATCTCTTATACTAGTATCCTT +AATCATTTTTATTGCCACAACTAACCTCCTCGGACTCCTGCCTCACTCATTTACACCAACCACCCAACTA +TCTATAAACCTAGCCATGGCCATCCCCTTATGAGCGGGCACAGTGATTATAGGCTTTCGCTCTAAGATTA +AAAATGCCCTAGCCCACTTCTTACCACAAGGCACACCTACACCCCTTATCCCCATACTAGTTATTATCGA +AACCATCAGCCTACTCATTCAACCAATAGCCCTGGCCGTACGCCTAACCGCTAACATTACTGCAGGCCAC +CTACTCATGCACCTAATTGGAAGCGCCACCCTAGCAATATCAACCATTAACCTTCCCTCTACACTTATCA +TCTTCACAATTCTAATTCTACTGACTATCCTAGAAATCGCTGTCGCCTTAATCCAAGCCTACGTTTTCAC +ACTTCTAGTAAGCCTCTACCTGCACGACAACACATAATGACCCACCAATCACATGCCTATCATATAGTAA +AACCCAGCCCATGACCCCTAACAGGGGCCCTCTCAGCCCTCCTAATGACCTCCGGCCTAGCCATGTGATT +TCACTTCCACTCCATAACGCTCCTCATACTAGGCCTACTAACCAACACACTAACCATATACCAATGATGG +CGCGATGTAACACGAGAAAGCACATACCAAGGCCACCACACACCACCTGTCCAAAAAGGCCTTCGATACG +GGATAATCCTATTTATTACCTCAGAAGTTTTTTTCTTCGCAGGATTTTTCTGAGCCTTTTACCACTCCAG +CCTAGCCCCTACCCCCCAATTAGGAGGGCACTGGCCCCCAACAGGCATCACCCCGCTAAATCCCCTAGAA +GTCCCACTCCTAAACACATCCGTATTACTCGCATCAGGAGTATCAATCACCTGAGCTCACCATAGTCTAA +TAGAAAACAACCGAAACCAAATAATTCAAGCACTGCTTATTACAATTTTACTGGGTCTCTATTTTACCCT +CCTACAAGCCTCAGAGTACTTCGAGTCTCCCTTCACCATTTCCGACGGCATCTACGGCTCAACATTTTTT +GTAGCCACAGGCTTCCACGGACTTCACGTCATTATTGGCTCAACTTTCCTCACTATCTGCTTCATCCGCC +AACTAATATTTCACTTTACATCCAAACATCACTTTGGCTTCGAAGCCGCCGCCTGATACTGGCATTTTGT +AGATGTGGTTTGACTATTTCTGTATGTCTCCATCTATTGATGAGGGTCTTACTCTTTTAGTATAAATAGT +ACCGTTAACTTCCAATTAACTAGTTTTGACAACATTCAAAAAAGAGTAATAAACTTCGCCTTAATTTTAA +TAATCAACACCCTCCTAGCCTTACTACTAATAATTATTACATTTTGACTACCACAACTCAACGGCTACAT +AGAAAAATCCACCCCTTACGAGTGCGGCTTCGACCCTATATCCCCCGCCCGCGTCCCTTTCTCCATAAAA 
+TTCTTCTTAGTAGCTATTACCTTCTTATTATTTGATCTAGAAATTGCCCTCCTTTTACCCCTACCATGAG +CCCTACAAACAACTAACCTGCCACTAATAGTTATGTCATCCCTCTTATTAATCATCATCCTAGCCCTAAG +TCTGGCCTATGAGTGACTACAAAAAGGATTAGACTGAACCGAATTGGTATATAGTTTAAACAAAACGAAT +GATTTCGACTCATTAAATTATGATAATCATATTTACCAAATGCCCCTCATTTACATAAATATTATACTAG +CATTTACCATCTCACTTCTAGGAATACTAGTATATCGCTCACACCTCATATCCTCCCTACTATGCCTAGA +AGGAATAATACTATCGCTGTTCATTATAGCTACTCTCATAACCCTCAACACCCACTCCCTCTTAGCCAAT +ATTGTGCCTATTGCCATACTAGTCTTTGCCGCCTGCGAAGCAGCGGTGGGCCTAGCCCTACTAGTCTCAA +TCTCCAACACATATGGCCTAGACTACGTACATAACCTAAACCTACTCCAATGCTAAAACTAATCGTCCCA +ACAATTATATTACTACCACTGACATGACTTTCCAAAAAACACATAATTTGAATCAACACAACCACCCACA +GCCTAATTATTAGCATCATCCCTCTACTATTTTTTAACCAAATCAACAACAACCTATTTAGCTGTTCCCC +AACCTTTTCCTCCGACCCCCTAACAACCCCCCTCCTAATACTAACTACCTGACTCCTACCCCTCACAATC +ATGGCAAGCCAACGCCACTTATCCAGTGAACCACTATCACGAAAAAAACTCTACCTCTCTATACTAATCT +CCCTACAAATCTCCTTAATTATAACATTCACAGCCACAGAACTAATCATATTTTATATCTTCTTCGAAAC +CACACTTATCCCCACCTTGGCTATCATCACCCGATGAGGCAACCAGCCAGAACGCCTGAACGCAGGCACA +TACTTCCTATTCTACACCCTAGTAGGCTCCCTTCCCCTACTCATCGCACTAATTTACACTCACAACACCC +TAGGCTCACTAAACATTCTACTACTCACTCTCACTGCCCAAGAACTATCAAACTCCTGAGCCAACAACTT +AATATGACTAGCTTACACAATAGCTTTTATAGTAAAGATACCTCTTTACGGACTCCACTTATGACTCCCT +AAAGCCCATGTCGAAGCCCCCATCGCTGGGTCAATAGTACTTGCCGCAGTACTCTTAAAACTAGGCGGCT +ATGGTATAATACGCCTCACACTCATTCTCAACCCCCTGACAAAACACATAGCCTACCCCTTCCTTGTACT +ATCCCTATGAGGCATAATTATAACAAGCTCCATCTGCCTACGACAAACAGACCTAAAATCGCTCATTGCA +TACTCTTCAATCAGCCACATAGCCCTCGTAGTAACAGCCATTCTCATCCAAACCCCCTGAAGCTTCACCG +GCGCAGTCATTCTCATAATCGCCCACGGGCTTACATCCTCATTACTATTCTGCCTAGCAAACTCAAACTA +CGAACGCACTCACAGTCGCATCATAATCCTCTCTCAAGGACTTCAAACTCTACTCCCACTAATAGCTTTT +TGATGACTTCTAGCAAGCCTCGCTAACCTCGCCTTACCCCCCACTATTAACCTACTGGGAGAACTCTCTG +TGCTAGTAACCACGTTCTCCTGATCAAATATCACTCTCCTACTTACAGGACTCAACATACTAGTCACAGC +CCTATACTCCCTCTACATATTTACCACAACACAATGGGGCTCACTCACCCACCACATTAACAACATAAAA +CCCTCATTCACACGAGAAAACACCCTCATGTTCATACACCTATCCCCCATTCTCCTCCTATCCCTCAACC 
+CCGACATCATTACCGGGTTTTCCTCTTGTAAATATAGTTTAACCAAAACATCAGATTGTGAATCTGACAA +CAGAGGCTTACGACCCCTTATTTACCGAGAAAGCTCACAAGAACTGCTAACTCATGCCCCCATGTCTAAC +AACATGGCTTTCTCAACTTTTAAAGGATAACAGCTATCCATTGGTCTTAGGCCCCAAAAATTTTGGTGCA +ACTCCAAATAAAAGTAATAACCATGCACACTACTATAACCACCCTAACCCTGACTTCCCTAATTCCCCCC +ATCCTTACCACCCTCGTTAACCCTAACAAAAAAAACTCATACCCCCATTATGTAAAATCCATTGTCGCAT +CCACCTTTATTATCAGTCTCTTCCCCACAACAATATTCATGTGCCTAGACCAAGAAGTTATTATCTCGAA +CTGACACTGAGCCACAACCCAAACAACCCAGCTCTCCCTAAGCTTCAAACTAGACTACTTCTCCATAATA +TTCATCCCTGTAGCATTGTTCGTTACATGGTCCATCATAGAATTCTCACTGTGATATATAAACTCAGACC +CAAACATTAATCAGTTCTTCAAATATCTACTCATCTTCCTAATTACCATACTAATCTTAGTTACCGCTAA +CAACCTATTCCAACTGTTCATCGGCTGAGAGGGCGTAGGAATTATATCCTTCTTGCTCATCAGTTGATGA +TACGCCCGAGCAGATGCCAACACAGCAGCCATTCAAGCAATCCTATACAACCGTATCGGCGATATCGGTT +TCATCCTCGCCTTAGCATGATTTATCCTACACTCCAACTCATGAGACCCACAACAAATAGCCCTTCTAAA +CGCTAATCCAAGCCTCACCCCACTACTAGGCCTCCTCCTAGCAGCAGCAGGCAAATCAGCCCAATTAGGT +CTCCACCCCTGACTCCCCTCAGCCATAGAAGGCCCCACCCCAGTCTCAGCCCTACTCCACTCAAGCACTA +TAGTTGTAGCAGGAATCTTCTTACTCATCCGCTTCCACCCCCTAGCAGAAAATAGCCCACTAATCCAAAC +TCTAACACTATGCTTAGGCGCTATCACCACTCTGTTCGCAGCAGTCTGCGCCCTTACACAAAATGACATC +AAAAAAATCGTAGCCTTCTCCACTTCAAGTCAACTAGGACTCATAATAGTTACAATCGGCATCAACCAAC +CACACCTAGCATTCCTGCACATCTGTACCCACGCCTTCTTCAAAGCCATACTATTTATGTGCTCCGGGTC +CATCATCCACAACCTTAACAATGAACAAGATATTCGAAAAATAGGAGGACTACTCAAAACCATACCTCTC +ACTTCAACCTCCCTCACCATTGGCAGCCTAGCATTAGCAGGAATACCTTTCCTCACAGGTTTCTACTCCA +AAGACCACATCATCGAAACCGCAAACATATCATACACAAACGCCTGAGCCCTATCTATTACTCTCATCGC +TACCTCCCTGACAAGCGCCTATAGCACTCGAATAATTCTTCTCACCCTAACAGGTCAACCTCGCTTCCCC +ACCCTTACTAACATTAACGAAAATAACCCCACCCTACTAAACCCCATTAAACGCCTGGCAGCCGGAAGCC +TATTCGCAGGATTTCTCATTACTAACAACATTTCCCCCGCATCCCCCTTCCAAACAACAATCCCCCTCTA +CCTAAAACTCACAGCCCTCGCTGTCACTTTCCTAGGACTTCTAACAGCCCTAGACCTCAACTACCTAACC +AACAAACTTAAAATAAAATCCCCACTATGCACATTTTATTTCTCCAACATACTCGGATTCTACCCTAGCA +TCACACACCGCACAATCCCCTATCTAGGCCTTCTTACGAGCCAAAACCTGCCCCTACTCCTCCTAGACCT 
+AACCTGACTAGAAAAGCTATTACCTAAAACAATTTCACAGCACCAAATCTCCACCTCCATCATCACCTCA +ACCCAAAAAGGCATAATTAAACTTTACTTCCTCTCTTTCTTCTTCCCACTCATCCTAACCCTACTCCTAA +TCACATAACCTATTCCCCCGAGCAATCTCAATTACAATATATACACCAACAAACAATGTTCAACCAGTAA +CTACTACTAATCAACGCCCATAATCATACAAAGCCCCCGCACCAATAGGATCCTCCCGAATCAACCCTGA +CCCCTCTCCTTCATAAATTATTCAGCTTCCTACACTATTAAAGTTTACCACAACCACCACCCCATCATAC +TCTTTCACCCACAGCACCAATCCTACCTCCATCGCTAACCCCACTAAAACACTCACCAAGACCTCAACCC +CTGACCCCCATGCCTCAGGATACTCCTCAATAGCCATCGCTGTAGTATATCCAAAGACAACCATCATTCC +CCCTAAATAAATTAAAAAAACTATTAAACCCATATAACCTCCCCCAAAATTCAGAATAATAACACACCCG +ACCACACCGCTAACAATCAATACTAAACCCCCATAAATAGGAGAAGGCTTAGAAGAAAACCCCACAAACC +CCATTACTAAACCCACACTCAACAGAAACAAAGCATACATCATTATTCTCGCACGGACTACAACCACGAC +CAATGATATGAAAAACCATCGTTGTATTTCAACTACAAGAACACCAATGACCCCAATACGCAAAACTAAC +CCCCTAATAAAATTAATTAACCACTCATTCATCGACCTCCCCACCCCATCCAACATCTCCGCATGATGAA +ACTTCGGCTCACTCCTTGGCGCCTGCCTGATCCTCCAAATCACCACAGGACTATTCCTAGCCATGCACTA +CTCACCAGACGCCTCAACCGCCTTTTCATCAATCGCCCACATCACTCGAGACGTAAATTATGGCTGAATC +ATCCGCTACCTTCACGCCAATGGCGCCTCAATATTCTTTATCTGCCTCTTCCTACACATCGGGCGAGGCC +TATATTACGGATCATTTCTCTACTCAGAAACCTGAAACATCGGCATTATCCTCCTGCTTGCAACTATAGC +AACAGCCTTCATAGGCTATGTCCTCCCGTGAGGCCAAATATCATTCTGAGGGGCCACAGTAATTACAAAC +TTACTATCCGCCATCCCATACATTGGGACAGACCTAGTTCAATGAATCTGAGGAGGCTACTCAGTAGACA +GTCCCACCCTCACACGATTCTTTACCTTTCACTTCATCTTGCCCTTCATTATTGCAGCCCTAGCAACACT +CCACCTCCTATTCTTGCACGAAACGGGATCAAACAACCCCCTAGGAATCACCTCCCATTCCGATAAAATC +ACCTTCCACCCTTACTACACAATCAAAGACGCCCTCGGCTTACTTCTCTTCCTTCTCTCCTTAATGACAT +TAACACTATTCTCACCAGACCTCCTAGGCGACCCAGACAATTATACCCTAGCCAACCCCTTAAACACCCC +TCCCCACATCAAGCCCGAATGATATTTCCTATTCGCCTACACAATTCTCCGATCCGTCCCTAACAAACTA +GGAGGCGTCCTTGCCCTATTACTATCCATCCTCATCCTAGCAATAATCCCCATCCTCCATATATCCAAAC +AACAAAGCATAATATTTCGCCCACTAAGCCAATCACTTTATTGACTCCTAGCCGCAGACCTCCTCATTCT +AACCTGAATCGGAGGACAACCAGTAAGCTACCCTTTTACCATCATTGGACAAGTAGCATCCGTACTATAC +TTCACAACAATCCTAATCCTAATACCAACTATCTCCCTAATTGAAAACAAAATACTCAAATGGGCCTGTC 
+CTTGTAGTATAAACTAATACACCAGTCTTGTAAACCGGAGATGAAAACCTTTTTCCAAGGACAAATCAGA +GAAAAAGTCTTTAACTCCACCATTAGCACCCAAAGCTAAGATTCTAATTTAAACTATTCTCTGTTCTTTC +ATGGGGAAGCAGATTTGGGTACCACCCAAGTATTGACTCACCCATCAACAACCGCTATGTATTTCGTACA +TTACTGCCAGCCACCATGAATATTGTACGGTACCATAAATACTTGACCACCTGTAGTACATAAAAACCCA +ATCCACATCAAAACCCCCTCCCCATGCTTACAAGCAAGTACAGCAATCAACCCTCAACTATCACACATCA +ACTGCAACTCCAAAGCCACCCCTCACCCACTAGGATACCAACAAACCTACCCACCCTTAACAGTACATAG +TACATAAAGCCATTTACCGTACATAGCACATTACAGTCAAATCCCTTCTCGTCCCCATGGATGACCCCCC +TCAGATAGGGGTCCCTTGACCACCATCCTCCGTGAAATCAATATCCCGCACAAGAGTGCTACTCTCCTCG +CTCCGGGCCCATAACACTTGGGGGTAGCTAAAGTGAACTGTATCCGACATCTGGTTCCTACTTCAGGGTC +ATAAAGCCTAAATAGCCCACACGTTCCCCTTAAATAAGACATCACGATG + diff --git a/test/MT-orangA.fa b/test/MT-orangA.fa new file mode 100644 index 0000000..e1cef04 --- /dev/null +++ b/test/MT-orangA.fa @@ -0,0 +1,276 @@ +>PA#NC_002083.1X +GATCACAGGCCTATCACCCTATTAATCACTCACGGGAGCTCTCCATGCATCTGGTATTTT +TTCGGGGGGGGATGCACGCGATAGCATCGCGGGCCGCTGGAACCGGAGCACCCTATGTCG +CAGGATCTGTCTTTGATTCCTACCTCATGCCATTATTAATCGCGCCTAATATCCAATATC +CTAGCCCCACCCTCAGTGTTTGAAGCTGCTATTTAATTTATGCTAGAGGACATAAAATTA +CCAAAAAAAAATAAACGAACTCTCAACAACCCTACCCCATCAACCCAACAAAATCCAATT +TTTATCTTTAGGCTATGTGCACTTTCAACAGGCACCCCTCAACTAACACAATCTCCTTCT +TATCCCACCCACCAACCCCCCCCCCCCCTTCCTCCCTCTTTCTCCATTTTCCCCACAAAC +ACCGCTACTACCCCCACACCCCAGACCAACCCAACCCAAAAGACACCCCGCACGGTTTAT +GTAGCTTATTCTATCCAAAGCAATGCACTGAAAATGTCTCGACGGGCCCACACGCCCCAT +AAACAAATAGGTTTGGTCCTAGCCTTTCTATTAGCTCTTAGTGAGGTTACACATGCAAGC +ATCCCCGCCCCAGTGAGTCGCCCTCCAAGTCACTCTGACTAAGAGGAGCAAGCATCAAGC +ACGCAACAGCGCAGCTCAAGACGCTCAGCCTAGCCACACCCCCACGGGAGACAGCAGTGA +TAAGTCTTTAGCAATAAACGAAAGTTCAACTAAGCTACACTAACCCCAGGGTTGGTCAAC +TTCGTGCCAGCCACCGCGGTCACACGATTAGCCCAAGTTAATAGAGATCGGCGTAGAGAG +TGTTTTAGATTCTTTTTCTCCCCAATAAAGCTAAAATTTACCTGAGTTGTAGAAAACTTA +AGCTAATACAAAATAAACTACGAAAGTGGCTTTAATATATCTGAACACACAATAGCTAAG +GCCCAAACTGGGATTAGATACCCCACTATGCTTAGCCCTAAACTTTAACAGTTAAATCAA +CAAAACTGCTCGCCAGAACACTACGAGCCACAGCTTAAAACTCAAAGGACCTGGCGGTGC 
+TTCATATCCCTCTAGAGGAGCCTGTTCTGTAATCGATAAACCCCGATCAACCTCACCACC +CCTTGCTCAGCCTATATACCGCCATCTTCAGCAAACCCTGATGAAGGCCACGAAGTAAGC +GCAAGCATCCACATAAAGACGTTAGGTCAAGGTGTAGCCCATGGAGTGGCAAGAAATGGG +CTACATTTTCTACTTCAGAAAACTACGATAGCCCTCATGAAACCTGAGGGTCGAAGGTGG +ATTTAGCAGTAAACTAAGAGTAGAGTGCTTAGTTGAACAGGGCCCTGAAGCGCGTACACA +CCGCCCGTCACCCTCTTCAAGTATATTTCAGGGACTACCTAACTAAAACCCCCACGCATC +TATATAGAGGAGGCAAGTCGTAACATGGTAAGCGTACTGGAAAGTGCGCTTGGACGAACC +AGAGGGTAGCTTAACACAAAGCACCCGGCTTACACCTGGGAGATTTCAATTCAACCTGGC +CCCTCTGAGCTAACCCTAGCCCCAAACCCAACCCACCCTACTACCAACCAACCCTAACCA +AACCATTCACCCAAACAAAGTATAGGCGATAGAAATTACAATCCGGCGCAATAGACACAG +TACCGTAAGGGAAAGATGAAAAAACACAACCAAGCACAACATAGCAAGGACTAACCCCTG +TACCTTTTGCATAATGAATTAACTAGAAACAACTTTGCAAGGAGAGCCAAAGCCAAGACC +CCCGAAACCAGACGAGCTACCCATAAACAGCTAAAAGAGCACACCCGTCTATGTAGCAAA +ATAGTGGGAAGATTTATGGGTAGAGGCGACAAACCTACCGAGCCTGGTGATAGCTGGTTG +TCCAAGACAGAATCTTAGTTCAACTTTAAATTTACTTACAGAACCCCTAATCCCCTCGTA +AATTTAATTGCTAGTCTAAAGAGGAACAGCTCTTTAGACACTAGGAAAAAACCTTAAAAA +GAGAGTAAAAAACACAACACCCATAGTGGGCCCAAAAGCAGCCATCAATTAAGAAAGCGT +TCAAGCTCGACACCTAAACACCAAAAAATACCAAACACAAAACTGAACTCCTTACTCCCC +ATTGGACTAATCTATTGCCCCATAGAAGAAACAATGTTAGTATAAGTAACATGAAGATAT +TCTCCCCCGCATAAGTCTACGTCAGACCGAAACATCACACTGACAATTAACGGTCCAATA +TGCATAGTTAACAAATAAACTATTATTTTTTCCCCCCGTTAATCCAACACAGGCATGCCT +ATAAGGAAAGGTTAAAAAAAGTAAAAGGAACTCGGCAAATCTCACCCCGCCTGTTTACCA +AAAACATCACCTCTAGCATTACCAGTATTAGAGGCACCGCCTGCCCGGTGACATACGTTT +AACGGCCGCGGTACCCTGACCGTGCAAAGGTAGCATAATCACTTGTTCCTTAAATGGGGA +CTTGTATGAATGGCTTCACGAGGGTTCGACTGTCTCTTACTTTTAACCAGTGAAATTGAC +CTGCCCGTGAAGAGGCGGGCATAACATAACAAGACGAGAAGACCCTATGGAGCTTCAATT +TACCAGTGCAAATAACATACAACAAGCCCACAGGCCCTAAATCACCAAACCTGCACTGAA +GATTTCGGTTGGGGCGACCTCGGAGCACAACCCAACCTCCGAGAAACACATGTTAAGACC +TCACAAGTCAAAACGAACTTCCACACACAATTGATCCAACAACTTGACCAACGGAACAAG +TTACCCTAGGGATAACAGCGCAATCCTGTTCTAGAGTCCATATCAACAACAGGGTTTACG +ACCTCGATGTTGGATCAGGACATCCTAATGGTGCAGCCGCTATTAAAGGTTCGTTTGTTC +AACGATTAAAGTCCTACGTGATCTGAGTTCAGACCGGAGCAATCCAGGTCGGTTTCTATC 
+TATTTCACATTTCTCCCTGTACGAAAGGACAAGAGAAATGGGGCCTACTTCACATAAGCG +CCTTTCCCAAACAAATGATATCATCTCAATTTAACACCACACCAACACCCACCCAAGAAA +AGGGCTATGTTAAGATGGCAGAGCCCGGTAACTGCATAAAATTTAAAGCTTTACAGTCAG +AGGTTCAACTCCTCTTCTTAACAATATGCCCATAATCAACCTCCTACTCCTCATTATATC +CATCCTAATCGCCATAGCATTTCTAATGCTAACCGAACGAAAAATCCTAGGCCACACACA +ACTACGCAAAGGGCCCAACATTGTGGGCCCCTACGGCTTACTACAACCCTTTGCCGACGC +CCTAAAACTATTCACCAAAGAACCCCTAAAACCCTCCACATCAACCATCACCCTTTACAT +TATTTCCCCCGCCCTAGCCCTTACCATTGCCCTCCTACTATGAACCCCCCTCCCTATGCC +CATCCCCCTAATCAACCTCAACTTAGGCCTCCTATTTATCCTAGCCGCGTCAAGCCTAAC +CGTCTACTCCATCCTCTGATCAGGATGAGCATCTAACTCAAACTACGCCCTAATCGGCGC +ATTGCGGGCGGTAGCCCAAACGATCTCATACGAAATTACCCTAGCCCTTATCCTGTTATC +AGTACTACTAATAAGCGGCTCTTTTAACCTCTCCGCCCTCATCACAACACAAGAACACTC +ATGACTACTTCTACCATCATGACCTCTAGCCCTAATATGATTTATTTCAACACTAGCAGA +AACCAACCGAGCCCCCTTCGACCTCACCGAAGGAGAATCCGAACTAGTTTCGGGCTTTAA +CACTGAATACGCCGCAGGTCCATTCGCCCTATTCTTCATAGCCGAATATACAAACATTAT +CTTAATAAACGCCCTCACCACTATAATTTTCCTAGGAACAACATTCAACATCCACTCCCC +AGAACTCTACACAACCCTCTTCACCATCAAAACCCTACTCCTAACCTCCCTATTCCTATG +AATTCGATCAACATACCCCCGATTCCGCTACGACCAACTCATGCACCTTCTATGAAAAAA +TTTCCTGCCACTCACCCTAGCACTACTAATATGACACATCTCCGTACCCATTGCAACCTC +CGGCATTCCCCCACAAACCTAAGAAATATGTCTGACAAAAGAGTTACTTTGATAGAGTAA +AAAATAGAGGTCTAAATCCCCTTATTTCTAGGATTATGGGAGTTGAACCCACCCCTGAGA +ATCCAAAATTCTCCGTGCCACCCATCACACCCTATCCTAAAGTAAGGTCAGCTAAATAAG +CTATCGGGCCCATACCCCGAAAATGTTGGTTATACCCTTCCCGTACTAATTAACCCCTTG +GCCCAACCCATCATTTACCCCACCATCTTCACAGGCACGCTCATTACAGCACTGAGCTCC +CACTGATTCTTTGCCTGACTGGGACTAGAAATAAATATACTCGCTTTCATCCCAGTCCTA +ACCAAAAAAACAAGCCCCCGCTCCACAGAAGCCGCCATTAAATATTTCCTCACACAGGCA +ACCGCATCCATAATCCTCCTGATAGCCATCCTCTACAACAACATACTTTCCGGACAGTGA +ACCACAACCAACACCACCAACCCATATTCATCTCTAATAATCGTAACCGCCCTAGCAATG +AAGCTAGGAATAGCCCCCTTCCACTTTTGAGTCCCAGAAGTCACCCAAGGAGTCCCCCTG +ACATCCGGCTTACTCCTCCTTACATGACAAAAATTAGCCCCCATTTCAATTATATACCAA +ATATCTTCATCGGTAGACACAAACATCCTCCTCACCCTCTCAATTCTATCTATCCTAGTA +GGCGGCTGAGGCGGACTAAACCAAACCCAACTACGCAAAATCCTGGCATACTCCTCAATC 
+ACCCATATAGGATGAATAATAGCAGTACTACCATATAACCCAGACATCACTATCCTCAAC +CTAATCATCTACATCATCCTGACAACTACCGCATTCCTAATCCTCGACTTAAACTCTAGT +GTCACAATCCTAATATTAACCCGCACCTGGAACAAGCTGACATGACTAATACCCTTAATC +CCATCAACCTTATTATCCCTAGGGGGCCTGCCACCACTAACCGGCTTCCTGCCCAAATGA +GCCATCATTGAAGAATTTGCAAAAAATGGCAATCTCATTACCCCCACAATCATGGCTATT +ATCACCCTCCTCAACCTCTACTTCTACGTACGCCTAATCTACGCCACCTCAATCACACTA +CTCCCCATATCTAACAACGCAAAAATGAAATGACAGTTCGAAAACACAAAACCCACCCCT +CTTCTCCCCACACTCACCATTCTTACCACCCTACTCCTACCTATCTCCCCTCTCATCCTA +TCTATCTCATAGAAATTTAGGTTAACACAGACCAAGAGCCTTCAAAGCCCTCAGCAAGTC +ACAGCACTTAATTTCTGTAACACTAAGGACTGCAAAGCCCCGCTCTGCATCAACTGAACG +CAAACCAGCCACTTTAATTAAGCTAAGCCCTCCCTAGACCGATGGGACTTAAACCCACAA +ACATTTAGTTAACAGCTAAACACCCTAATCAATTGGCTTCAGTCCACTTCTCCCGCCGCG +GGGAAAAAGGCGGGAGAAGCCCCGGCAGGCCTTAAAGCTGCTCCTTCGAATTTGCAATTC +AACATGACAATCACCTCGGGGCTGGTAAAAAGAGGTCTAACCCCTGTTCTTAGATTTACA +GCCTAATGCCTTAACTCGGCCATTTTACCCCCCCCCCCCCTTTTTTTCTCCACTAATGTT +CGCCGACCGCTGGCTATTCTCCACGAACCACAAAGACATCGGGACACTATACCTGTTATT +CGGCGCATGGGCTGGAGTCCTAGGCACTGCCCTAAGCCTCCTCATTCGAGCTGAACTGGG +CCAACCCGGCAACCTTCTAGGCAATGACCATATCTACAATGTCATCGTCACAGCTCATGC +ATTCGTAATAATTTTCTTTATAGTCATACCCATTATAATTGGAGGCTTTGGCAACTGACT +AGTGCCCCTAATAATCGGCGCCCCCGATATAGCATTCCCGCGCATAAATAATATAAGCTT +CTGACTCCTCCCCCCCTCCTTTCTCCTACTGCTCGCTTCTGCTACAGTAGAGGCTGGCGC +AGGAACAGGCTGAACAGTCTATCCGCCCCTAGCAGGAAACTACTCTCACCCAGGAGCCTC +TGTAGACTTAACAATCTTCTCTTTACACCTAGCAGGCATTTCCTCTATCCTAGGAGCTAT +CAATTTCATCACAACAATTATTAATATAAAACCCCCTGCAATATCCCAATACCAAACCCC +CCTCTTCGTCTGATCAGTCTTGATCACAGCAGTCCTACTTCTCCTTTCCCTCCCAGTCCT +AGCCGCTGGCATCACCATACTACTAACAGATCGCAACCTAAACACCACATTCTTTGACCC +AGCCGGAGGTGGAGATCCCATCCTATATCAGCACCTATTCTGATTTTTTGGCCACCCTGA +AGTCTACATTCTCATCCTGCCGGGTTTCGGCATAATCTCCCACATCGTAACACACTATTC +CGGAAAAGAAGAGCCATTTGGGTACATAGGCATAGTCTGAGCCATAGTCTCAATTGGCTT +CCTGGGCTTTATCGTATGGGCCCACCACATATTCACAGTAGGAATAGACGTGGACACACG +AGCCTACTTCACCTCCGCTACCATAATCATTGCCATCCCCACCGGCGTCAAAGTATTTAG +CTGACTCGCTACACTCCACGGAAGCAACACTAAATGATCTGCCGCAATCCTCTGAGCCTT 
+AGGATTCATTTTCCTCTTCACCGTAGGCGGCCTAACAGGCATCGTACTAGCAAACTCATC +ACTAGACATTGTATTACACGATACATACTACGTTGTAGCCCACTTTCATTACGTCCTATC +AATAGGAGCTGTATTCGCCATCATGGGAGGCTTCATCCACTGGTTCCCACTATTCTCAGG +CTACACCTTAGACCAGACCTATGCTAAAATTCACTTCATCACCATATTTATCGGCGTAAA +TTTAACTTTCTTCCCACAACATTTCCTCGGCCTGTCAGGCATACCCCGACGCTACTCCGA +CTACCCCGACGCGTACACCACCTGAAATATTTTATCATCCGCAGGCTCATTTATCTCCCT +AACAGCAGTCATACTAATAATTTTCATAATTTGAGAAGCCTTCGCCTCAAAACGAAAAGT +CCCAATAGTTGAACAACCCTCCACAAGCCTAGAGTGATTGTACGGATGCCCCCCACCCTA +CCACACATTTGAAGAACCCGTCTATATAAAACCAGAACAAAAAAGGAAGGAATCGAACCT +CCTAAAGCTGGTTTCAAGCCAACCCCACAACCTCCATGACTTTTTCAAGAGATACTAGAA +AAACCATTTCATGACTTTGTCAAAGTTAAGTTACAGGCCAAACCCTGTGTATCTTAATGG +CGCACGCAGCACAGGTAGGTTTACAAGACGCTACCTCTCCTATCATAGAAGAATTGGTCA +TCTTTCACGACCACGCCCTCATAATCATTTTCCTAATCTGCTTCCTAGTCCTGTACGCCC +TATTCCTAACACTCACAACAAAACTCACCAACACCAGCATCTCAGACGCCCAAGAGATAG +AGACTATTTGAACTATCCTACCGGCCATCATCCTAATTCTAATCGCCCTCCCATCCCTAC +GCATCCTCTACTTAACAGACGAGATCAACGACCCTTCCTTCACCATCAAATCAATCGGTC +ATCAATGATACTGAACCTACGAGTACACTGACTACGGTGGATTGATCTTCAACTCTTACA +TGCTCCCACCACTATTCCTAGAACCAGGCGACCTTCGACTCCTCGACGTCGACAACCGAG +TAGTCCTCCCAGTCGAAGCTCCCGTTCGCATAATAATCACATCCCAAGACGTCTTACACT +CATGAACTGTACCCTCACTAGGCCTGAAAACGGACGCAATCCCCGGACGCCTAAACCAAA +CCACATTCACTGCCACGCGACCAGGAGTGTACTATGGCCAATGCTCAGAAATCTGTGGAG +CTAACCACAGCTTTATGCCTATCGTCCTAGAACTAATCCCCCTAAAAATCTTCGAAATAG +GGCCCGTATTCACTTTATAACTTCCCCCACCCCCACAACCCATCCTACCCCCTTTCCTGA +GGCCCACTGCAAAGCTAATCTAGCATTAACCTTTTAAGTTAAAGACTAAGAGAATCAACC +CCTCTTTGCAGTGAAATGCCCCAACTAAATACCACCACATGGCCCACCATCATCACCCCA +ATACTCCTTGCACTATTCCTCATCACTCAACTAAAACTACTAAACTCACACCTCCACCCA +CCCACCCCACCAAAATTCACTAAACCAAAACTCCACGCCAAACCCTGAGGACCAAAATGA +ACGAAAGTCTATTTACCCCATTCATTACCCCCACAGTACTAGGCCTCCCCGCCGCAGTAC +TAGTCATCTTATTTCCCCCCTTACTGATCCCCACCTCCAAACATCTCATCAACAACCGAC +TAATTATTATCCAACAATGACTAATCCGACTCATCCTAAAACAAATAATAACCACCCATA +ACGCTAAAGGACGAACTTGATCCCTCATACTAACGTCCCTAATCATTTTCATCGCCTCAA +CCAACCTCCTAGGACTCCTCCCCTACTCATTTACACCAACCACCCAACTATCCATAAATT 
+TAGCTATAGCAATTCCCTTATGAGCAAGCACGGTAGCTATGGGCCTTCGCTTCAAAGCCA +AAATTACCCTAACCCACCTCTTACCACAAGGTACCCCCACACCTCTCATCCCTATACTAA +TTATTATTGAAACCGTCAGCCTTTTCATTCAACCACTAGCCTTAGCCGTACGCCTAACTG +CTAACATCACTGCAGGCCACCTACTCATGCACCTAATCGGAAGCTCTGCACTAGCTATAC +TAGCCATCAACCTCCCCCTAACCCTCATCACCCTTACAATCTTAACCCTGCTAACAATCC +TGGAGACTGCCATCGCCCTAATTCAAGCCTACGTCTTCACACTTCTAGTAAGCCTCTACC +TGCACGACAACTCATAATGGCCCATCAATCACACGCCTACCACATAGTAAAACCTAGCCC +ATGACCCCTAACAGGAGCTCTCTCAGCCCTCCTAACAACATCTGGCCTAACCATGTGATT +CCACTTCCACTCCACAACCCTACTATTAACAGGCCTACTAACCAATGCACTAACCATATA +CCAATGGTGACGAGATGTAGTGCGAGAAAGCACATACCAAGGCCACCACACACTACCCGT +CCAAAAAGGCCTCCGATATGGAATAATCCTATTCATCACTTCAGAAGTCTTTTTCTTCGC +CGGATTCTTCTGAGCATTCTACCACTCCAGCCTAGCCCCCACCCCTCAACTTGGAGGACA +CTGACCCCCAACAGGCATTATCCCCCTCAACCCCCTAGAAGTCCCACTCCTAAACACATC +CGTACTACTCGCATCAGGAGTCTCAATTACCTGAGCCCATCACAGCCTGATGGAAAATAA +TCGAACCCAAATAATTCAAGCACTACTCATCACAATCTTACTAGGCATCTACTTCACTCT +CCTTCAGGCTTCAGAATACATTGAAGCTCCTTTCACCATCTCTGACGGCATCTACGGCTC +AACATTCTTCATAGCCACGGGATTCCACGGCCTCCACGTCATTATCGGATCAACTTTCCT +CACTGTATGCCTAGCCCGCCAGCTATTATTCCACTTCACATCCAAACATCACTTTGGCTT +TGAGGCCGCCGCCTGATACTGGCACTTTGTAGACGTAGTCTGACTGTTTCTGTACGTCTC +CATCTACTGATGAGGTTCCTACTCTTTTAGTATAAACAGTACCGTTAACTTCCAATTAAC +TAGTTTTGACAACGCCCAAAAAAGAGTAATTAACTTCGTCCTAGCTCTAACAGTCAACAC +CCTCCTAGCCCTGCTACTAATAACCATCACATTCTGACTACCACAACTCTACCCCTACAT +AGAAAAATCCGACCCATACGAATGTGGATTTGACCCCGCATACCCCGCTCGCATTCCTTT +CTCCATAAAATTTTTCTTAGTAGCCATCACCTTCCTACTATTCGACCTAGAAATCGCCCT +GCTACTACCCCTGCCATGGGCCCTACAAACAACCAACTTACCACTAATAACTACATCATC +ACTTATATTAATTATCATCCTAGCCCTAGGCCTAACTTACGAATGATCACAAAAAGGATT +AGACTGAGCCGAATTGGTAAATAGTTTAAACAAAACAAATGATTTCGACTCATTAAATTA +TGACAGCCATATTTACCAAATGCCCCTTATCTACATAAATATCACACTAGCATTCACCAT +ATCACTCCTAGGCATACTAGTCTACCGCTCACACCTAATATCTTCTCTACTATGTCTAGA +AGGAATAATATTATCATTGTTCATTATAATTACTCTCATAACCCTCAACACCCACTCTCT +CCTAGCTAACATCATACCCATCACCATGCTAGTCTTCGCTGCCTGCGAAGCAGCAGTAGG +CCTCGCCCTACTAGCCTCAATCTCCAATACATACGGCCTAGACTACGTCAACAACCTAAA 
+CCTACTTCAATGCTAAAACTAATTATCCCAACAATCATACTGCTGCCCCTAACATGACTC +TCCAAAACGCACATAATCTGAATCAACACCACCACCCACAGCCTAATCATCAGCTCCATC +CCCCTACTATTCCTCAATCAAACCAACAGCAACCTGTACAGCTACTCCCTTCTTTTCTCC +TCCGACCCCTTATCAACCCCCCTTCTAATACTAACAACCTGACTCCTACCCCTCATAATT +ATAGCAAGCCAACACCATCTATCCAACGAACCCCCATCACGAAAAAAATTATACCTCACC +ATACTAATCTCTCTTCAAATCTCCCTAATCATAACATTCACAGCCACAGAGCTAATTATA +TTTTATATCCTCTTCGAAACCACTCTCATCCCCACCCTAGTCATTATCACCCGCTGAGGC +AACCAGCCAGAGCGCTTAAATGCAGGCACATACTTTCTATTCTACACACTAGTAGGCTCC +CTCCCCCTACTCATTGCCCTAATCCACACCTACAACACCCTAGGCTCGCTTAACATTGTA +TTACTAACTCTCACCGCCCGGGAGCTAACAGACTCCTGATCCAACAGCCTAATATGACTA +GCGTACACAATAGCTTTCATAGTAAAAATACCCCTCTACGGACTACACCTATGACTCCCT +AAAGCCCATGTAGAAGCCCCCATTGCCGGCTCAATAGTACTCGCCGCAGTGCTCTTAAAA +CTAGGTGGTTACGGTATAATACGCCTTATCCCCATTCTCAATCCCCTAACTAAACACATA +GCCTACCCCTTTATCATACTATCCCTATGAGGCATAATCATAACAAGCTCCATCTGCTTA +CGACAAACCGACCTAAAATCACTCATCGCATACTCCTCAGTCAGCCACATAGCGCTTGTT +GTAGCAGCTATCCTCATTCAAACCCCCTGAAGCTTCACCGGCGCAACCACCCTCATAATT +GCCCATGGACTCACATCCTCCCTACTGTTCTGCCTAGCAAACTCAAACTACGAACGAACC +CACAGCCGCATCATAATCCTCTCTCAAGGCCTTCAAACTCTACTCCCCCTAATAGCCCTC +TGATGACTTCTAGCAAGCCTCACTAACCTTGCCCTACCACCCACCATCAACCTACTAGGA +GAACTCTCCGTACTAATAGCCATATTCTCTTGATCTAACATCACCATCCTACTAACAGGA +CTCAACATACTAATCACAACCCTATACTCTCTCTATATATTCACCACAACACAACGAGGT +ACACCCACACATCACACCAACAACATAAAACCTTCTTTCACACGTGAAAACACCCTCATG +CTCATACACCTATCCCCCATTCTCCTCTTGTCCCTCAACCCCAGCATCATCGCTGGATTC +GCCTACTGTAAATATAGTTTAACCAAAACATCAGATTGTGAATCTAATAATAGGGCCCAC +AACCCCTTATTTACCGAGAAAGCTCACAAGAACTGCTAACTCTCACCCCATGTGTAACAA +CATGGCTTTCTCAACTTTTAAAGGATAACAGCTATCCCTTGGTCTTAGGACCCAAAAATT +TTGGTGCAACTCCAAATAAAAGTAACAGCCATGTTTACCACCATAACTGCCCTCACCTTG +ACTTCCCTAATCCCCCCCATTACCGCTACCCTCATTAACCCCAACAAAAAAAACTCATAC +CCCCACTATGTAAAAACTGCCATCGCATCCGCCTTTACTATCAGCCTTATCCCAACAACA +ATATTTATCTGCCTAGGACAAGAAACCATCGTCACAAACTGATGCTGAACAACCACCCAG +ACACTACAACTCTCACTAAGCTTCAAACTTGACTACTTCTCCATAACATTCCTCCCCGTA +GCACTACTCATCACTTGATCCATTATAGAATTTTCACTATGGTATATAGCCTCAGACCCA 
+AACATCAACCAATTTCTCAAATTCCTCCTTATTTTCCTAATCACCATAATTATCCTAGTC +ACTGCCAATAACCTACTCCAACTCTTCATCGGCTGAGAGGGCGTAGGGATCATATCCTTC +CTGCTCATTAGTTGATGATACGCCCGAACAGACGCCAACACGGCAGCTATTCAAGCAATC +CTATACAATCGTATCGGCGATATTGGCTTCATCCTGGCTCTAGCATGATTCCTCCTACAC +TCCAACTCATGGGAACTACAACAAGTATTCCTCCTAAACAATAACCCTAACCTCCTCCCA +CTACTAGGACTCCTCCTAGCCGCAGCTGGCAAATCAGCCCAACTAGGCCTTCACCCCTGA +CTACCCTCAGCCATAGAAGGCCCAACCCCCGTCTCAGCCCTACTTCACTCAAGCACCATG +GTCGTGGCTGGGGTCTTCCTACTCATCCGCTTTCACCCATTAACAGAAAACAGCCCACAT +ATCCAAACCCTTACACTATGCTTAGGGGCCATCACCACCCTGTTCGCAGCAATCTGCGCC +CTCACACAAAACGACATTAAGAAAATCGTAGCTTTCTCCACCTCAAGTCAACTAGGACTT +ATAATGGTCACAATTGGCATTAACCAGCCACACCTGGCACTCCTCCACATCTGCACCCAC +GCCTTCTTCAAAGCCCTTTTATTCATATGTTCTGGGTCCATCATCCACAACCTCAACAAT +GAGCAAGACATCCGAAAAATAGGAGGACTACTCAAAACCATACCCCTAACCTCAACCTCC +CTCACTATCAGCAGCCTAGCCCTCGCAGGAATACCCTTCCTCTCAGGCTTCTACTCCAAA +GACCTCATTATCGAGACCGCAAACATATCCTATACCAACACCTGAGCCCTGTCTATCACT +CTCATCGCCACCTCCTTAACAGGCGCCTACAGCACTCGAATAATCCTCCACACCCTTACA +AGCAAACCCCACTTCCCAACCCCAATCTCTATCAATGAAAACAACCCCACTCTACTTAAA +CCCATCAAGCGCCTTATGCTAGGAAGCCTATTCGCAGGATTCCTAATCACCAACAACATC +CCCCCTATATCCCTGCCCCAAGTAACAACCCCCCCTTACCTAAAACTCGCAGCTCTAGCT +GCCACCCTCCTAGGTCTCCTAGTAGCCCTAGACTTAAACTACCTAGCCAACAAACTCAAG +ACAAAAACCCCTCCACCCACATTCTATTTCTCCATCATACTCGGATTCTACCCTAGCATC +ATCCACCGCATAATCCCCCACCTAAGCCTTCTCATAAGCCAAAACTTATCCCTACTCCTA +CTAGACCTAACCTGACTAAAAAAACTAATACCCAAAACAATCTCACAACACCAAACCTCA +GCCTCCATCACTATTTCAACCCAAAAAGGTTTAATCAAACTCTACTTCCTCTCTTTCCTC +ATCCCACTCCTCCTAATCCTCCTTATAATCTCATAACCTATTACCCCGAGCAATCTCAAT +TACAACATAAACACCAACAAATAACGTTCAACCAGTAACCACCACCAACCAACGCCCATA +ATCATATAAAGCCCCCGCACCAATAGGATCCTCCCGAATCAACCCCGACCCTTCCCCTTC +ATAAATTATCCAGCTCCCCACGCTATTAAAATTCACCACTACCACCACTCCATCATACTC +TTTTACCCACAACACCAGCCCCACTTCCATCACTAATCCCACCAGAACACTCACCAATAC +CTCAACCCCTGACCCCCATGCCTCAGGATATTCCTCAATAGCTATTGCCGTAGTATACCC +AAAAACAACCATCATACCCCCTAAATAAATTAAAAAAACCATTAAACCCATATAACCTCC +CCCACAATTTAAAATAACTGCACACCCAACCGCACCACTAATAATCAACACTAAACCCCC 
+ATAAATAGGAGAGGGCTTAGAAGAAAACCCCACGAACCCTATCACTAAAATTACACTCAA +CAGAAACAAAGCATATGTCATTGTTCTCGCATAGACTGTGACTATGACCAATGGTATGAA +AAAACATCGTTGTACCTCAACTACAAGAACACTAATGACCTCAACACGTAAAACCAACCC +ACTAATAAAATTAATCAACCACTCACTTATCGACCTCCCCACCCCATCAAACATCTCCGC +ATGATGGAACTTCGGCTCACTCCTAGGCGCCTGCTTAATCATCCAAATCACCACTGGACT +ATTCCTAGCTATACATTATTCACCAGACGCCTCCACTGCCTTTTCATCAATCGCCCACAT +CACTCGAGATGTAAACTACGGCTGAATAATTCGCCACCTCCACGCTAACGGCGCCTCAAT +ATTCTTTATCTGCCTCTTCTTACATATCGGCCGAGGCCTATACTATGGCTCATTCACCCA +CCTAGAAACCTGAAACATCGGCATCATCCTACTATTTACAACTATAATAACAGCCTTCAT +AGGTTACGTCCTCCCATGAGGCCAAATATCCTTCTGAGGAGCCACAGTAATCACAAATCT +ACTGTCCGCCATCCCATACATTGGAACAGACCTGGTCCAATGAGTCTGAGGTGGCTACTC +AGTAAATAGCCCCACTCTAACACGATTCTTCACCCTACACTTCATACTACCCTTCATTAT +TACAGCCCTAACAACTCTACACCTCTTATTCCTACACGAAACAGGATCAAATAACCCCCT +GGGAATCCCCTCCCATTCCGACAAAATCACCTTCCACCCCTACTACACAATCAAAGACAT +CCTAGGCCTACTCCTTTTTCTCCTCGCCCTAATAACACTAACACTACTCTCACCAGACCT +CCTAAGCGACCCAGACAACTACACCTTAGCTAACCCCCTAAGCACCCCACCCCACATTAA +ACCCGAATGATATTTCCTATTCGCCTACGCAATCCTACGATCCGTCCCCAACAAACTAGG +AGGTGTAATAGCCCTCATACTATCCATCCTAATCCTAACAACAATCCCTGCCCTTCACAT +GTCCAAGCAACAGAGCATAACATTTCGCCCATTGAGCCAATTCCTATATTGACTTTTAAT +CGCCGACCTTCTAATTCTCACCTGAATTGGAGGGCAACCAGTAAGCTACCCCTTCATCAC +CATTAGCCAAGTAGCATCCACATTGTACTTCACTACTATCCTTCTACTTATACCAGCCTC +TTCCCTGATCGAAAACCACATACTCAAATGAACCTGCCCCTGTAGTACAAATAAGTACAC +CAGCCTTGTAACCTGAAAATGAAGACCCTCTTCCATGGGCAAAAAAAATCAGAGAAAAAG +CACTTAACTTCACCGTCAGCCCCCAAAGCCAACATTCTAATTTTAAACTACTCTCTGTTC +TTTCATGGGGGACCAGATTTGGGTGCCACCCCAGTACTGACCCATTTCTAACGGCCTATG +TATTTCGTACATTCCTGCTAGCCAACATGAATATCACCCAACACAACAATCGCTTAACCA +ACTATAATGCATACAAAACTCCAACCACACTCGACCTCCACACCCCGCTTACAAGCAAGT +ACCCCCCCATGCCCCCCCACCCAAACACATACACCGATCTCTCCACATAACCCCTCAACC +CCCAGCATATCAACAGACCAAACAAACCTTAAAGTACATAGCACATACTATCCTAACCGC +ACATAGCACATCCCGTTAAAACCCTGCTCATCCCCACGGATGCCCCCCCTCAGTTAGTAA +TCCCTTACTCACCATCCTCCGTGAAATCAATATCCCGCACAAGAGTGCTACTCCCCTCGC +TCCGGGCCCATAAAACCTGGGGGTAGCTAAAGTGAGCTGTATCCGGCATCTGGTTCTTAC 
+TTCAGGGCCATAAAACCCAAGATCGCCCACACGTTCCCCTTAAATAAGACATCACGATG diff --git a/test/MT.gfa b/test/MT.gfa new file mode 100644 index 0000000..2c6b58c --- /dev/null +++ b/test/MT.gfa @@ -0,0 +1,19 @@ +S MTh0 GATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCATTTGGTATTTTCGTCTGGGGGGTATGCACGCGATAGCATTGCGAGACGCTGGAGCCGGAGCACCCTATGTCGCAGTATCTGTCTTTGATTCCTGCCTCATCCTATTATTTATCGCACCTACGTTCAATATTACAGGCGAACATACTTACTAAAGTGTGTTAATTAATTAATGCTTGTAGGACATAATAATAACAATTGAATGTCTGCACAGCCACTTTCCACACAGACATCATAACAAAAAATTTCCACCAAACCCCCCCTCCCCCGCTTCTGGCCACAGCACTTAAACACATCTCTGCCAAACCCCAAAAACAAAGAACCCTAACACCAGCCTAACCAGATTTCAAATTTTATCTTTTGGCGGTATGCACTTTTAACAGTCACCCCCCAACTAACACATTATTTTCCCCTCCCACTCCCATACTACTAATCTCATCAATACAACCCCCGCCCATCCTACCCAGCACACACACACCGCTGCTAACCCCATACCCCGAACCAACCAAACCCCAAAGACACCCCCCACAGTTTATGTAGCTTACCTCCTCAAAGCAATACACTGAAAATGTTTAGACGGGCTCACATCACCCCATAAACAAATAGGTTTGGTCCTAGCCTTTCTATTAGCTCTTAGTAAGATTACACATGCAAGCATCCCCGTTCCAGTGAGTTCACCCTCTAAATCACCACGATCAAAAGGAACAAGCATCAAGCACGCAGCAATGCAGCTCAAAACGCTTAGCCTAGCCACACCCCCACGGGAAACAGCAGTGATTAACCTTTAGCAATAAACGAAAGTTTAACTAAGCTATACTAACCCCAGGGTTGGTCAATTTCGTGCCAGCCACCGCGGTCACACGATTAACCCAAGTCAATAGAAGCCGGCGTAAAGAGTGTTTTAGATCACCCCCTCCCCAATAAAGCTAAAACTCACCTGAGTTGTAAAAAACTCCAGTTGACACAAAATAGACTACGAAAGTGGCTTTAACATATCTGAACACACAATAGCTAAGACCCAAACTGGGATTAGATACCCCACTATGCTTAGCCCTAAACCTCAACAGTTAAATCAACAAAACTGCTCGCCAGAACACTACGAGCCACAGCTTAAAACTCAAAGGACCTGGCGGTGCTTCATATCCCTCTAGAGGAGCCTGTTCTGTAATCGATAAACCCCGATCAACCTCACCACCTCTTGCTCAGCCTATATACCGCCATCTTCAGCAAACCCTGATGAAGGCTACAAAGTAAGCGCAAGTACCCACGTAAAGACGTTAGGTCAAGGTGTAGCCCATGAGGTGGCAAGAAATGGGCTACATTTTCTACCCCAGAAAACTACGATAGCCCTTATGAAACTTAAGGGTCGAAGGTGGATTTAGCAGTAAACTAAGAGTAGAGTGCTTAGTTGAACAGGGCCCTGAAGCGCGTACACACCGCCCGTCACCCTCCTCAAGTATACTTCAAAGGACATTTAACTAAAACCCCTACGCATTTATATAGAGGAGACAAGTCGTAACATGGTAAGTGTACTGGAAAGTGCACTTGGACGAACCAGAGTGTAGCTTAACACAAAGCACCCAACTTACACTTAGGAGATTTCAACTTAACTTGACCGCTCTGAGCTAAACCTAGCCCCAAACCCACTCCACCTTACTACCAGACAACCTTAGCCAAACCATTTACCCAAATAAAGTATAGGCGATAGAAATTGAAACCTGGCGCAATAGATATAGTACCGCAAGGGAAAGAT
GAAAAATTATAACCAAGCATAATATAGCAAGGACTAACCCCTATACCTTCTGCATAATGAATTAACTAGAAATAACTTTGCAAGGAGAGCCAAAGCTAAGACCCCCGAAACCAGACGAGCTACCTAAGAACAGCTAAAAGAGCACACCCGTCTATGTAGCAAAATAGTGGGAAGATTTATAGGTAGAGGCGACAAACCTACCGAGCCTGGTGATAGCTGGTTGTCCAAGATAGAATCTTAGTTCAACTTTAAATTTGCCCACAGAACCCTCTAAATCCCCTTGTAAATTTAACTGTTAGTCCAAAGAGGAACAGCTCTTTGGACACTAGGAAAAAACCTTGTAGAGAGAGTAAAAAATTTAACACCCATAGTAGGCCTAAAAGCAGCCACCAATTAAGAAAGCGTTCAAGCTCAACACCCACTACCTAAAAAATCCCAAACATATAACTGAACTCCTCACACCCAATTGGACCAATCTATCACCCTATAGAAGAACTAATGTTAGTATAAGTAACATGAAAACATTCTCCTCCGCATAAGCCTGCGTCAGATTAAAACACTGAACTGACAATTAACAGCCCAATATCTACAATCAACCAACAAGTCATTATTACCCTCACTGTCAACCCAACACAGGCATGCTCATAAGGAAAGGTTAAAAAAAGTAAAAGGAACTCGGCAAATCTTACCCCGCCTGTTTACCAAAAACATCACCTCTAGCATCACCAGTATTAGAGGCACCGCCTGCCCAGTGACACATGTTTAACGGCCGCGGTACCCTAACCGTGCAAAGGTAGCATAATCACTTGTTCCTTAAATAGGGACCTGTATGAATGGCTCCACGAGGGTTCAGCTGTCTCTTACTTTTAACCAGTGAAATTGACCTGCCCGTGAAGAGGCGGGCATAACACAGCAAGACGAGAAGACCCTATGGAGCTTTAATTTATTAATGCAAACAGTACCTAACAAACCCACAGGTCCTAAACTACCAAACCTGCATTAAAAATTTCGGTTGGGGCGACCTCGGAGCAGAACCCAACCTCCGAGCAGTACATGCTAAGACTTCACCAGTCAAAGCGAACTACTATACTCAATTGATCCAATAACTTGACCAACGGAACAAGTTACCCTAGGGATAACAGCGCAATCCTATTCTAGAGTCCATATCAACAATAGGGTTTACGACCTCGATGTTGGATCAGGACATCCCGATGGTGCAGCCGCTATTAAAGGTTCGTTTGTTCAACGATTAAAGTCCTACGTGATCTGAGTTCAGACCGGAGTAATCCAGGTCGGTTTCTATCTACaTTCAAATTCCTCCCTGTACGAAAGGACAAGAGAAATAAGGCCTACTTCACAAAGCGCCTTCCCCCGTAAATGATATCATCTCAACTTAGTATTATACCCACACCCACCCAAGAACAGGGTTTGTTAAGATGGCAGAGCCCGGTAATCGCATAAAACTTAAAACTTTACAGTCAGAGGTTCAATTCCTCTTCTTAACAACATACCCATGGCCAACCTCCTACTCCTCATTGTACCCATTCTAATCGCAATGGCATTCCTAATGCTTACCGAACGAAAAATTCTAGGCTATATACAACTACGCAAAGGCCCCAACGTTGTAGGCCCCTACGGGCTACTACAACCCTTCGCTGACGCCATAAAACTCTTCACCAAAGAGCCCCTAAAACCCGCCACATCTACCATCACCCTCTACATCACCGCCCCGACCTTAGCTCTCACCATCGCTCTTCTACTATGAACCCCCCTCCCCATACCCAACCCCCTGGTCAACCTCAACCTAGGCCTCCTATTTATTCTAGCCACCTCTAGCCTAGCCGTTTACTCAATCCTCTGATCAGGGTGAGCATCAAACTCAAACTACGCCCTGATCGGCGCACTGCGAGCAGTAGCCCAAACAATCTCATATGAAGTCACCCTAGCCATCATTCTACTATCAACATTACTAATAAGTGGCTCCTTTAACCTCTCCACCC
TTATCACAACACAAGAACACCTCTGATTACTCCTGCCATCATGACCCTTGGCCATAATATGATTTATCTCCACACTAGCAGAGACCAACCGAACCCCCTTCGACCTTGCCGAAGGGGAGTCCGAACTAGTCTCAGGCTTCAACATCGAATACGCCGCAGGCCCCTTCGCCCTATTCTTCATAGCCGAATACACAAACATTAT SN:Z:MT_human SO:i:0 SR:i:0 +S MTh4001 TATAATAAACACCCTCACCACTACAATCTTCCTAGGAACAACATATGACGCACTCTCCCCTGAACTCTACACAACATATTTTGTCACCAAGACCCTACTTCTAACCTCCCTGTTCTTATGAATTCGAACAGCATACCCCCGATTCCGCTACGACCAACTCATACACCTCCTATGAAAAAACTTCCTACCACTCACCCTAGCATTACTTATATGATATGTCTCCATACCCATTACAATCTCCAGCATTCCCCCTCAAACCTAAGAAATATGTCTGATAAAAGAGTTACTTTGATAGAGTAAATAATAGGAGCTTAAACCCCCTTATTTCTAGGACTATGAGAATCGAACCCATCCCTGAGAATCCAAAATTCTCCGTGCCACCTATCACACCCCATCCTAAAGTAAGGTCAGCTAAATAAGCTATCGGGCCCATACCCCGAAAATGTTGGTTATACCCTTCCCGTACTAATTAATCCCCTGGCCCAACCCGTCATCTACTCT SN:Z:MT_human SO:i:4001 SR:i:0 +S MTo3426 GGGGTAAATGATGGGTTGGGCCAAGGGGTTAATTAGTACGGGAAGGGTATAACCAACATTTTCGGGGTATGGGCCCGATAGCTTATTTAGCTGACCTTACTTTAGGATAGGGTGTGATGGGTGGCACGGAGAATTTTGGATTCTCAGGGGTGGGTTCAACTCCCATAATCCTAGAAATAAGGGGATTTAGACCTCTATTTTTTACTCTATCAAAGTAACTCTTTTGTCAGACATATTTCTTAGGTTTGTGGGGGAATGCCGGAGGTTGCAATGGGTACGGAGATGTGTCATATTAGTAGTGCTAGGGTGAGTGGCAGGAAATTTTTTCATAGAAGGTGCATGAGTTGGTCGTAGCGGAATCGGGGGTATGTTGATCGAATTCATAGGAATAGGGAGGTTAGGAGTAGGGTTTTGATGGTGAAGAGGGTTGTGTAGAGTTCTGGGGAGTGGATGTTGAATGTTGTTCCTAGGAAAATTATAGTGGTGAGGGCGTTTATTAAG SN:Z:MT_orang SO:i:3426 SR:i:1 +S MTh4502 
ACCATCTTTGCAGGCACACTCATCACAGCGCTAAGCTCGCACTGATTTTTTACCTGAGTAGGCCTAGAAATAAACATGCTAGCTTTTATTCCAGTTCTAACCAAAAAAATAAACCCTCGTTCCACAGAAGCTGCCATCAAGTATTTCCTCACGCAAGCAACCGCATCCATAATCCTTCTAATAGCTATCCTCTTCAACAATATACTCTCCGGACAATGAACCATAACCAATACTACCAATCAATACTCATCATTAATAATCATAATAGCTATAGCAATAAAACTAGGAATAGCCCCCTTTCACTTCTGAGTCCCAGAGGTTACCCAAGGCACCCCTCTGACATCCGGCCTGCTTCTTCTCACATGACAAAAACTAGCCCCCATCTCAATCATATACCAAATCTCTCCCTCACTAAACGTAAGCCTTCTCCTCACTCTCTCAATCTTATCCATCATAGCAGGCAGTTGAGGTGGATTAAACCAAACCCAGCTACGCAAAATCTTAGCATACTCCTCAATTACCCACATAGGATGAATAATAGCAGTTCTACCGTACAACCCTAACATAACCATTCTTAATTTAACTATTTATATTATCCTAACTACTACCGCATTCCTACTACTCAACTTAAACTCCAGCACCACGACCCTACTACTATCTCGCACCTGAAACAAGCTAACATGACTAACACCCTTAATTCCATCCACCCTCCTCTCCCTAGGAGGCCTGCCCCCGCTAACCGGCTTTTTGCCCAAATGGGCCATTATCGAAGAATTCACAAAAAACAATAGCCTCATCATCCCCACCATCATAGCCACCATCACCCTCCTTAACCTCTACTTCTACCTACGCCTAATCTACTCCACCTCAATCACACTACTCCCCATATCTAACAACGTAAAAATAAAATGACAGTTTGAACATACAAAACCCACCCCATTCCTCCCCACACTCATCGCCCTTACCACGCTACTCCTACCTATCTCCCCTTTTATACTAATAATCTTATAGAAATTTAGGTTAAATACAGACCAAGAGCCTTCAAAGCCCTCAGTAAGTTGCAATACTTAATTTCTGTAACAGCTAAGGACTGCAAAACCCCACTCTGCATCAACTGAACGCAAATCAGCCACTTTAATTAAGCTAAGCCCTTACTAGACCAATGGGACTTAAACCCACAAACACTTAGTTAACAGCTAAGCACCCTAATCAACTGGCTTCAATCTACTTCTCCCGCCGCCGGGAAAAAAGGCGGGAGAAGCCCCGGCAGGTTTGAAGCTGCTTCTTCGAATTTGCAATTCAATATGAAAATCACCTCGGAGCTGGTAAAAAGAGGCCTAACCCCTGTCTTTAGATTTACAGTCCAATGCTTCACTCAGCCATTTTACCTCACCCCCACTGATGTTCGCCGACCGTTGACTATTCTCTACAAACCACAAAGACATTGGAACACTATACCTATTATTCGGCGCATGAGCTGGAGTCCTAGGCACAGCTCTAAGCCTCCTTATTCGAGCCGAGCTGGGCCAGCCAGGCAACCTTCTAGGTAACGACCACATCTACAACGTTATCGTCACAGCCCATGCATTTGTAATAATCTTCTTCATAGTAATACCCATCATAATCGGAGGCTTTGGCAACTGACTAGTTCCCCTAATAATCGGTGCCCCCGATATGGCGTTTCCCCGCATAAACAACATAAGCTTCTGACTCTTACCTCCCTCTCTCCTACTCCTGCTCGCATCTGCTATAGTGGAGGCCGGAGCAGGAACAGGTTGAACAGTCTACCCTCCCTTAGCAGGGAACTACTCCCACCCTGGAGCCTCCGTAGACCTAACCATCTTCTCCTTACACCTAGCAGGTGTCTCCTCTATCTTAGGGGCCATCAATTTCATCACAACAATTATCAATATAAAACCCCCTGCCATAACCCAATACCAAACGCCCCTCTTCGTCTGATCCGTCCTAATCACAGCAGTCCTACTTCTCCTATCTCTCCC
AGTCCTAGCTGCTGGCATCACTATACTACTAACAGACCGCAACCTCAACACCACCTTCTTCGACCCCGCCGGAGGAGGAGACCCCATTCTATACCAACACCTATTCTGATTTTTCGGTCACCCTGAAGTTTATATTCTTATCCTACCAGGCTTCGGAATAATCTCCCATATTGTAACTTACTACTCCGGAAAAAAAGAACCATTTGGATACATAGGTATGGTCTGAGCTATGATATCAATTGGCTTCCTAGGGTTTATCGTGTGAGCACACCATATATTTACAGTAGGAATAGACGTAGACACACGAGCATATTTCACCTCCGCTACCATAATCATCGCTATCCCCACCGGCGTCAAAGTATTTAGCTGACTCGCCACACTCCACGGAAGCAATATGAAATGATCTGCTGCAGTGCTCTGAGCCCTAGGATTCATCTTTCTTTTCACCGTAGGTGGCCTGACTGGCATTGTATTAGCAAACTCATCACTAGACATCGTACTACACGACACGTACTACGTTGTAGCCCACTTCCACTATGTCCTATCAATAGGAGCTGTATTTGCCATCATAGGAGGCTTCATTCACTGATTTCCCCTATTCTCAGGCTACACCCTAGACCAAACCTACGCCAAAATCCATTTCACTATCATATTCATCGGCGTAAATCTAACTTTCTTCCCACAACACTTTCTCGGCCTATCCGGAATGCCCCGACGTTACTCGGACTACCCCGATGCATACACCACATGAAACATCCTATCATCTGTAGGCTCATTCATTTCTCTAACAGCAGTAATATTAATAATTTTCATGATTTGAGAAGCCTTCGCTTCGAAGCGAAAAGTCCTAATAGTAGAAGAACCCTCCATAAACCTGGAGTGACTATATGGATGCCCCCCACCCTACCACACATTCGAAGAACCCGTATACATAAAATCTAGACAAAAAAGGAAGGAATCGAACCCCCCAAAGCTGGTTTCAAGCCAACCCCATGGCCTCCATGACTTTTTCAAAAAGGTATTAGAAAAACCATTTCATAACTTTGTCAAAGTTAAATTATAGGCTAAATCCTATATATCTTAATGGCACATGCAGCGCAAGTAGGTCTACAAGACGCTACTTCCCCTATCATAGAAGAGCTTATCACCTTTCATGATCACGCCCTCATAATCATTTTCCTTATCTGCTTCCTAGTCCTGTATGCCCTTTTCCTAACACTCACAACAAAACTAACTAATACTAACATCTCAGACGCTCAGGAAATAGAAACCGTCTGAACTATCCTGCCCGCCATCATCCTAGTCCTCATCGCCCTCCCATCCCTACGCATCCTTTACATAACAGACGAGGTCAACGATCCCTCCCTTACCATCAAATCAATTGGCCACCAATGGTACTGAACCTACGAGTACACCGACTACGGCGGACTAATCTTCAACTCCTACATACTTCCCCCATTATTCCTAGAACCAGGCGACCTGCGACTCCTTGACGTTGACAATCGAGTAGTACTCCCGATTGAAGCCCCCATTCGTATAATAATTACATCACAAGACGTCTTGCACTCATGAGCTGTCCCCACATTAGGCTTAAAAACAGATGCAATTCCCGGACGTCTAAACCAAACCACTTTCACCGCTACACGACCGGGGGTATACTACGGTCAATGCTCTGAAATCTGTGGAGCAAACCACAGTTTCATGCCCATCGTCCTAGAATTAATTCCCCTAAAAATCTTTGAAATAGGGCCCGTATTTACCCTATAGCACCCCCTCTACCCCCTCTAGAGCCCACTGTAAAGCTAACTTAGCATTAACCTTTTAAGTTAAAGATTAAGAGAACCAACACCTCTTTACAGTGAAATGCCCCAACTAAATACTACCGTATGGCCCACCATAATTACCCCCATACTCCTTACACTATTCCTCATCACCCAACTAAAAATATTAAACACAAACTACCACCTACCTCCCTCACCAAAGCCCATAAAAATAAAAAA
TTATAACAAACCCTGAGAACCAAAATGAACGAAAATCTGTTCGCTTCATTCATTGCCCCCACAATCCTAGGCCTACCCGCCGCAGTACTGATCATTCTATTTCCCCCTCTATTGATCCCCACCTCCAAATATCTCATCAACAACCGACTAATCACCACCCAACAATGACTAATCAAACTAACCTCAAAACAAATGATAACCATACACAACACTAAAGGACGAACCTGATCTCTTATACTAGTATCCTTAATCATTTTTATTGCCACAACTAACCTCCTCGGACTCCTGCCTCACTCATTTACACCAACCACCCAACTATCTATAAACCTAGCCATGGCCATCCCCTTATGAGCGGGCACAGTGATTATAGGCTTTCGCTCTAAGATTAAAAATGCCCTAGCCCACTTCTTACCACAAGGCACACCTACACCCCTTATCCCCATACTAGTTATTATCGAAACCATCAGCCTACTCATTCAACCAATAGCCCTGGCCGTACGCCTAACCGCTAACATTACTGCAGGCCACCTACTCATGCACCTAATTGGAAGCGCCACCCTAGCAATATCAACCATTAACCTTCCCTCTACACTTATCATCTTCACAATTCTAATTCTACTGACTATCCTAGAAATCGCTGTCGCCTTAATCCAAGCCTACGTTTTCACACTTCTAGTAAGCCTCTACCTGCACGACAACACATAATGACCCACCAATCACATGCCTATCATATAGTAAAACCCAGCCCATGACCCCTAACAGGGGCCCTCTCAGCCCTCCTAATGACCTCCGGCCTAGCCATGTGATTTCACTTCCACTCCATAACGCTCCTCATACTAGGCCTACTAACCAACACACTAACCATATACCAATGATGGCGCGATGTAACACGAGAAAGCACATACCAAGGCCACCACACACCACCTGTCCAAAAAGGCCTTCGATACGGGATAATCCTATTTATTACCTCAGAAGTTTTTTTCTTCGCAGGATTTTTCTGAGC SN:Z:MT_human SO:i:4502 SR:i:0 +S MTo8961 ATTCTACCACTCCAGCCTAGCCCCCACCCCTCAACTTGGAGGACACTGACCCCCAACAGGCATTATCCCCCTCAACCCCCTAGAAGTCCCACTCCTAAACACATCCGTACTACTCGCATCAGGAGTCTCAATTACCTGAGCCCATCACAGCCTGATGGAAAATAATCGAACCCAAATAATTCAAGCACTACTCATCACAATCTTACTAGGCATCTACTTCACTCTCCTTCAGGCTTCAGAATACATTGAAGCTCCTTTCACCATCTCTGACGGCATCTACGGCTCAACATTCTTCATAGCCACGGGATTCCACGGCCTCCACGTCATTATCGGATCAACTTTCCTCACTGTATGCCTAGCCCGCCAGCTATTATTCCACTTCACATCCAAACATCACTTTGGCTTTGAGGCCGCCGCCTGATACTGGCACTTTGTAGACGTAGTCTGACTGTTTCTGTACGTCTCCATCTACTGATGAGGTTCCTACTCTTTTAGTATAAAC SN:Z:MT_orang SO:i:8961 SR:i:1 +S MTh9505 
CTTTTACCACTCCAGCCTAGCCCCTACCCCCCAATTAGGAGGGCACTGGCCCCCAACAGGCATCACCCCGCTAAATCCCCTAGAAGTCCCACTCCTAAACACATCCGTATTACTCGCATCAGGAGTATCAATCACCTGAGCTCACCATAGTCTAATAGAAAACAACCGAAACCAAATAATTCAAGCACTGCTTATTACAATTTTACTGGGTCTCTATTTTACCCTCCTACAAGCCTCAGAGTACTTCGAGTCTCCCTTCACCATTTCCGACGGCATCTACGGCTCAACATTTTTTGTAGCCACAGGCTTCCACGGACTTCACGTCATTATTGGCTCAACTTTCCTCACTATCTGCTTCATCCGCCAACTAATATTTCACTTTACATCCAAACATCACTTTGGCTTCGAAGCCGCCGCCTGATACTGGCATTTTGTAGATGTGGTTTGACTATTTCTGTATGTCTCCATCTATTGATGAGGGTCTTACTCTTTTAGTATAAATAGTACCGTTAACTTCCAATTAACTAGTTTTGACAACATTCAAAAAAGAGTAATAAACTTCGCCTTAATTTTAATAATCAACACCCTCCTAGCCTTACTACTAATAATTATTACATTTTGACTACCACAACTCAACGGCTACATAGAAAAATCCACCCCTTACGAGTGCGGCTTCGACCCTATATCCCCCGCCCGCGTCCCTTTCTCCATAAAATTCTTCTTAGTAGCTATTACCTTCTTATTATTTGATCTAGAAATTGCCCTCCTTTTACCCCTACCATGAGCCCTACAAACAACTAACCTGCCACTAATAGTTATGTCATCCCTCTTATTAATCATCATCCTAGCCCTAAGTCTGGCCTATGAGTGACTACAAAAAGGATTAGACTGAACCGAATTGGTATATAGTTTAAACAAAACGAATGATTTCGACTCATTAAATTATGATAATCATATTTACCAAATGCCCCTCATTTACATAAATATTATACTAGCATTTACCATCTCACTTCTAGGAATACTAGTATATCGCTCACACCTCATATCCTCCCTACTATGCCTAGAAGGAATAATACTATCGCTGTTCATTATAGCTACTCTCATAACCCTCAACACCCACTCCCTCTTAGCCAATATTGTGCCTATTGCCATACTAGTCTTTGCCGCCTGCGAAGCAGCGGTGGGCCTAGCCCTACTAGTCTCAATCTCCAACACATATGGCCTAGACTACGTACATAACCTAAACCTACTCCAATGCTAAAACTAATCGTCCCAACAATTATATTACTACCACTGACATGACTTTCCAAAAAACACATAATTTGAATCAACACAACCACCCACAGCCTAATTATTAGCATCATCCCTCTACTATTTTTTAACCAAATCAACAACAACCTATTTAGCTGTTCCCCAACCTTTTCCTCCGACCCCCTAACAACCCCCCTCCTAATACTAACTACCTGACTCCTACCCCTCACAATCATGGCAAGCCAACGCCACTTATCCAGTGAACCACTATCACGAAAAAAACTCTACCTCTCTATACTAATCTCCCTACAAATCTCCTTAATTATAACATTCACAGCCACAGAACTAATCATATTTTATATCTTCTTCGAAACCACACTTATCCCCACCTTGGCTATCATCACCCGATGAGGCAACCAGCCAGAACGCCTGAACGCAGGCACATACTTCCTATTCTACACCCTAGTAGGCTCCCTTCCCCTACTCATCGCACTAATTTACACTCACAACACCCTAGGCTCACTAAACATTCTACTACTCACTCTCACTGCCCAAGAACTATCAAACTCCTGAGCCAACAACTTAATATGACTAGCTTACACAATAGCTTTTATAGTAAAGATACCTCTTTACGGACTCCACTTATGACTCCCTAAAGCCCATGTCGAAGCCCCCATCGCTGGGTCAATAGTACTTGCCGCAGTACTCTTAAAACTAGGCGGCTATGGTATAATACGCCTCACACTCAT
TCTCAACCCCCTGACAAAACACATAGCCTACCCCTTCCTTGTACTATCCCTATGAGGCATAATTATAACAAGCTCCATCTGCCTACGACAAACAGACCTAAAATCGCTCATTGCATACTCTTCAATCAGCCACATAGCCCTCGTAGTAACAGCCATTCTCATCCAAACCCCCTGAAGCTTCACCGGCGCAGTCATTCTCATAATCGCCCACGGGCTTACATCCTCATTACTATTCTGCCTAGCAAACTCAAACTACGAACGCACTCACAGTCGCATCATAATCCTCTCTCAAGGACTTCAAACTCTACTCCCACTAATAGCTTTTTGATGACTTCTAGCAAGCCTCGCTAACCTCGCCTTACCCCCCACTATTAACCTACTGGGAGAACTCTCTGTGCTAGTAACCACGTTCTCCTGATCAAATATCACTCTCCTACTTACAGGACTCAACATACTAGTCACAGCCCTATACTCCCTCTACATATTTACCACAACACAATGGGGCTCACTCACCCACCACATTAACAACATAAAACCCTCATTCACACGAGAAAACACCCTCATGTTCATACACCTATCCCCCATTCTCCTCCTATCCCTCAACCCCGACATCATTACCGGGTTTTCCTCTTGTAAATATAGTTTAACCAAAACATCAGATTGTGAATCTGACAACAGAGGCTTACGACCCCTTATTTACCGAGAAAGCTCACAAGAACTGCTAACTCATGCCCCCATGTCTAACAACATGGCTTTCTCAACTTTTAAAGGATAACAGCTATCCATTGGTCTTAGGCCCCAAAAATTTTGGTGCAACTCCAAATAAAAGTAATAACCATGCACACTACTATAACCACCCTAACCCTGACTTCCCTAATTCCCCCCATCCTTACCACCCTCGTTAACCCTAACAAAAAAAACTCATACCCCCATTATGTAAAATCCATTGTCGCATCCACCTTTATTATCAGTCTCTTCCCCACAACAATATTCATGTGCCTAGACCAAGAAGTTATTATCTCGAACTGACACTGAGCCACAACCCAAACAACCCAGCTCTCCCTAAGCTTCAAACTAGACTACTTCTCCATAATATTCATCCCTGTAGCATTGTTCGTTACATGGTCCATCATAGAATTCTCACTGTGATATATAAACTCAGACCCAAACATTAATCAGTTCTTCAAATATCTACTCATCTTCCTAATTACCATACTAATCTTAGTTACCGCTAACAACCTATTCCAACTGTTCATCGGCTGAGAGGGCGTAGGAATTATATCCTTCTTGCTCATCAGTTGATGATACGCCCGAGCAGATGCCAACACAGCAGCCATTCAAGCAATCCTATACAACCGTATCGGCGATATCGGTTTCATCCTCGCCTTAGCATGATTTATCCTACACTCCAACTCATGAGACCCACAACAAATAGCCCTTCTAAACGCTAATCCAAGCCTCACCCCACTACTAGGCCTCCTCCTAGCAGCAGCAGGCAAATCAGCCCAA SN:Z:MT_human SO:i:9505 SR:i:0 +S MTh13014 
TTAGGTCTCCACCCCTGACTCCCCTCAGCCATAGAAGGCCCCACCCCAGTCTCAGCCCTACTCCACTCAAGCACTATAGTTGTAGCAGGAATCTTCTTACTCATCCGCTTCCACCCCCTAGCAGAAAATAGCCCACTAATCCAAACTCTAACACTATGCTTAGGCGCTATCACCACTCTGTTCGCAGCAGTCTGCGCCCTTACACAAAATGACATCAAAAAAATCGTAGCCTTCTCCACTTCAAGTCAACTAGGACTCATAATAGTTACAATCGGCATCAACCAACCACACCTAGCATTCCTGCACATCTGTACCCACGCCTTCTTCAAAGCCATACTATTTATGTGCTCCGGGTCCATCATCCACAACCTTAACAATGAACAAGATATTCGAAAAATAGGAGGACTACTCAAAACCATACCTCTCACTTCAACCTCCCTCACCATTGGCAGCCTAGCATTAGCAGGAATACCTTTCCTCACAGGTTTCTACTCCAAAGACC SN:Z:MT_human SO:i:13014 SR:i:0 +S MTh13516 ACATCATCGAAACCGCAAACATATCATACACAAACGCCTGAGCCCTATCTATTACTCTCATCGCTACCTCCCTGACAAGCGCCTATAGCACTCGAATAATTCTTCTCACCCTAACAGGTCAACCTCGCTTCCCCACCCTTACTAACATTAACGAAAATAACCCCACCCTACTAAACCCCATTAAACGCCTGGCAGCCGGAAGCCTATTCGCAGGATTTCTCATTACTAACAACATTTCCCCCGCATCCCCCTTCCAAACAACAATCCCCCTCTACCTAAAACTCACAGCCCTCGCTGTCACTTTCCTAGGACTTCTAACAGCCCTAGACCTCAACTACCTAACCAACAAACTTAAAATAAAATCCCCACTATGCACATTTTATTTCTCCAACATACTCGGATTCTACCCTAGCATCACACACCGCACAATCCCCTATCTAGGCCTTCTTACGAGCCAAAACCTGCCCCTACTCCTCCTAGACCTAACCTGACTAGAAAAGCTATTACCTAAAACAATTTCACAGCACCAAATCTCCACCTCCATCATCACCTCAACCCAAAAAGGCATAATTAAACTTTACTTCCTCTCTTTCTTCTTCCCACTCATCCTAACCCTACTCCTAATCACATAACCTATTCCCCCGAGCAATCTCAATTACAATATATACACCAACAAACAATGTTCAACCAGTAACTACTACTAATCAACGCCCATAATCATACAAAGCCCCCGCACCAATAGGATCCTCCCGAATCAACCCTGACCCCTCTCCTTCATAAATTATTCAGCTTCCTACACTATTAAAGTTTACCACAACCACCACCCCATCATACTCTTTCACCCACAGCACCAATCCTACCTCCATCGCTAACCCCACTAAAACACTCACCAAGACCTCAACCCCTGACCCCCATGCCTCAGGATACTCCTCAATAGCCATCGCTGTAGTATATCCAAAGACAACCATCATTCCCCCTAAATAAATTAAAAAAACTATTAAACCCATATAACCTCCCCCAAAATTCAGAATAATAACACACCCGACCACACCGCTAACAATCAATACTAAACCCCCATAAATAGGAGAAGGCTTAGAAGAAAACCCCACAAACCCCATTACTAAACCCACACTCAACAGAAACAAAGCATACATCATTATTCTCGCACGGACTACAACCACGACCAATGATATGAAAAACCATCGTTGTATTTCAACTACAAGAACACCAATGACCCCAATACGCAAAACTAACCCCCTAATAAAATTAATTAACCACTCATTCATCGACCTCCCCACCCCATCCAACATCTCCGCATGATGAAACTTCGGCTCACTCCTTGGCGCCTGCCTGATCCTCCAAATCACCACAGGACTATTCCTAGCCATGCACTACTCACCAGACGCCTCAACCGCCTTTTCATCAATCGCCCACATCACTCGAGACGTAAATT
ATGGCTGAATCATCCGCTACCTTCACGCCAATGGCGCCTCAATATTCTTTATCTGCCTCTTCCTACACATCGGGCGAGGCCTATATTACGGATCATTTCTCTACTCAGAAACCTGAAACATCGGCATTATCCTCCTGCTTGCAACTATAGCAACAGCCTTCATAGGCTATGTCCTCCCGTGAGGCCAAATATCATTCTGAGGGGCCACAGTAATTACAAACTTACTATCCGCCATCCCATACATTGGGACAGACCTAGTTCAATGAATCTGAGGAGGCTACTCAGTAGACAGTCCCACCCTCACACGATTCTTTACCTTTCACTTCATCTTGCCCTTCATTATTGCAGCCCTAGCAACACTCCACCTCCTATTCTTGCACGAAACGGGATCAAACAACCCCCTAGGAATCACCTCCCATTCCGATAAAATCACCTTCCACCCTTACTACACAATCAAAGACGCCCTCGGCTTACTTCTCTTCCTTCTCTCCTTAATGACATTAACACTATTCTCACCAGACCTCCTAGGCGACCCAGACAATTATACCCTAGCCAACCCCTTAAACACCCCTCCCCACATCAAGCCCGAATGATATTTCCTATTCGCCTACACAATTCTCCGATCCGTCCCTAACAAACTAGGAGGCGTCCTTGCCCTATTACTATCCATCCTCATCCTAGCAATAATCCCCATCCTCCATATATCCAAACAACAAAGCATAATATTTCGCCCACTAAGCCAATCACTTTATTGACTCCTAGCCGCAGACCTCCTCATTCTAACCTGAATCGGAGGACAACCAGTAAGCTACCCTTTTACCATCATTGGACAAGTAGCATCCGTACTATACTTCACAACAATCCTAATCCTAATACCAACTATCTCCCTAATTGAAAACAAAATACTCAAATGGGCCTGTCCTTGTAGTATAAACTAATACACCAGTCTTGTAAACCGGAGATGAAAACCTTTTTCCAAGGACAAATCAGAGAAAAAGTCTTTAACTCCACCATTAGCACCCAAAGCTAAGATTCTAATTTAAACTATTCTCTGTTCTTTCATGGGGAAGCAGATTTGGGTACCACCCAAGTATTGACTCACCCATCAACAACCGCTATGTATTTCGTACATTACTGCCAGCCACCATGAATATTGTACGGTACCATAAATACTTGACCACCTGTAGTACATAAAAACCCAATCCACATCAAAACCCCCTCCCCATGCTTACAAGCAAGTACAGCAATCAACCCTCAACTATCACACATCAACTGCAACTCCAAAGCCACCCCTCACCCACTAGGATACCAACAAACCTACCCACCCTTAACAGTACATAGTACATAAAGCCATTTACCGTACATAGCACATTACAGTCAAATCCCTTCTCGTCCCCATGGATGACCCCCCTCAGATAGGGGTCCCTTGACCACCATCCTCCGTGAAATCAATATCCCGCACAAGAGTGCTACTCTCCTCGCTCCGGGCCCATAACACTTGGGGGTAGCTAAAGTGAACTGTATCCGACATCTGGTTCCTACTTCAGGGTCATAAAGCCTAAATAGCCCACACGTTCCCCTTAAATAAGACATCACGATG SN:Z:MT_human SO:i:13516 SR:i:0 +L MTh0 + MTh4001 + 0M SR:i:0 +L MTh4001 + MTh4502 + 0M SR:i:0 +L MTh4001 + MTh4001 + 0M SR:i:1 +L MTh4502 + MTh9505 + 0M SR:i:0 +L MTh9505 + MTh13014 + 0M SR:i:0 +L MTh13014 + MTh13516 + 0M SR:i:0 +L MTh0 + MTo3426 - 0M SR:i:1 +L MTo3426 - MTh4502 + 0M SR:i:1 +L MTh4502 + MTo8961 + 0M SR:i:1 +L MTo8961 + MTh9505 + 0M SR:i:1 +L MTh9505 + MTh13516 + 0M SR:i:1 diff 
--git a/tex/Makefile b/tex/Makefile new file mode 100644 index 0000000..5a3f842 --- /dev/null +++ b/tex/Makefile @@ -0,0 +1,13 @@ +all:minigraph.pdf + +minigraph.bbl:minigraph.bib minigraph.tex + pdflatex minigraph; bibtex minigraph + +minigraph.pdf:minigraph.tex minigraph.bbl + pdflatex minigraph; pdflatex minigraph + +lite:minigraph.tex + pdflatex minigraph + +clean: + rm -f minigraph.bbl minigraph.log minigraph.aux diff --git a/tex/minigraph.bib b/tex/minigraph.bib new file mode 100644 index 0000000..a9b9b69 --- /dev/null +++ b/tex/minigraph.bib @@ -0,0 +1,676 @@ +@article{Schneider:2017aa, + Author = {Schneider, Valerie A and Graves-Lindsay, Tina and Howe, Kerstin and Bouk, Nathan and Chen, Hsiu-Chuan and Kitts, Paul A and Murphy, Terence D and Pruitt, Kim D and Thibaud-Nissen, Fran{\c c}oise and Albracht, Derek and Fulton, Robert S and Kremitzki, Milinn and Magrini, Vincent and Markovic, Chris and McGrath, Sean and Steinberg, Karyn Meltz and Auger, Kate and Chow, William and Collins, Joanna and Harden, Glenn and Hubbard, Timothy and Pelan, Sarah and Simpson, Jared T and Threadgold, Glen and Torrance, James and Wood, Jonathan M and Clarke, Laura and Koren, Sergey and Boitano, Matthew and Peluso, Paul and Li, Heng and Chin, Chen-Shan and Phillippy, Adam M and Durbin, Richard and Wilson, Richard K and Flicek, Paul and Eichler, Evan E and Church, Deanna M}, + Journal = {Genome Res}, + Month = {May}, + Number = {5}, + Doi = {10.1101/gr.213611.116}, + Pages = {849-864}, + Title = {Evaluation of {GRCh38} and de novo haploid genome assemblies demonstrates the enduring quality of the reference assembly}, + Volume = {27}, + Year = {2017}} + +@article{Li:2018aa, + Author = {Li, Heng and Bloom, Jonathan M and Farjoun, Yossi and Fleharty, Mark and Gauthier, Laura and Neale, Benjamin and MacArthur, Daniel}, + Journal = {Nat Methods}, + Month = {Aug}, + Number = {8}, + Pages = {595-597}, + Title = {A synthetic-diploid benchmark for accurate variant-calling evaluation}, + 
Volume = {15}, + Doi = {10.1038/s41592-018-0054-7}, + Year = {2018}} + +@article{Huddleston:2017aa, + Author = {Huddleston, John and Chaisson, Mark J P and Steinberg, Karyn Meltz and Warren, Wes and Hoekzema, Kendra and Gordon, David and Graves-Lindsay, Tina A and Munson, Katherine M and Kronenberg, Zev N and Vives, Laura and Peluso, Paul and Boitano, Matthew and Chin, Chen-Shin and Korlach, Jonas and Wilson, Richard K and Eichler, Evan E}, + Journal = {Genome Res}, + Month = {May}, + Number = {5}, + Pages = {677-685}, + Title = {Discovery and genotyping of structural variation from long-read haploid genome sequence data}, + Volume = {27}, + Doi = {10.1101/gr.214007.116}, + Year = {2017}} + +@article{Eichler_2010, + Author = {Eichler, Evan E. and Flint, Jonathan and Gibson, Greg and Kong, Augustine and Leal, Suzanne M. and Moore, Jason H. and Nadeau, Joseph H.}, + Journal = {Nature Reviews Genetics}, + Month = {Jun}, + Number = {6}, + Pages = {446--450}, + Doi = {10.1038/nrg2809}, + Title = {Missing heritability and strategies for finding the underlying causes of complex disease}, + Volume = {11}, + Year = {2010}} + +@article{Wenger_2019, + Author = {Wenger, Aaron M. and Peluso, Paul and Rowell, William J. and Chang, Pi-Chuan and Hall, Richard J. and Concepcion, Gregory T. and Ebler, Jana and Fungtammasan, Arkarachai and Kolesnikov, Alexey and Olson, Nathan D. 
and et al.}, + Doi = {10.1038/s41587-019-0217-9}, + Issn = {1546-1696}, + Journal = {Nature Biotechnology}, + Month = {Aug}, + Number = {10}, + Pages = {1155--1162}, + Publisher = {Springer Science and Business Media LLC}, + Title = {Accurate circular consensus long-read sequencing improves variant detection and assembly of a human genome}, + Volume = {37}, + Year = {2019}} + +@article{Audano:2019aa, + Author = {Audano, Peter A and Sulovari, Arvis and Graves-Lindsay, Tina A and Cantsilieris, Stuart and Sorensen, Melanie and Welch, AnneMarie E and Dougherty, Max L and Nelson, Bradley J and Shah, Ankeeta and Dutcher, Susan K and Warren, Wesley C and Magrini, Vincent and McGrath, Sean D and Li, Yang I and Wilson, Richard K and Eichler, Evan E}, + Journal = {Cell}, + Month = {Jan}, + Number = {3}, + Pages = {663-675.e19}, + Doi = {10.1016/j.cell.2018.12.019}, + Title = {Characterizing the Major Structural Variant Alleles of the Human Genome}, + Volume = {176}, + Year = {2019}} + +@article{Boucher_2019, + Author = {Boucher, Christina and Gagie, Travis and Kuhnle, Alan and Langmead, Ben and Manzini, Giovanni and Mun, Taher}, + Journal = {Algorithms for Molecular Biology}, + Month = {May}, + Number = {1}, + Title = {Prefix-free parsing for building big {BWTs}}, + Volume = {14}, + Doi = {10.1186/s13015-019-0148-5}, + Year = {2019}} + +@article{Makinen:2010aa, + Author = {M{\"a}kinen, Veli and Navarro, Gonzalo and Sir{\'e}n, Jouni and V{\"a}lim{\"a}ki, Niko}, + Journal = {J Comput Biol}, + Month = {Mar}, + Number = {3}, + Pages = {281-308}, + Doi = {10.1089/cmb.2009.0169}, + Title = {Storage and retrieval of highly repetitive sequence collections}, + Volume = {17}, + Year = {2010}} + +@article{Liu_2016, + Author = {Liu, Bo and Zhu, Dixian and Wang, Yadong}, + Journal = {Bioinformatics}, + Month = {Jun}, + Number = {12}, + Pages = {i174--i182}, + Doi = {10.1093/bioinformatics/btw266}, + Title = {{deBWT}: parallel construction of {Burrows}--{Wheeler} {Transform} for large 
collection of genomes with de Bruijn-branch encoding}, + Volume = {32}, + Year = {2016}} + +@article{NA2016159, + Author = {Joong Chae Na and Hyunjoon Kim and Heejin Park and Thierry Lecroq and Martine L{\'e}onard and Laurent Mouchard and Kunsoo Park}, + Journal = {Theoretical Computer Science}, + Pages = {159 - 170}, + Title = {{FM-index} of alignment: A compressed index for similar strings}, + Doi = {10.1016/j.tcs.2015.08.008}, + Volume = {638}, + Year = {2016}} + +@article{cpgc:2016aa, + Author = {{Computational Pan-Genomics Consortium}}, + Journal = {Brief Bioinform}, + Month = {Oct}, + Number = {1}, + Pages = {118--135}, + Doi = {10.1093/bib/bbw089}, + Title = {Computational pan-genomics: status, promises and challenges}, + Volume = {19}, + Year = {2016}} + +@article{Vernikos:2015aa, + Author = {Vernikos, George and Medini, Duccio and Riley, David R and Tettelin, Herv{\'e}}, + Journal = {Curr Opin Microbiol}, + Month = {Feb}, + Pages = {148-54}, + Title = {Ten years of pan-genome analyses}, + Doi = {10.1016/j.mib.2014.11.016}, + Volume = {23}, + Year = {2015}} + +@article{Liu:2016ac, + Author = {Liu, Bo and Guo, Hongzhe and Brudno, Michael and Wang, Yadong}, + Journal = {Bioinformatics}, + Month = {Nov}, + Number = {21}, + Pages = {3224-3232}, + Title = {{deBGA}: read alignment with {de Bruijn} graph-based seed and extension}, + Doi = {10.1093/bioinformatics/btw371}, + Volume = {32}, + Year = {2016}} + +@article{Marcus:2014xy, + Author = {Marcus, Shoshana and Lee, Hayan and Schatz, Michael C}, + Journal = {Bioinformatics}, + Month = {Dec}, + Number = {24}, + Pages = {3476-83}, + Title = {{SplitMEM}: a graphical algorithm for pan-genome analysis with suffix skips}, + Doi = {10.1093/bioinformatics/btu756}, + Volume = {30}, + Year = {2014}} + +@article{Baier_2015, + Author = {Baier, Uwe and Beller, Timo and Ohlebusch, Enno}, + Journal = {Bioinformatics}, + Month = {Oct}, + Number = {4}, + Pages = {497--504}, + Doi = {10.1093/bioinformatics/btv603}, + Title = 
{Graphical pan-genome analysis with compressed suffix trees and the {Burrows}--{Wheeler} transform}, + Volume = {32}, + Year = {2015}} + +@article{Beller:2016ab, + Author = {Beller, Timo and Ohlebusch, Enno}, + Journal = {Algorithms Mol Biol}, + Pages = {20}, + Title = {A representation of a compressed {de Bruijn} graph for pan-genome analysis that enables search}, + Volume = {11}, + Doi = {10.1186/s13015-016-0083-7}, + Year = {2016}} + +@article{Minkin_2016, + Author = {Minkin, Ilia and Pham, Son and Medvedev, Paul}, + Journal = {Bioinformatics}, + Number = {24}, + Pages = {4024-4032}, + Title = {{TwoPaCo}: an efficient algorithm to build the compacted de {Bruijn} graph from many complete genomes}, + Doi = {10.1093/bioinformatics/btw609}, + Volume = {33}, + Year = {2017}} + +@article{Chikhi_2016, + Author = {Chikhi, Rayan and Limasset, Antoine and Medvedev, Paul}, + Journal = {Bioinformatics}, + Month = {Jun}, + Number = {12}, + Pages = {i201--i208}, + Title = {Compacting de {Bruijn} graphs from sequencing data quickly and in low memory}, + Volume = {32}, + Doi = {10.1093/bioinformatics/btw279}, + Year = {2016}} + +@article{Chikhi:2015aa, + Author = {Chikhi, Rayan and Limasset, Antoine and Jackman, Shaun and Simpson, Jared T and Medvedev, Paul}, + Journal = {J Comput Biol}, + Month = {May}, + Number = {5}, + Pages = {336-52}, + Title = {On the representation of de Bruijn graphs}, + Doi = {10.1089/cmb.2014.0160}, + Volume = {22}, + Year = {2015}} + +@article{Dilthey_2015, + Author = {Dilthey, Alexander and Cox, Charles and Iqbal, Zamin and Nelson, Matthew R and McVean, Gil}, + Journal = {Nature Genetics}, + Month = {Apr}, + Number = {6}, + Pages = {682--688}, + Title = {Improved genome inference in the {MHC} using a population reference graph}, + Doi = {10.1038/ng.3257}, + Volume = {47}, + Year = {2015}} + +@article{Eggertsson:2017aa, + Author = {Eggertsson, Hannes P and Jonsson, Hakon and Kristmundsdottir, Snaedis and Hjartarson, Eirikur and Kehr, Birte and 
Masson, Gisli and Zink, Florian and Hjorleifsson, Kristjan E and Jonasdottir, Aslaug and Jonasdottir, Adalbjorg and Jonsdottir, Ingileif and Gudbjartsson, Daniel F and Melsted, Pall and Stefansson, Kari and Halldorsson, Bjarni V}, + Journal = {Nat Genet}, + Month = {Nov}, + Number = {11}, + Pages = {1654-1660}, + Title = {Graphtyper enables population-scale genotyping using pangenome graphs}, + Volume = {49}, + Doi = {10.1038/ng.3964}, + Year = {2017}} + +@article{Rakocevic_2019, + Author = {Rakocevic, Goran and Semenyuk, Vladimir and Lee, Wan-Ping and Spencer, James and Browning, John and Johnson, Ivan J. and Arsenijevic, Vladan and Nadj, Jelena and Ghose, Kaushik and Suciu, Maria C. and others}, + Journal = {Nature Genetics}, + Month = {Jan}, + Number = {2}, + Pages = {354--362}, + Title = {Fast and accurate genomic analyses using genome graphs}, + Volume = {51}, + Doi = {10.1038/s41588-018-0316-4}, + Year = {2019}} + +@article{Garrison:2018aa, + Author = {Garrison, Erik and Sir{\'e}n, Jouni and Novak, Adam M and Hickey, Glenn and Eizenga, Jordan M and Dawson, Eric T and Jones, William and Garg, Shilpa and Markello, Charles and Lin, Michael F and Paten, Benedict and Durbin, Richard}, + Journal = {Nat Biotechnol}, + Month = {Oct}, + Number = {9}, + Pages = {875-879}, + Title = {Variation graph toolkit improves read mapping by representing genetic variation in the reference}, + Doi = {10.1038/nbt.4227}, + Volume = {36}, + Year = {2018}} + +@article{Sibbesen:2018aa, + Author = {Sibbesen, Jonas Andreas and Maretty, Lasse and {Danish Pan-Genome Consortium} and Krogh, Anders}, + Journal = {Nat Genet}, + Month = {Jul}, + Number = {7}, + Pages = {1054-1059}, + Title = {Accurate genotyping across variant classes and lengths using variant graphs}, + Volume = {50}, + Doi = {10.1038/s41588-018-0145-5}, + Year = {2018}} + +@article{Li:2016aa, + Author = {Li, Heng}, + Journal = {Bioinformatics}, + Month = {Jul}, + Number = {14}, + Pages = {2103-10}, + Title = {Minimap and
miniasm: fast mapping and de novo assembly for noisy long sequences}, + Doi = {10.1093/bioinformatics/btw152}, + Volume = {32}, + Year = {2016}} + +@article{Rautiainen810812, + Author = {Rautiainen, Mikko and Marschall, Tobias}, + Journal = {bioRxiv}, + Title = {{GraphAligner}: Rapid and Versatile Sequence-to-Graph Alignment}, + Doi = {10.1101/810812}, + year = {2019}} + +@article{Li:2018ab, + Author = {Li, Heng}, + Journal = {Bioinformatics}, + Month = {Sep}, + Number = {18}, + Pages = {3094-3100}, + Title = {Minimap2: pairwise alignment for nucleotide sequences}, + Volume = {34}, + Doi = {10.1093/bioinformatics/bty191}, + Year = {2018}} + +@article{Ono:2013aa, + Author = {Ono, Yukiteru and Asai, Kiyoshi and Hamada, Michiaki}, + Journal = {Bioinformatics}, + Month = {Jan}, + Number = {1}, + Pages = {119-21}, + Title = {{PBSIM}: {PacBio} reads simulator--toward accurate genome assembly}, + Volume = {29}, + Doi = {10.1093/bioinformatics/bts649}, + Year = {2013}} + +@article{Garg810341, + Author = {Garg, Shilpa and Fungtammasan, Arkarachai and Carroll, Andrew and Chou, Mike and Schmitt, Anthony and Zhou, Xiang and Mac, Stephen and Peluso, Paul and Hatas, Emily and Ghurye, Jay and Maguire, Jared and Mahmoud, Medhat and Cheng, Haoyu and Heller, David and Zook, Justin M. and Moemke, Tobias and Marschall, Tobias and Sedlazeck, Fritz J. and Aach, John and Chin, Chen-Shan and Church, George M. 
and Li, Heng}, + Journal = {bioRxiv}, + Title = {Efficient chromosome-scale haplotype-resolved assembly of human genomes}, + Doi = {10.1101/810341}, + Year = {2019}} + +@article{Robinson:2011aa, + Author = {Robinson, James T and Thorvaldsd{\'o}ttir, Helga and Winckler, Wendy and Guttman, Mitchell and Lander, Eric S and Getz, Gad and Mesirov, Jill P}, + Journal = {Nat Biotechnol}, + Month = {Jan}, + Number = {1}, + Pages = {24-6}, + Title = {Integrative genomics viewer}, + Volume = {29}, + Doi = {10.1038/nbt.1754}, + Year = {2011}} + +@article{Mathews:2003aa, + Author = {Mathews, Lauren M and Chi, Susan Y and Greenberg, Noam and Ovchinnikov, Igor and Swergold, Gary D}, + Journal = {Am J Hum Genet}, + Month = {Mar}, + Number = {3}, + Pages = {739-48}, + Doi = {10.1086/368275}, + Title = {Large differences between {LINE-1} amplification rates in the human and chimpanzee lineages}, + Volume = {72}, + Year = {2003}} + +@article{Biederstedt:2018aa, + Author = {Biederstedt, Evan and Oliver, Jeffrey C and Hansen, Nancy F and Jajoo, Aarti and Dunn, Nathan and Olson, Andrew and Busby, Ben and Dilthey, Alexander T}, + Journal = {F1000Res}, + Pages = {1391}, + Title = {{NovoGraph}: Human genome graph construction from multiple long-read de novo assemblies}, + Volume = {7}, + Doi = {10.12688/f1000research.15895.2}, + Year = {2018}} + +@inproceedings{DBLP:conf/wabi/AbouelhodaO03, + Author = {Mohamed Ibrahim Abouelhoda and Enno Ohlebusch}, + Booktitle = {Algorithms in Bioinformatics, Third International Workshop, {WABI} 2003, Budapest, Hungary, September 15-20, 2003, Proceedings}, + Crossref = {DBLP:conf/wabi/2003}, + Pages = {1--16}, + Doi = {10.1007/978-3-540-39763-2\_1}, + Title = {A Local Chaining Algorithm and Its Applications in Comparative Genomics}, + Year = {2003}} + +@proceedings{DBLP:conf/wabi/2003, + Editor = {Gary Benson and Roderic D. M. 
Page}, + Publisher = {Springer}, + Title = {Algorithms in Bioinformatics, Third International Workshop, {WABI} 2003, Budapest, Hungary, September 15-20, 2003, Proceedings}, + Volume = {2812}, + Year = {2003}} + +@article{Otto:2011aa, + Author = {Otto, Christian and Hoffmann, Steve and Gorodkin, Jan and Stadler, Peter F}, + Journal = {Algorithms Mol Biol}, + Month = {Mar}, + Pages = {4}, + Title = {Fast local fragment chaining using sum-of-pair gap costs}, + Volume = {6}, + Doi = {10.1186/1748-7188-6-4}, + Year = {2011}} + +@article{Depristo:2011vn, + Author = {Depristo, Mark A and Banks, Eric and Poplin, Ryan and Garimella, Kiran V and Maguire, Jared R and Hartl, Christopher and Philippakis, Anthony A and Del Angel, Guillermo and Rivas, Manuel A and Hanna, Matt and McKenna, Aaron and Fennell, Tim J and Kernytsky, Andrew M and Sivachenko, Andrey Y and Cibulskis, Kristian and Gabriel, Stacey B and Altshuler, David and Daly, Mark J}, + Journal = {Nat Genet}, + Month = {May}, + Number = {5}, + Pages = {491-8}, + Title = {A framework for variation discovery and genotyping using next-generation DNA sequencing data}, + Volume = {43}, + Doi = {10.1038/ng.806}, + Year = {2011}} + +@article{Li:2013aa, + Author = {Li, Heng}, + Journal = {arXiv:1303.3997}, + Title = {Aligning sequence reads, clone sequences and assembly contigs with {BWA-MEM}}, + Year = {2013}} + +@article{Amemiya:2019aa, + Author = {Amemiya, Haley M and Kundaje, Anshul and Boyle, Alan P}, + Journal = {Sci Rep}, + Month = {Jun}, + Number = {1}, + Pages = {9354}, + Title = {The {ENCODE} Blacklist: Identification of Problematic Regions of the Genome}, + Volume = {9}, + Doi = {10.1038/s41598-019-45839-z}, + Year = {2019}} + +@inproceedings{DBLP:conf/ismb/RuzzoT99, + Author = {Walter L. 
Ruzzo and Martin Tompa}, + Booktitle = {Proceedings of the Seventh International Conference on Intelligent Systems for Molecular Biology, August 6-10, 1999, Heidelberg, Germany}, + Crossref = {DBLP:conf/ismb/1999}, + Pages = {234--241}, + Title = {A Linear Time Algorithm for Finding All Maximal Scoring Subsequences}, + Year = {1999}} + +@proceedings{DBLP:conf/ismb/1999, + Editor = {Thomas Lengauer and Reinhard Schneider and Peer Bork and Douglas L. Brutlag and Janice I. Glasgow and Hans{-}Werner Mewes and Ralf Zimmer}, + Publisher = {{AAAI}}, + Title = {Proceedings of the Seventh International Conference on Intelligent Systems for Molecular Biology, August 6-10, 1999, Heidelberg, Germany}, + Year = {1999}} + +@article{Suzuki:2018aa, + Author = {Suzuki, Hajime and Kasahara, Masahiro}, + Journal = {BMC Bioinformatics}, + Month = {Feb}, + Number = {Suppl 1}, + Pages = {45}, + Title = {Introducing difference recurrence relations for faster semi-global alignment of long sequences}, + Volume = {19}, + Doi = {10.1186/s12859-018-2014-8}, + Year = {2018}} + +@article{Morgulis:2006aa, + Author = {Morgulis, Aleksandr and Gertz, E Michael and Sch{\"a}ffer, Alejandro A and Agarwala, Richa}, + Journal = {J Comput Biol}, + Month = {Jun}, + Number = {5}, + Pages = {1028-40}, + Title = {A fast and symmetric DUST implementation to mask low-complexity DNA sequences}, + Volume = {13}, + Doi = {10.1089/cmb.2006.13.1028}, + Year = {2006}} + +@article{Tarailo-Graovac:2009aa, + Author = {Tarailo-Graovac, Maja and Chen, Nansheng}, + Journal = {Curr Protoc Bioinformatics}, + Month = {Mar}, + Pages = {Unit 4.10}, + Title = {Using RepeatMasker to identify repetitive elements in genomic sequences}, + Volume = {Chapter 4}, + Year = {2009}} + +@article{Iqbal:2012aa, + Author = {Iqbal, Zamin and Caccamo, Mario and Turner, Isaac and Flicek, Paul and McVean, Gil}, + Journal = {Nat Genet}, + Month = {Jan}, + Number = {2}, + Pages = {226-32}, + Title = {De novo assembly and genotyping of variants 
using colored de Bruijn graphs}, + Volume = {44}, + Doi = {10.1038/ng.1028}, + Year = {2012}} + +@article{Holley695338, + Author = {Holley, Guillaume and Melsted, P{\'a}ll}, + Journal = {bioRxiv}, + Title = {Bifrost {\textendash} Highly parallel construction and indexing of colored and compacted de Bruijn graphs}, + Doi = {10.1101/695338}, + Year = {2019}} + +@article{Lee_2002, + Author = {Lee, C. and Grasso, C. and Sharlow, M. F.}, + Journal = {Bioinformatics}, + Month = {Mar}, + Number = {3}, + Doi = {10.1093/bioinformatics/18.3.452}, + Pages = {452--464}, + Title = {Multiple sequence alignment using partial order graphs}, + Volume = {18}, + Year = {2002}} + +@article{Hickey_2020, + Author = {Hickey, Glenn and Heller, David and Monlong, Jean and Sibbesen, Jonas A. and Sir{\'e}n, Jouni and Eizenga, Jordan and Dawson, Eric T. and Garrison, Erik and Novak, Adam M. and Paten, Benedict}, + Journal = {Genome Biology}, + Month = {Feb}, + Number = {1}, + Title = {Genotyping structural variants in pangenome graphs using the vg toolkit}, + Volume = {21}, + Doi = {10.1186/s13059-020-1941-7}, + Year = {2020}} + +@article{Eggertsson_2019, + Author = {Eggertsson, Hannes P. and Kristmundsdottir, Snaedis and Beyter, Doruk and Jonsson, Hakon and Skuladottir, Astros and Hardarson, Marteinn T. and Gudbjartsson, Daniel F. and Stefansson, Kari and Halldorsson, Bjarni V. and Melsted, Pall}, + Journal = {Nature Communications}, + Month = {Nov}, + Number = {1}, + Title = {GraphTyper2 enables population-scale genotyping of structural variation using pangenome graphs}, + Volume = {10}, + Doi = {10.1038/s41467-019-13341-9}, + Year = {2019}} + +@article{Chen_2019, + Author = {Chen, Sai and Krusche, Peter and Dolzhenko, Egor and Sherman, Rachel M. and Petrovski, Roman and Schlesinger, Felix and Kirsche, Melanie and Bentley, David R. and Schatz, Michael C. and Sedlazeck, Fritz J. 
and others}, + Journal = {Genome Biology}, + Month = {Dec}, + Number = {1}, + Title = {Paragraph: a graph-based structural variant genotyper for short-read sequence data}, + Volume = {20}, + Doi = {10.1186/s13059-019-1909-7}, + year = {2019}} + +@article{10.12688/f1000research.19630.1, + Author = {Llamas, B and Narzisi, G and Schneider, V and Audano, PA and Biederstedt, E and Blauvelt, L and Bradbury, P and Chang, X and Chin, CS and Fungtammasan, A and Clarke, WE and Cleary, A and Ebler, J and Eizenga, J and Sibbesen, JA and Markello, CJ and Garrison, E and Garg, S and Hickey, G and Lazo, GR and Lin, MF and Mahmoud, M and Marschall, T and Minkin, I and Monlong, J and Musunuri, RL and Sagayaradj, S and Novak, AM and Rautiainen, M and Regier, A and Sedlazeck, FJ and Siren, J and Souilmi, Y and Wagner, J and Wrightsman, T and Yokoyama, TT and Zeng, Q and Zook, JM and Paten, B and Busby, B}, + Journal = {F1000Research}, + Number = {1751}, + Title = {A strategy for building and using a human reference pangenome [version 1; peer review: 1 approved, 1 approved with reservations]}, + Volume = {8}, + Doi = {10.12688/f1000research.19630.1}, + Year = {2019}} + +@article{Dilthey_2019, + Author = {Dilthey, Alexander T and Mentzer, Alexander J and Carapito, Raphael and Cutland, Clare and Cereb, Nezih and Madhi, Shabir A and Rhie, Arang and Koren, Sergey and Bahram, Seiamak and McVean, Gil and others}, + Journal = {Bioinformatics}, + Month = {Apr}, + Number = {21}, + Pages = {4394--4396}, + Title = {{HLA*LA--HLA} typing from linearly projected graph alignments}, + Volume = {35}, + Doi = {10.1093/bioinformatics/btz235}, + Year = {2019}} + +@article{Danecek:2011qy, + Author = {Danecek, Petr and Auton, Adam and Abecasis, Goncalo and Albers, Cornelis A and Banks, Eric and DePristo, Mark A and Handsaker, Robert E and Lunter, Gerton and Marth, Gabor T and Sherry, Stephen T and McVean, Gilean and Durbin, Richard and {1000 Genomes Project Analysis Group}}, + Journal = {Bioinformatics}, +
Month = {Aug}, + Number = {15}, + Pages = {2156-8}, + Title = {The variant call format and VCFtools}, + Volume = {27}, + Doi = {10.1093/bioinformatics/btr330}, + Year = {2011}} + +@article{Pritt_2018, + Author = {Pritt, Jacob and Chen, Nae-Chyun and Langmead, Ben}, + Journal = {Genome Biology}, + Month = {Dec}, + Number = {1}, + Title = {FORGe: prioritizing variants for graph genomes}, + Volume = {19}, + Doi = {10.1186/s13059-018-1595-x}, + Year = {2018}} + +@article{Pevzner:2001vn, + Author = {Pevzner, P A and Tang, H and Waterman, M S}, + Journal = {Proc Natl Acad Sci U S A}, + Month = {Aug}, + Number = {17}, + Pages = {9748-53}, + Title = {An Eulerian path approach to DNA fragment assembly}, + Volume = {98}, + Doi = {10.1073/pnas.171285098}, + Year = {2001}} + +@article{Muggli_2019, + Author = {Muggli, Martin D and Alipanahi, Bahar and Boucher, Christina}, + Journal = {Bioinformatics}, + Month = {Jul}, + Number = {14}, + Pages = {i51--i60}, + Title = {Building large updatable colored de Bruijn graphs via merging}, + Doi = {10.1093/bioinformatics/btz350}, + Volume = {35}, + Year = {2019}} + +@article{Gnerre:2011ys, + Author = {Gnerre, Sante and Maccallum, Iain and Przybylski, Dariusz and Ribeiro, Filipe J and Burton, Joshua N and Walker, Bruce J and Sharpe, Ted and Hall, Giles and Shea, Terrance P and Sykes, Sean and Berlin, Aaron M and Aird, Daniel and Costello, Maura and Daza, Riza and Williams, Louise and Nicol, Robert and Gnirke, Andreas and Nusbaum, Chad and Lander, Eric S and Jaffe, David B}, + Journal = {Proc Natl Acad Sci U S A}, + Month = {Jan}, + Number = {4}, + Pages = {1513-8}, + Title = {High-quality draft assemblies of mammalian genomes from massively parallel sequence data}, + Volume = {108}, + Doi = {10.1073/pnas.1017351108}, + Year = {2011}} + +@article{Sir_n_2019, + Author = {Sir{\'e}n, Jouni and Garrison, Erik and Novak, Adam M and Paten, Benedict and Durbin, Richard}, + Journal = {Bioinformatics}, + Month = {Jul}, + Title = {Haplotype-aware 
graph indexes}, + Doi = {10.1093/bioinformatics/btz575}, + Year = {2019}} + +@article{Almodaresi:2018aa, + Author = {Almodaresi, Fatemeh and Sarkar, Hirak and Srivastava, Avi and Patro, Rob}, + Journal = {Bioinformatics}, + Month = {Jul}, + Number = {13}, + Pages = {i169-i177}, + Title = {A space and time-efficient index for the compacted colored de Bruijn graph}, + Volume = {34}, + Doi = {10.1093/bioinformatics/bty292}, + Year = {2018}} + +@inproceedings{almodaresi_et_al:LIPIcs:2017:7657, + Address = {Dagstuhl, Germany}, + Author = {Fatemeh Almodaresi and Prashant Pandey and Rob Patro}, + Booktitle = {17th International Workshop on Algorithms in Bioinformatics (WABI 2017)}, + Editor = {Russell Schwartz and Knut Reinert}, + Pages = {18:1--18:15}, + Publisher = {Schloss Dagstuhl--Leibniz-Zentrum fuer Informatik}, + Series = {Leibniz International Proceedings in Informatics (LIPIcs)}, + Title = {{Rainbowfish: A Succinct Colored de Bruijn Graph Representation}}, + Volume = {88}, + Doi = {10.4230/LIPIcs.WABI.2017.18}, + Year = {2017}} + +@article{Jain_2020, + Author = {Jain, Chirag and Zhang, Haowen and Gao, Yu and Aluru, Srinivas}, + Journal = {Journal of Computational Biology}, + Month = {Apr}, + Number = {4}, + Pages = {640--654}, + Title = {On the Complexity of Sequence-to-Graph Alignment}, + Volume = {27}, + Year = {2020}} + +@inproceedings{DBLP:conf/ipps/JainMZDA19, + Author = {Chirag Jain and Sanchit Misra and Haowen Zhang and Alexander T. 
Dilthey and Srinivas Aluru}, + Booktitle = {2019 {IEEE} International Parallel and Distributed Processing Symposium, {IPDPS} 2019, Rio de Janeiro, Brazil, May 20-24, 2019}, + Pages = {451--461}, + Publisher = {{IEEE}}, + Title = {Accelerating Sequence Alignment to Graphs}, + Year = {2019}} + +@article{Rautiainen_2019, + Author = {Rautiainen, Mikko and M{\"a}kinen, Veli and Marschall, Tobias}, + Journal = {Bioinformatics}, + Month = {Mar}, + Number = {19}, + Pages = {3599--3607}, + Title = {Bit-parallel sequence-to-graph alignment}, + Volume = {35}, + Year = {2019}} + +@article{Antipov:2016aa, + Author = {Antipov, Dmitry and Korobeynikov, Anton and McLean, Jeffrey S and Pevzner, Pavel A}, + Journal = {Bioinformatics}, + Month = {04}, + Number = {7}, + Pages = {1009-15}, + Title = {hybridSPAdes: an algorithm for hybrid assembly of short and long reads}, + Volume = {32}, + Year = {2016}} + +@article{Li_minigraph:2020aa, + Author = {Li, Heng}, + Doi = {10.5281/zenodo.4016798}, + Title = {Minigraph: a sequence-to-graph mapper and pangenome graph generator}, + Year = {2020}} + +@article{Seo:2016aa, + Author = {Seo, Jeong-Sun and Rhie, Arang and Kim, Junsoo and Lee, Sangjin and Sohn, Min-Hwan and Kim, Chang-Uk and Hastie, Alex and Cao, Han and Yun, Ji-Young and Kim, Jihye and Kuk, Junho and Park, Gun Hwa and Kim, Juhyeok and Ryu, Hanna and Kim, Jongbum and Roh, Mira and Baek, Jeonghun and Hunkapiller, Michael W and Korlach, Jonas and Shin, Jong-Yeon and Kim, Changhoon}, + Journal = {Nature}, + Month = {Oct}, + Number = {7624}, + Pages = {243-247}, + Title = {De novo assembly and phasing of a Korean human genome}, + Volume = {538}, + Doi = {10.1038/nature20098}, + Year = {2016}} + +@article{Kronenberg:2018aa, + Author = {Kronenberg, Zev N and Fiddes, Ian T and Gordon, David and Murali, Shwetha and Cantsilieris, Stuart and Meyerson, Olivia S and Underwood, Jason G and Nelson, Bradley J and Chaisson, Mark J P and Dougherty, Max L and Munson, Katherine M and Hastie, Alex R 
and Diekhans, Mark and Hormozdiari, Fereydoun and Lorusso, Nicola and Hoekzema, Kendra and Qiu, Ruolan and Clark, Karen and Raja, Archana and Welch, AnneMarie E and Sorensen, Melanie and Baker, Carl and Fulton, Robert S and Armstrong, Joel and Graves-Lindsay, Tina A and Denli, Ahmet M and Hoppe, Emma R and Hsieh, PingHsun and Hill, Christopher M and Pang, Andy Wing Chun and Lee, Joyce and Lam, Ernest T and Dutcher, Susan K and Gage, Fred H and Warren, Wesley C and Shendure, Jay and Haussler, David and Schneider, Valerie A and Cao, Han and Ventura, Mario and Wilson, Richard K and Paten, Benedict and Pollen, Alex and Eichler, Evan E}, + Journal = {Science}, + Month = {06}, + Number = {6393}, + Title = {High-resolution comparative analysis of great ape genomes}, + Volume = {360}, + Doi = {10.1126/science.aar6343}, + Year = {2018}} + +@article{Gordon:2016kq, + Author = {Gordon, David and Huddleston, John and Chaisson, Mark J P and Hill, Christopher M and Kronenberg, Zev N and Munson, Katherine M and Malig, Maika and Raja, Archana and Fiddes, Ian and Hillier, LaDeana W and Dunn, Christopher and Baker, Carl and Armstrong, Joel and Diekhans, Mark and Paten, Benedict and Shendure, Jay and Wilson, Richard K and Haussler, David and Chin, Chen-Shan and Eichler, Evan E}, + Journal = {Science}, + Month = {Apr}, + Number = {6281}, + Pages = {aae0344}, + Title = {Long-read sequence assembly of the gorilla genome}, + Volume = {352}, + Doi = {10.1126/science.aae0344}, + Year = {2016}} diff --git a/tex/minigraph.tex b/tex/minigraph.tex new file mode 100644 index 0000000..900658f --- /dev/null +++ b/tex/minigraph.tex @@ -0,0 +1,986 @@ +%% BioMed_Central_Tex_Template_v1.06 + +\documentclass[twocolumn]{bmcart} + +%%% Load packages +\usepackage{amsthm,amsmath} +\RequirePackage{hyperref} +\usepackage[utf8]{inputenc} %unicode support + +\usepackage{graphicx} +%\def\includegraphic{} +%\def\includegraphics{} + +%%% Put your definitions there: +\startlocaldefs +\endlocaldefs + + +%%% Begin 
... +\begin{document} + +%%% Start of article front matter +\begin{frontmatter} + +\begin{fmbox} +\dochead{Method} + +\title{The design and construction of reference pangenome graphs with minigraph} + +\author[ + addressref={aff1,aff2}, % id's of addresses, e.g. {aff1,aff2} + corref={aff1}, % id of corresponding address, if any + email={hli@ds.dfci.harvard.edu} % email address +]{\inits{HL}\fnm{Heng} \snm{Li}} +\author[ + addressref={aff1,aff2}, +]{\inits{XF}\fnm{Xiaowen} \snm{Feng}} +\author[ + addressref={aff2}, +]{\inits{CC}\fnm{Chong} \snm{Chu}} + +\address[id=aff1]{% % unique id + \orgname{Department of Data Sciences, Dana-Farber Cancer Institute}, % university, etc + \city{Boston, MA 02215}, % city + \cny{USA} % country +} +\address[id=aff2]{% + \orgname{Department of Biomedical Informatics, Harvard Medical School}, + \city{Boston, MA 02215}, + \cny{USA} +} + +\begin{abstractbox} + +\begin{abstract} % abstract +The recent advances in sequencing technologies enable the assembly of +individual genomes to the quality of the reference genome. How to integrate +multiple genomes from the same species and make the integrated representation +accessible to biologists remains an open challenge. Here, we propose a +graph-based data model and associated formats to represent multiple genomes +while preserving the coordinate of the linear reference genome. We implement +our ideas in the minigraph toolkit and demonstrate that we can efficiently +construct a pangenome graph and compactly encode tens of thousands of +structural variants missing from the current reference genome. +\end{abstract} + +\begin{keyword} +\kwd{bioinformatics} +\kwd{genomics} +\kwd{pangenome} +\end{keyword} + +\end{abstractbox} + +\end{fmbox} + +\end{frontmatter} + +%% +\section*{Background} + +The human reference genome is a fundamental resource for human genetics and +biomedical research. 
The primary sequences of the reference genome +GRCh38~\cite{Schneider:2017aa} are a mosaic of haplotypes with each haplotype segment derived +from a single human individual. They cannot represent the genetic diversity in +human populations and as a result, each individual may carry thousands of large +germline variants absent from the reference genome~\cite{Huddleston:2017aa}. +Some of these variants are likely associated with phenotypes~\cite{Eichler_2010} +but are often missed or misinterpreted when we map sequence data to GRCh38, in +particular with short reads~\cite{Li:2018aa}. This under-representation of +genetic diversity may become a limiting factor in our understanding of genetic +variations. + +Meanwhile, the advances in long-read sequencing technologies make it possible +to assemble a human individual to a quality comparable to +GRCh38~\cite{Schneider:2017aa,Wenger_2019}. There are already a dozen +high-quality human assemblies available in GenBank~\cite{Audano:2019aa}. +Properly integrating these genomes into a reference \emph{pangenome}, which +refers to a collection of genomes~\cite{cpgc:2016aa}, would potentially address +the issues with a single linear reference. + +A straightforward way to represent a pangenome is to store unaligned genomes +in a full-text index that compresses redundancies in sequences identical +between individuals~\cite{Makinen:2010aa,Liu_2016,Boucher_2019}. We may +retrieve individual genomes from the index, inspect the k-mer spectrum and test +the presence of k-mers using standard techniques. In principle, it is also +possible to apply canonical read alignment algorithms to map sequences to +the collection, but in practice, the redundant hits to multiple genomes will +confuse downstream mapping-based analyses~\cite{NA2016159}. It is not clear how +to resolve these multiple mappings.
+ +The other class of methods encodes multiple genomes into a sequence graph, +usually by collapsing identical or similar sequences between genomes onto a +single representative sequence. This results in a \emph{pangenome graph}. A +pangenome graph is a powerful tool to identify the core genome, the part of a +genome or gene set that is shared across the majority of the strains or related species +in a clade~\cite{Vernikos:2015aa}. A common way to construct a basic pangenome +graph is to generate a compacted de Bruijn graph +(cDBG)~\cite{Marcus:2014xy,Baier_2015,Beller:2016ab,Chikhi:2015aa,Minkin_2016,Chikhi_2016,almodaresi_et_al:LIPIcs:2017:7657} +from a set of genomes. Basic cDBG does not keep sample information. +\cite{Iqbal:2012aa} proposed colored cDBG in which each color represents a sample +or a population. Colored cDBG can be constructed +efficiently~\cite{Muggli_2019,Holley695338}. However, a colored cDBG discards +the chromosomal coordinate and thus disallows the mapping of genomic features. +It often includes connections absent from the input genomes and thus encodes +more sequences than the input. A colored cDBG cannot serve as a +\emph{reference} pangenome graph, either. deBGA~\cite{Liu:2016ac} addresses +the issue by labeling each unitig with its possibly multiple locations in the +input genome(s). Pufferfish~\cite{Almodaresi:2018aa} further reduces its space +requirement. Nonetheless, given hundreds of human genomes, there will be many +more vertices in the graph and most vertices are associated with hundreds of +labels. Whether deBGA and pufferfish can scale to such datasets remains an open +question. GBWT~\cite{Sir_n_2019} provides another practical solution to storage +and indexing, but no existing tools can practically construct a cDBG for many +human genomes in the GBWT representation. + +In addition to cDBG, we can derive a reference pangenome +graph from a single linear multi-sequence alignment (MSA)~\cite{Dilthey_2015,Dilthey_2019}.
+It has been used for HLA typing but is not applicable to whole chromosomes when +they cannot be included in a single linear MSA. The third and possibly the most +popular approach to reference graph generation is to call variants from other +sources and then incorporate these variants, often in the VCF format~\cite{Danecek:2011qy}, into +the reference genome as alternative +paths~\cite{Eggertsson:2017aa,Rakocevic_2019,Sibbesen:2018aa,Biederstedt:2018aa,Eggertsson_2019}. +However, because VCF does not define coordinates on insertions, this approach +cannot properly encode variations on long insertions and is therefore limited +to simple variations. There are no satisfactory solutions to the construction +of reference pangenome graphs. + +In this article, we introduce the reference Graphical Fragment Assembly (rGFA) +format to model reference pangenome graphs. We propose and demonstrate an +incremental procedure to construct graphs under this model. The resulting +graphs encode structural variations (SVs) of length 100bp or longer without haplotype +information. Our implementation, minigraph~\cite{Li_minigraph:2020aa} +(\href{https://github.com/lh3/minigraph}{https://github.com/lh3/minigraph}), +can construct a pangenome graph from twenty human assemblies in three hours. + +\section*{Results} + +We will first describe a data model for reference pangenome graphs, which +establishes the foundation of this article. We will then present a new +sequence-to-graph mapper, minigraph, and show how this mapper incrementally +constructs a pangenome graph. We will demonstrate the utility of pangenome +graphs with a human graph generated from twenty human haplotypes and a primate +graph generated from four species. + +\subsection*{Modeling reference pangenome graphs} + +\subsubsection*{Sequence graphs} + +There are several equivalent ways to define a sequence graph. In this article, +a \emph{sequence graph} $G(V,E)$ is a bidirected graph. 
Each vertex $v\in V$ is +associated with a DNA sequence; each edge $e\in E$ has two directions, one for +each endpoint, which leads to four types of edges: forward-forward, +reverse-forward, forward-reverse and reverse-reverse. The directions on an edge +dictate how a sequence is spelled from a walk/path in the graph. Common +assembly graphs, such as the overlap graph, string graph and de Bruijn graph +can all be formulated as sequence graphs. + +\begin{figure}[t] +\includegraphics[width=.47\textwidth]{Fig1} +\caption{\csentence{Example rGFA and GAF formats.} {\bf (a)} Example rGFA + format. rGFA-specific tags include SN, name of the stable sequence from which + the vertex is derived; SO, offset on the stable sequence; SR, rank: 0 if the + vertex or edge is on the linear reference; $>$0 for non-reference. {\bf (b)} + Corresponding sequence graph. Each thick arrow represents an oriented DNA + sequence. {\bf (c)} Example GAF format, using the segment coordinate, for + reads ``${\tt GTGGCT}$'' and ``${\tt CGTTTCC}$'' mapped to the graph. {\bf + (d)} Equivalent GAF format using the stable coordinate.}\label{fig:rgfa} +\end{figure} + +The Graphical Fragment Assembly (GFA) format~\cite{Li:2016aa} describes +sequence graphs. The core of GFA is defined by the following grammar: + +{\footnotesize +\begin{verbatim} + +<gfa> <- (<S-line> | <L-line>)+ +<S-line> <- `S' <segId> <seq> +<L-line> <- `L' <segId> [+-] <segId> [+-] <cigar> + +\end{verbatim}} + +{\flushleft +A line starting with letter ``${\tt S}$'' corresponds to a vertex and a line +starting with ``${\tt L}$'' corresponds +to a bidirected edge. In a de Bruijn graph, we often attach sequences to edges +instead of vertices~\cite{Pevzner:2001vn,Gnerre:2011ys}. To avoid confusion, in this +article, we also call a vertex a \emph{segment} and an edge a +\emph{link}, following the GFA terminology. Fig.~\ref{fig:rgfa}a shows an +example GFA that encodes Fig.~\ref{fig:rgfa}b.
+} + +A sequence graph in the GFA format natively defines a \emph{segment coordinate} +system where each base in the graph is uniquely indexed by a +2-tuple $({\rm segId},{\rm segOffset})$. For example, in +Fig~\ref{fig:rgfa}a, the base at position $({\rm s2},2)$ is ``{\tt G}''. +A major problem with this coordinate is that it is decoupled from linear +annotations and is sensitive to graph transformations. For example, if we split +a segment into two connected segments, the set of sequences spelled from the graph +remains the same, but the segment coordinates will be changed. Due to the +instability of segment coordinate, a basic sequence graph is inadequate for a +reference graph. + +\subsubsection*{Reference pangenome graphs} + +We propose the reference GFA (rGFA) format to encode reference pangenome graphs. +rGFA is an extension to GFA with three additional tags that indicate the origin +of a segment from linear genomes (Fig.~\ref{fig:rgfa}a). This simple addition +gives us a unique stable coordinate system as an extension to the linear +reference coordinate (e.g. GRCh38). We can pinpoint a position such as +``{\sf chr1:9}'' in the graph and map existing annotations onto the graph. We can +also report a path or walk in the stable coordinate. For example, path +``{\sf s1$\to$s2$\to$s3}'' unambiguously corresponds to ``{\sf +chr1:0-5$\to$chr1:5-8$\to$chr1:8-12}'' or simply ``{\sf chr1:0-12}'' if we +merge adjacent coordinate; similarly, ``{\sf s1$\to$s2$\to$s5$\to$s6}'' +corresponds to ``{\sf chr1:0-8$\to$foo:8-16}''. We will formally describe the +path format when introducing the GAF format in the next section. + +In rGFA, each segment is associated with one origin. This apparently trivial +requirement in fact imposes a strong restriction on the types of graphs rGFA +can encode: it forbids the collapse of different regions from one sequence, +which would often happen in a cDBG. 
We consider this restriction an +advantage of rGFA because it requires the graph to have a ``linear'' flavor +intuitively and simplifies the data structure to store the graph. + +For simplicity, rGFA disallows overlaps between edges and forbids multiple +edges (more than one edge between the same pair of vertices). These two +restrictions help to avoid ambiguity and reduce the complexity in +implementation. They are not strictly necessary in theory. + +\subsubsection*{The Graphical mApping Format (GAF)} + +\begin{table}[tb] +\caption{The Graphical mApping Format (GAF)}\label{tab:gaf} +\begin{tabular}{rcp{6cm}} +\hline +Col & Type & Description \\ \hline +1 & string & Query sequence name \\ +2 & int & Query sequence length \\ +3 & int & Query start coordinate (0-based; closed) \\ +4 & int & Query end coordinate (0-based; open) \\ +5 & char & Strand relative to col. 6 \\ +6 & string & Graph path matching regular expression \texttt{/([><][\char94\char92s><]+(:\char92d+-\char92d+)?)+\char124([\char94\char92s><]+)/}\\ +7 & int & Path sequence length \\ +8 & int & Path start coordinate \\ +9 & int & Path end coordinate \\ +10 & int & Number of matching bases in the mapping \\ +11 & int & Number of bases, including gaps, in the mapping \\ +12 & int & Mapping quality (0--255 with 255 for missing) \\ \hline +\end{tabular} +\end{table} + +As there are no text formats for sequence-to-graph alignment, we propose a new +Graphical mApping Format (GAF) by extending the Pairwise mApping Format +(PAF)~\cite{Li:2016aa}. GAF is TAB-delimited with each column defined in +Table~\ref{tab:gaf}. Column 6 encodes a path on the graph. It follows the +formal grammar below: + +{\footnotesize +\begin{verbatim} + +<path> <- <stableId> | <orientIntv>+ +<orientIntv> <- (`>' | `<') (<segId> | <stableIntv>) +<stableIntv> <- <stableId> `:' <start> `-' <end> + +\end{verbatim}} + +{\flushleft +In this grammar, {\tt <segId>} is a segment identifier on an S-line in rGFA; +{\tt <stableId>} is a stable sequence name at the {\tt SN} tag on the +corresponding S-line.
Column 6 can be either a path in the segment coordinate +(Fig.~\ref{fig:rgfa}c) or an equivalent path in the stable coordinate +(Fig.~\ref{fig:rgfa}d). We can merge adjacent stable coordinates if the two +segments are originated from the same stable sequence and the end offset of the +first segment is equal to the start offset of the second segment. For example, +``{\tt >chr1:0-5>chr1:5-8}'' can be simplified to ``{\tt >chr1:0-8}''. +Furthermore, if a path in column 6 is derived from one reference sequence, we +recommend to replace it with the entire reference path on the forward +orientation (e.g. see ``read1'' in Fig.~\ref{fig:rgfa}d). With this convention, +a GAF line is reduced to PAF for a sequence mapped to a reference sequence. +Similar to PAF, GAF also allows optional tags in the SAM-like format. Base +alignment is kept at the {\tt cg} tag.} + +Minigraph produces GAF in both the segment and the stable coordinate. +GraphAligner~\cite{Rautiainen810812} produces GAF in the segment coordinate +only, which can be converted to the stable coordinate. + +\begin{figure}[t] +\includegraphics[width=.47\textwidth]{Fig2} +\caption{\csentence{Minigraph algorithms.} {\bf (a)} Diagram of the minigraph + mapping algorithm. Minigraph seeds alignments with minimizers, finds good + enough linear chains, connects them in the graph and seeks the most weighted + path as a graph chain. {\bf (b)} Diagram of incremental graph construction. A + graph is iteratively constructed by mapping each assembly to an existing + graph and augmenting the graph with long poorly mapped sequences in the + assembly.}\label{fig:mg} +\end{figure} + +\subsection*{Sequence-to-graph mapping} + +Our incremental graph construction algorithm relies on genome-to-graph +alignment (Fig.~\ref{fig:mg}b). 
As existing sequence-to-graph +aligners~\cite{Rautiainen810812,Garrison:2018aa} do not work with +chromosome-long query sequences, we adapted minimap2~\cite{Li:2018ab} for our +purpose and implemented minigraph (Fig.~\ref{fig:mg}a). Briefly, minigraph uses +a minimap2-like algorithm to find local hits to segments in the graph, ignoring +the graph topology. It then chains these local hits if they are connected on +the graph, possibly through cycles. This gives the approximate mapping locations. Minigraph does not +perform base-level alignment. This is because the graph we construct encodes +SVs and rarely contains paths similar at the base level. The best mapping is +often clear without base alignment. + +\begin{table}[b] +\caption{Performance of sequence-to-graph mapping}\label{tab:mgvga} +\begin{tabular}{lrr} +\hline +& minigraph & GraphAligner \\ +\hline +Indexing time (wall-clock sec) & 100 & 589 \\ +Mapping time (wall-clock sec) & 79 & 140 \\ +Peak RAM (GB) & 19.5 & 27.2 \\ +Percent unmapped reads & 0.5\% & 0\% \\ +Percent wrong mappings & 1.7\% & 4.6\% \\ +\hline +\end{tabular} +\end{table} + +To evaluate the accuracy of minigraph mapping, we simulated PacBio reads from +GRCh38 with PBSIM~\cite{Ono:2013aa} and mapped them to the graph we constructed +in the next section. Table~\ref{tab:mgvga} compares the performance of +minigraph and GraphAligner~\cite{Rautiainen810812} v1.0.10 on 68,857 simulated +reads mapped over 8 CPU threads. {\color{black} The N50 read length is 15kb. +9,862 reads are mapped across two or more segments by GraphAligner. Note that +both minigraph and GraphAligner ignore the stable coordinates during mapping. +All segments, originated either from GRCh38 or from individual genomes, are +treated equally. 
To this end, while we simulated reads from GRCh38, we are also +evaluating how well mappers work with complex SVs present in any input +samples.} + +On this dataset, minigraph +is faster than GraphAligner and uses less memory, partly because minigraph does +not perform base alignment. +As is shown in Table~\ref{tab:mgvga}, minigraph is more accurate than +GraphAligner. This is counter-intuitive given that GraphAligner does base +alignment. Close inspection reveals that most mismapped reads by minigraph are +mapped to the correct genomic loci but wrong graph paths. On the contrary, most +mismapped reads by GraphAligner are mapped to wrong genomic loci. This suggests +minigraph is better at finding approximate mapping locations but GraphAligner +is better at disambiguating similar graph paths. Combining the strength of +both could lead to a better graph mapper. We do plan to implement base-level +alignment in minigraph in future. + +We have also tried vg v1.21.0~\cite{Garrison:2018aa}. It indexed the same graph in 14.7 wall-clock +hours and mapped the simulated reads in 1.8 hours over 8 threads, tens of times +slower than minigraph and GraphAligner. However, no reads are mapped in the +output. We have not been able to make vg work with our data. + +\subsection*{Generating pangenome graphs} + +Fig.~\ref{fig:mg}b shows how minigraph constructs a pangenome graph (see +Methods for details). This procedure is similar to multiple sequence alignment +via partial order graph~\cite{Lee_2002} except that minigraph works with cyclic +graphs and ignores small variants. Minigraph only considers SVs of +100bp--100kb in length and ignores SVs in alignments shorter than 100kb. +For each input assembly, it filters out regions covered by two or more primary +alignments longer than 20kb in the assembly. This filter avoids paralogous +regions in a sample and guarantees that graphs generated by minigraph can be +modeled by rGFA. 
+ +As a sanity check, we compared minigraph to dipcall +(\href{https://github.com/lh3/dipcall}{https://github.com/lh3/dipcall}) on +calling SVs 100bp or longer from a synthetic diploid sample composed of CHM1 +and CHM13~\cite{Li:2018aa}. Given two SV callsets $A$ and $B$, we say a call in +$A$ is \emph{missed} in callset $B$ if there are no calls in $B$ within 1000bp +from the call in $A$. With this criterion, 2.7\% of 14,792 SVs called by +dipcall are missed by minigraph; 6.0\% of 14,932 minigraph SVs are missed by +dipcall. We manually inspected tens of differences in +IGV~\cite{Robinson:2011aa} and identified two causes. First, an INDEL longer +than 100bp called by one caller may be split into two shorter INDELs by the +other caller. There are often more than one smaller SVs around a missed SV +call. Second, dipcall skips regions involving high density of SNPs or involving +both long insertions and long deletions, but minigraph connects these events +and calls SVs in such regions. It tends to call more SVs. Overall, we believe +minigraph and dipcall found similar sets of SVs. 
+ +\begin{table}[tb] +\caption{Assemblies used for graph construction}\label{tab:asm} +\begin{tabular}{llll} +\hline +Name & Species & Population & Accession/Source \\ \hline +CHM1 & Human & N/A & GCA\_001297185.1 \\ +CHM13 & Human & N/A & GCA\_000983455.1 \\ +NA12878 & Human & European & \cite{Garg810341}, phased \\ +NA24385 & Human & Jewish & \cite{Garg810341}, phased \\ +PGP1 & Human & N/A & \cite{Garg810341}, phased \\ +NA19240 & Human & African & GCA\_001524155.4 \\ +HG00514 & Human & East Asian & GCA\_002180035.3 \\ +HG01352 & Human & American & GCA\_002209525.2 \\ +NA19434 & Human & African & GCA\_002872155.1 \\ +HG02818 & Human & African & GCA\_003574075.1 \\ +HG03486 & Human & African & GCA\_003086635.1 \\ +HG03807 & Human & South Asian& GCA\_003601015.1 \\ +HG00733 & Human & American & GCA\_002208065.1 \\ +HG02059 & Human & East Asian & GCA\_003070785.1 \\ +HG00268 & Human & European & GCA\_008065235.1 \\ +HG04217 & Human & South Asian& GCA\_007821485.1 \\ +AK1 & Human & East Asian & GCA\_001750385.1 \\ +Clint & Chimpanzee & & GCA\_002880755.3 \\ +Susie & Gorilla & & GCA\_900006655.3 \\ +Kamilah & Gorilla & & GCA\_008122165.1 \\ +Susie & Orangutan & & GCA\_002880775.3 \\ +\hline +\end{tabular} +\end{table} + +\begin{figure*}[htbp] +\includegraphics[width=.95\textwidth]{Fig3} +\caption{\csentence{Characteristics of the human and the great ape graphs.} {\bf + (a)} Human variations stratified by repeat class and by the number of + alleles of each variation. The repeat annotation was obtained from the + longest allele of each variation. VNTR: variable-number tandem repeat, a + tandem repeat with the unit motif length $\ge$7bp. STR: short tandem repeat, + a tandem repeat with the unit motif length $\le$6bp. LCR: low-complexity + regions. Mixed-inter.: a variation involving $\ge$2 types of interspersed + repeats. {\bf (b)} Great ape variations stratified by repeat class and by the + number of alleles.
{\bf (c)} Human biallelic variations stratified by repeat + class and by insertion to/deletion from GRCh38. Both alleles are required to + be covered in all assemblies. {\bf (d)} Human-specific biallelic variations + stratified by repeat class and by insertion to/deletion from GRCh38. Red bars + correspond to insertions to the human lineage. {\bf (e)} Distribution of + different types of human variations along chromosomes. {\bf (f)} Boxplot of + the longest allele length in each repeat class. Outliers are omitted for the + clarity of the figure.}\label{fig:anno} +\end{figure*} + +\subsection*{A human pangenome graph} + +Starting with GRCh38, we constructed a human pangenome graph from 20 human +haplotypes or haplotype-collapsed assemblies (Table~\ref{tab:asm}). It took +minigraph 2.7 wall-clock hours over 24 CPU threads to generate this graph. The +peak memory is 98.1GB. The resulting graph consists of 148,618 segments and +214,995 links. It contains 37,332 variations, where a \emph{variation} +denotes a minimal subgraph that has a single source and a single sink with both +segments coming from GRCh38. A path through the bubble between the source +and the sink represents an \emph{allele}. + +Variations in the human graph are enriched with Alus and VNTRs +(Fig.~\ref{fig:anno}a). While interspersed repeats are about evenly distributed +along chromosomes except in the pseudoautosomal regions (Fig.~\ref{fig:anno}e), +VNTRs are enriched towards telomeres~\cite{Audano:2019aa}. It is worth noting +the density of minisatellites is also higher in subtelomeres. If we normalize +the density of VNTRs in the pangenome graph by the density of minisatellites in +GRCh38, the enrichment of VNTRs towards telomeres is still visible but becomes +less prominent. At the same time, repeat-less variations are also enriched +towards the ends of chromosomes (green areas in Fig.~\ref{fig:anno}e), +suggesting subtelomeres tend to harbor SVs anyway.
We also +identified 85 processed pseudogenes among these variations. + +\begin{figure} +\includegraphics[width=.46\textwidth]{igv-edit.png} +\caption{\csentence{IGV screenshot of a region enriched with long insertions.} + Numbers on wide purple bars indicate insertion lengths. CLR: PacBio noisy + continuous long reads. HiFi: PacBio high-fidelity reads.}\label{fig:igv} +\end{figure} + +Another noticeable feature of VNTRs is that over half of VNTR variations are +multiallelic (Fig.~\ref{fig:anno}a). Fig.~\ref{fig:igv} shows a multi-allelic +region composed of VNTRs. We can see many insertions of different lengths. The +two different NA12878 assemblies also disagree with each other, which we often +see around other VNTR loci in NA12878 as well. We have not inspected raw reads +in this particular example, but we tend to believe the disagreement is caused +by local misassemblies rather than somatic mutations. In addition, due to the +multiallelic nature of such VNTRs, the two haplotypes in a human individual are +often different. Assemblies mixing the two haplotypes (aka collapsed +assemblies) may have more troubles in these regions. Multiallelic VNTRs are +hard to assemble correctly. + +Multiallelic VNTRs are also hard to align and to call. In Fig.~\ref{fig:igv}, +the insertion positions are often different, which could be caused by a few +mutations or sequencing errors. A naive alignment-based SV caller would call a +dozen of low-frequency insertions in this region, which does not reflect these +correlated events. Without base-level alignment, minigraph may +have more troubles with obtaining the optimal alignment in these complex VNTR +regions. Improved data quality, assembly algorithms and graph mapping +algorithms are required to investigate VNTR regions in detail. + +\subsection*{A great ape pangenome graph} + +We also constructed a great ape pangenome graph from GRCh38, one chimpanzee, +two gorillas and one orangutan (Table~\ref{tab:asm}). 
This graph contains +206,452 variations, over four times more than the human graph. About half of +the variations originate from orangutan, the species most distant from human. + +In the great ape graph, the L1-to-Alu ratio is close to 1:1, much higher than +the ratio in the human graph (Fig.~\ref{fig:anno}b vs Fig.~\ref{fig:anno}a). +This is perhaps correlated with the elevated L1 activity in great +apes~\cite{Mathews:2003aa}. Of retrotransposon-related variations specific to +the human lineage, the overwhelming majority are insertions +(Fig.~\ref{fig:anno}d), which is expected as transpositions lead to insertions +only. Most human-specific Alu deletions are incomplete and involve ancient Alu +subfamilies. They are likely genomic deletions that happen to hit Alus. In +contrast, the majority of ``partial-repeats'' are deletions from the human +lineage. Two thirds of autosomal insertions in this category are segmental +duplications in GRCh38. In all, minigraph is an efficient tool to study closely +related species. + +\subsection*{Blacklist regions from human pangenome graphs} + +The human pangenome graph effectively encodes SVs $\ge$100bp +in 20 genomes. These large-scale variations could be a frequent source of +technical artifacts in variant calling with short reads. To test this +hypothesis, we compared short-read SNP calls with vs without regions around SVs +in the pangenome graph. + +We constructed a human pangenome graph excluding CHM1 and CHM13, the two +samples used in the SynDip benchmark~\cite{Li:2018aa}, and generated regions +around variations (see Methods), which we call \emph{blacklist regions}, +following the rationale in~\cite{Amemiya:2019aa}. Blacklist regions total +29.2Mb in length, intersecting 0.7\% of confident regions in +SynDip~\cite{Li:2018aa}; 0.7\% of truth SNPs are contained in blacklist regions +-- true SNPs are not enriched in blacklist regions.
+ +We mapped short reads used in~\cite{Li:2018aa} with minimap2 and called +variants with GATK v4.1.2~\cite{Depristo:2011vn}. This callset +contains 32,879 false positive SNPs, 21\% of which fall in blacklist regions -- +false SNP calls are highly enriched in this $<$1\% region of human genome. This +confirms a noticeable fraction of false SNP calls using short reads are +resulted from misalignment involving SVs. + +\section*{Discussion} + +Based on the GFA assembly format~\cite{Li:2016aa}, we proposed the rGFA format, +which defines a data model for reference pangenome graphs at the same time. +rGFA takes a linear reference genome as the backbone and maintains the +conceptual ``linearity'' of input genomes. + +rGFA is not the only pangenome graph model. Vg~\cite{Garrison:2018aa} +encodes a stable sequence with a path through the sequence graph~\cite{10.12688/f1000research.19630.1}. A segment +in the graph may occur on multiple paths, or occur multiple times on one path +if there are cycles in the graph. This way, vg allows different regions in one +chromosome collapsed to one segment. We call such a graph as a collapsed graph. rGFA +cannot encode a collapsed graph. The vg model is thus more general. + +In our view, however, the reference pangenome graph should not be a collapsed +graph. In a collapsed graph, the definition of orthology is not clear because +multiple sequences from the same sample may go through the same segment. +Without the concept of orthology, we cannot define variations, either. In +addition, due to the one-to-many relationship between segments and the +reference genome, it is intricate to derive the stable coordinate of a path in +a collapsed graph. For example, suppose segment {\sf s1} corresponds to two +regions {\sf chr1:100-200} and {\sf chr1:500-600}. 
To convert a path {\sf +s2$\to$s1$\to$s3} to the stable coordinate, we have to inspect adjacent +segments to tell which region {\sf s1} corresponds to; this becomes more challenging +when {\sf s2} and {\sf s3} represent multiple regions in the reference genome. +In contrast, rGFA inherently forbids a collapsed graph and avoids the potential +issues above. This makes rGFA simpler than vg's path model and easier to work +with. + +To demonstrate practical applications of rGFA, we developed minigraph to +incrementally generate pangenome graphs. It can generate a graph from 20 +genomes in three hours and can scale to hundreds of genomes in future. A +limitation of minigraph is that it does not perform base alignment and may be +confused by similar paths in the graph. {\color{black} Unfortunately, base-level +sequence-to-graph alignment is not a fully solved problem. Partial-order graph +alignment~\cite{Lee_2002} and PaSGAL~\cite{DBLP:conf/ipps/JainMZDA19} only work +with directed acyclic graphs (DAGs). Vg~\cite{Garrison:2018aa} uses a heuristic +to unroll cycles but it is exponential in time in the worst case and for DAGs, +its exact mode is tens of times slower than PaSGAL. Antipov et +al.~\cite{Antipov:2016aa} proved that alignment against cyclic graphs can be +done in polynomial time. GraphAligner~\cite{Rautiainen810812} implements a +fast quadratic algorithm for computing edit distance~\cite{Rautiainen_2019}. +However, edit distance based alignment disallows long INDELs and is often +inadequate for accurate variant calling. Jain et al.~\cite{Jain_2020} recently +proposed a quadratic algorithm for alignment with affine gap penalty but the +authors focused on the theoretical analysis only. To the best of our knowledge, +no tools can efficiently perform sequence-to-graph alignment under affine gap +cost. We plan to learn from the existing algorithms and implement fast base +alignment in minigraph in future.
This may take significant effort.} + +Another limitation of minigraph is +that it is unable to align sequences against a graph encoding all small variants. +Such a graph will be composed of millions of short segments. Not +indexing minimizers across segments, minigraph will fail to seed the initial +linear chains. This limitation can only be resolved by completely changing the +minigraph mapping algorithm. Nonetheless, small variants are easier to +analyze with the standard methods. Incorporating these variants unnecessarily +enlarges the graph, complicates implementations, increases the rate of false +mappings~\cite{Pritt_2018} and reduces the performance of common tasks. There +is also no known algorithm that can construct such a complex graph for hundreds +of human genomes. + +Minigraph does not keep track of the sample information as of now. To address +this issue, we are considering to implement colored rGFA, similar to colored de +Bruijn graphs~\cite{Iqbal:2012aa}. In a colored rGFA, a color represents one +sample. Each segment or link is associated with one or multiple colors, +indicating the sources of the segment or the link. Colors can be stored in an +rGFA tag or in a separate segment/link-by-sample binary +matrix~\cite{Holley695338}. The matrix representation may be more compact given +a large number of samples. + +We have shown minigraph can be a fast and powerful research tool to summarize +SVs at the population scale and to study the evolution of closely related +species. A more practical question is how a reference pangenome graph may +influence routine data analysis. Here is our limited view. + +We think a critical role a reference graph plays is that it extends the +coordinate system of a linear reference genome. This allows us to annotate +variations in highly diverse regions such as the human HLA and KIR regions. The +existing pipelines largely ignore these variations because most of them cannot +be encoded in the primary assembly of GRCh38. 
+ +The extended graph coordinate system further helps to consistently represent +complex SVs. Given multiple samples, the current practice is to call SVs from +individual samples and then merge them. Two subtly different SVs, especially +long insertions, may be called at two distinct locations and treated as +separate events. With the minigraph procedure, the two SVs are likely to +be aligned together as long as they are similar to each other and are +sufficiently different from the reference allele. To some extent, minigraph is +performing multiple sequence alignment with partial order +alignment~\cite{Lee_2002}. This procedure is more robust to different +representations of the same SV than naive merging. When we refer to a SNP, we often use its +chromosomal coordinate such as ``chr1:12345''. We rarely do so for SVs because +their positions are sensitive to alignment and SV callers. The more consistent +SV representation implied by a pangenome graph will help to alleviate the issue +and subsequently facilitate the genotyping of +SVs~\cite{Hickey_2020,Eggertsson_2019,Chen_2019}. + +While we believe a reference pangenome graph will make complex variations more +accessible by geneticists and biologists, we suspect a great majority of +biomedical researchers will still rely on a linear reference genome due to the +conceptual simplicity of linear genomes and the mature tool chains developed in +decades. Many analyses such as SNP calling in well behaved regions do not +benefit much from a pangenome representation, either. Nonetheless, a pangenome +reference still helps applications based on linear references. With a graph +reference, we may blacklist regions enriched with SVs that lead to small variant +calling errors. We may potentially generate ``decoy'' sequences that are +missing from the primary assembly to attract falsely mapped reads away. 
We may +perform read alignment against a graph, project the alignment to the linear +coordinate and finish the rest of analyses in the linear space. We anticipate a +pangenome reference to supplement the linear reference, not to replace it. + +\section*{Conclusions} + +Complex human sequence variations are like genomic dark matter: they are +pervasive in our genomes but are often opaque to the assay with the existing +tools. We envision a pangenome graph reference will become an effective +means to the study of these complex variations. We proposed a data model (rGFA), +designed formats (rGFA and GAF) and developed companion tools (minigraph and +gfatools) to demonstrate the feasibility of our vision. Our work is still +preliminary but it is likely to set a starting point to the development of the +next-generation graph-based tools, which may ultimately help us to understand +our genomes better. + +\section*{Methods} + +\subsection*{The minigraph mapping algorithm} + +\subsubsection*{Seeding and linear chaining} +Similar to minimap2, minigraph uses minimizers on segments as seeds. It also +applies a similar chaining algorithm but with different scoring and with a new +heuristic to speed up chaining over long distances. For the completeness of +this article, we will describe part of the minimap2 chaining algorithm here. + +\paragraph*{Minimap2-like chaining} +Formally, an \emph{anchor} is a 3-tuple $(x,y,w)$, representing a closed +interval $[x-w+1,x]$ on a segment in the reference graph matching an interval +$[y-w+1,y]$ on the query. Given a list of anchors sorted by $x$, let $f(i)$ be +the maximal chaining score up to the $i$-th anchor in the list. $f(i)$ can be +computed by: +\begin{equation}\label{eq:dp} +f(i)=\max\big\{\max_{i>j\ge1}\{f(j)+\alpha(j,i)-\beta(j,i)\},w_i\big\} +\end{equation} +where $\alpha(j,i)=\min\big\{\min\{y_i-y_j,x_i-x_j\},w_i\big\}$ is +the number of matching bases between anchor $i$ and $j$. +$\beta(j,i)$ is the gap penalty. 
Let $g_{ji}=|(y_i-y_j)-(x_i-x_j)|$ +be the gap length and $d_{ji}=\min\{y_i-y_j,x_i-x_j\}$ be the smaller distance +between the two anchors. Minigraph uses the following gap cost: +$$ +\beta(j,i)=\left\{\begin{array}{ll} +\infty & (g_{ji}>G) \\ +c_1\cdot g_{ji} + c_2\cdot d_{ji} + \log_2{g_{ji}} & (0j\ge1$}\\ \text{$x_i-G\le x_j\le x_i-w_i$}\\ \text{$y_i-G\le y_j\le y_i-w_i$}}}\big\{f'(j)+w_j-\beta'(j,i)\big\} +\end{equation} +We can find the optimal $f'(i)$ in $O(n\log n)$ time with +RMQ~\cite{DBLP:conf/wabi/AbouelhodaO03,Otto:2011aa}. To see that, define +$$h'(j)=f'(j)+w_j+c_1(y_j+x_j)$$ +The following condition +$$f'(j)+w_j-\beta'(j,i)>f'(k)+w_k-\beta'(k,i)$$ +is equivalent to $h'(j)>h'(k)$, independent of $i$. If we maintain ${\rm +RMQ}_i$ as the binary tree that keeps $\{(y_j,-h'(j)):j3 alleles' ls 2 + +set out "CHM13-f1-90.bb.anno.len.eps" + +set ylab "Sum of length on reference (Mbp)" off +0.0,0 +set key top left +plot \ + "3 alleles' ls 2 diff --git a/tex/plots/CHM13-f1-90.bb.anno.tbl b/tex/plots/CHM13-f1-90.bb.anno.tbl new file mode 100644 index 0000000..a718520 --- /dev/null +++ b/tex/plots/CHM13-f1-90.bb.anno.tbl @@ -0,0 +1,13 @@ +01_Alu Alu 14298 221 89 4354652 126178 114295 +02_L1 L1 3947 143 96 7536426 703699 642792 +03_SVA SVA 1021 399 704 1240723 328180 995325 +04_ERV ERV 1656 115 127 1009356 204523 1036717 +05_Mixed-MEI Mixed-MEI 2088 251 137 4625357 1690752 2230499 +10_Satellite Satellite 3619 747 1109 4148105 3066920 45077097 +11_VNTR VNTR 5852 4010 8866 737321 838607 11642589 +12_STR STR 4449 2178 910 392351 252638 879580 +13_Other-LCR Other-LCR 3882 791 682 388838 159218 2049820 +20_Mixed-repeat Mixed-repeat 848 185 336 2145808 1171839 7487870 +21_Partial-repeat Partial-repeat 5240 613 760 13853718 6197517 26833676 +30_Non-rep-uniq Non-rep-uniq 9175 501 130 788608 73912 43045 +31_Non-rep-dup Non-rep-dup 878 177 333 441803 74941 1231962 diff --git a/tex/plots/CHM13-f1-90.bb.mini-inter-none.win.gp b/tex/plots/CHM13-f1-90.bb.mini-inter-none.win.gp 
new file mode 100644 index 0000000..c1ff8e2 --- /dev/null +++ b/tex/plots/CHM13-f1-90.bb.mini-inter-none.win.gp @@ -0,0 +1,269 @@ +set t po eps co so enh "Helvetica,18" +set out "chr-plot.eps" +set size 2,1.52 +set multiplot layout 23,1 +set lmargin screen 0.095 +set border 0; unset xtics; unset ytics; set bmargin 0; set tmargin 0.02; set rmargin 0.02 +set style line 1 lc rgb "#377eb8" lw 1 +set style line 2 lc rgb "#e41a1c" lw 1 +set style line 3 lc rgb "#4daf4a" lw 1 +set yran [0:164] + +set style fill solid 0.8 + +set origin 0,1.4447826086956521 +set xran [0:248.387497] +set size 2,0.06521739130434782 +set style rect fc lt -1 fs solid 0.15 noborder +unset obj; unset label +set obj rect from 116.796216, graph 0 to 147.241828, graph 1 +set label "chr1" at screen 0.01, graph 0.5 +set key at screen 1.95,1.32 +plot \ + " CHM13-f1-90.bb.mini-win +gzip -dc CHM13-f1-90.bb.anno.gz | awk '$12~/inter|SINE|LINE|SVA|DNA|ERV/' | ./bedutils.js window -l CHM13v1.size -w500000 -s100000 -c /dev/stdin > CHM13-f1-90.bb.inter-win +gzip -dc CHM13-f1-90.bb.anno.gz | awk '$12~/none|partial|self/' | ./bedutils.js window -l CHM13v1.size -w500000 -s100000 -c /dev/stdin > CHM13-f1-90.bb.none-win + +paste CHM13-f1-90.bb.mini-win CHM13-f1-90.bb.inter-win CHM13-f1-90.bb.none-win | awk '$1~/^chr([0-9]+|X)$/' | cut -f1-3,6,9 > CHM13-f1-90.bb.mini-inter-none.win + +./chr-plot.js -n3 CHM13v1.cen.bed CHM13-f1-90.bb.mini-inter-none.win|gnuplot diff --git a/tex/plots/CHM13v1.cen.bed b/tex/plots/CHM13v1.cen.bed new file mode 100644 index 0000000..1147173 --- /dev/null +++ b/tex/plots/CHM13v1.cen.bed @@ -0,0 +1,23 @@ +chr1 116796216 147241828 248387497 +chr2 85991672 99673016 242696747 +chr3 85805192 101415517 201106605 +chr4 44705247 59870604 193575430 +chr5 42077197 54596619 182045437 +chr6 53286920 66058622 172126870 +chr7 55414368 68714496 160567423 +chr8 39243541 51325076 146259322 +chr9 39952789 81694033 150617274 +chr10 34633784 46664580 134758122 +chr11 46061948 59413485 135127772 +chr12 
29620490 42202482 133324781 +chr13 0 23171058 114240146 +chr14 0 17765925 101219177 +chr15 0 23279251 100338308 +chr16 30848291 57219476 96330493 +chr17 18892710 32487230 84277185 +chr18 10965698 25933550 80542536 +chr19 19655572 34768168 61707359 +chr20 21383653 37969531 66210247 +chr21 0 17078862 45827691 +chr22 0 20739833 51353906 +chrX 52820107 65927026 154259625 diff --git a/tex/plots/CHM13v1.size b/tex/plots/CHM13v1.size new file mode 100644 index 0000000..a736083 --- /dev/null +++ b/tex/plots/CHM13v1.size @@ -0,0 +1,23 @@ +chr1 248387497 +chr2 242696747 +chr3 201106605 +chr4 193575430 +chr5 182045437 +chr6 172126870 +chr7 160567423 +chr8 146259322 +chr9 150617274 +chr10 134758122 +chr11 135127772 +chr12 133324781 +chr13 114240146 +chr14 101219177 +chr15 100338308 +chr16 96330493 +chr17 84277185 +chr18 80542536 +chr19 61707359 +chr20 66210247 +chr21 45827691 +chr22 51353906 +chrX 154259625 diff --git a/tex/plots/anno2tbl.js b/tex/plots/anno2tbl.js new file mode 100755 index 0000000..bbb6a85 --- /dev/null +++ b/tex/plots/anno2tbl.js @@ -0,0 +1,40 @@ +#!/usr/bin/env k8 + +var buf = new Bytes(); +var file = arguments.length == 0? new File() : new File(arguments[0]); + +var h = {}; +while (file.readline(buf) >= 0) { + var t = buf.toString().split("\t"); + for (var i = 1; i <= 7; ++i) t[i] = parseInt(t[i]); + if (t[5]) continue; + if (t[11] == "gap") continue; + if (/chrUn|_random/.test(t[0])) continue; + var na = t[4] < 4? 
t[4] : 4; + var type = null; + if (t[11] == "mini") type = "11_VNTR"; + else if (t[11] == "micro") type = "12_STR"; + else if (t[11] == "micro" || t[11] == "lcr") type = "13_Other-LCR"; + else if (t[11] == "LINE/L1") type = "02_L1"; + else if (t[11] == "SINE/Alu") type = "01_Alu"; + else if (t[11] == "Retroposon/SVA") type = "03_SVA"; + else if (t[11] == "LTR/ERV") type = "04_ERV"; + else if (t[11] == "inter" || /^(DNA|LINE|SINE|LTR)/.test(t[11])) type = "05_Mixed-MEI"; + else if (/^Satellite/.test(t[11]) || t[11] == "alpha" || t[11] == "hsat2/3") type = "10_Satellite"; + else if (t[11] == "self") type = "31_Non-rep-dup"; + else if (t[11] == "none") type = "30_Non-rep-uniq"; + else if (t[11] == "mixed") type = "20_Mixed-repeat"; + else type = "21_Partial-repeat"; + var key = type; + if (h[key] == null) h[key] = [0, null, 0, 0, 0, 0, 0, 0]; + ++h[key][na]; + h[key][na+3] += t[7]; +} + +file.close(); +buf.destroy(); + +for (var key in h) { + var label = key.replace(/^[0-9]+_/, ""); + print(key, label, h[key][2], h[key][3], h[key][4], h[key][5], h[key][6], h[key][7]); +} diff --git a/tex/plots/bedutils.js b/tex/plots/bedutils.js new file mode 100755 index 0000000..cc055c7 --- /dev/null +++ b/tex/plots/bedutils.js @@ -0,0 +1,367 @@ +#!/usr/bin/env k8 + +/***************************** + ***** Library functions ***** + *****************************/ + +/******************************* + * Command line option parsing * + *******************************/ + +var getopt = function(args, ostr) { + var oli; // option letter list index + if (typeof(getopt.place) == 'undefined') + getopt.ind = 0, getopt.arg = null, getopt.place = -1; + if (getopt.place == -1) { // update scanning pointer + if (getopt.ind >= args.length || args[getopt.ind].charAt(getopt.place = 0) != '-') { + getopt.place = -1; + return null; + } + if (getopt.place + 1 < args[getopt.ind].length && args[getopt.ind].charAt(++getopt.place) == '-') { // found "--" + ++getopt.ind; + getopt.place = -1; + return null; + 
} + } + var optopt = args[getopt.ind].charAt(getopt.place++); // character checked for validity + if (optopt == ':' || (oli = ostr.indexOf(optopt)) < 0) { + if (optopt == '-') return null; // if the user didn't specify '-' as an option, assume it means null. + if (getopt.place < 0) ++getopt.ind; + return '?'; + } + if (oli+1 >= ostr.length || ostr.charAt(++oli) != ':') { // don't need argument + getopt.arg = null; + if (getopt.place < 0 || getopt.place >= args[getopt.ind].length) ++getopt.ind, getopt.place = -1; + } else { // need an argument + if (getopt.place >= 0 && getopt.place < args[getopt.ind].length) + getopt.arg = args[getopt.ind].substr(getopt.place); + else if (args.length <= ++getopt.ind) { // no arg + getopt.place = -1; + if (ostr.length > 0 && ostr.charAt(0) == ':') return ':'; + return '?'; + } else getopt.arg = args[getopt.ind]; // white space + getopt.place = -1; + ++getopt.ind; + } + return optopt; +} + +/*************** + * BED overlap * + ***************/ + +function it_index(a) { + if (a.length == 0) return -1; + a.sort(function(x, y) { return x[0] - y[0] }); + var last, last_i; + for (var i = 0; i < a.length; i += 2) last = a[i][2] = a[i][1], last_i = i; + for (var k = 1; 1<>k&1? last_i - (1<<(k-1)) : last_i + (1<<(k-1)); + if (last_i < a.length) last = last > a[last_i][2]? 
last : a[last_i][2]; + } + return k - 1; +} + +function it_overlap(a, st, en) { + var h, stack = [], b = []; + for (h = 0; 1<> h << h, i1 = i0 + (1<<(h+1)) - 1; + if (i1 >= a.length) i1 = a.length; + for (var i = i0; i < i1; ++i) + if (a[i][0] < en && st < a[i][1]) + b.push(i); + } else if (w == 0) { // if left child not processed + stack.push([x, h, 1]); + var y = x - (1<<(h-1)); + if (y >= a.length || a[y][2] > st) + stack.push([y, h - 1, 0]); + } else if (x < a.length && a[x][0] < en) { + if (st < a[x][1]) b.push(x); + stack.push([x + (1<<(h-1)), h - 1, 0]); + } + } + return b; +} + +/****************************** + ***** Command-line tools ***** + ******************************/ + +function bed_sum(args) +{ + var buf = new Bytes(); + var file = args.length == 0 || args[0] == '-'? new File() : new File(args[0]); + var s = 0; + while (file.readline(buf) >= 0) { + var t = buf.toString().split("\t", 3); + if (t.length < 3) continue; + s += parseInt(t[2]) - parseInt(t[1]); + } + file.close(); + buf.destroy(); + print(s); + return 0; +} + +function bed_sum2nd(args) +{ + var buf = new Bytes(); + var file = args.length == 0 || args[0] == '-'? new File() : new File(args[0]); + var s = 0; + while (file.readline(buf) >= 0) { + var t = buf.toString().split("\t", 2); + s += parseInt(t[1]); + } + file.close(); + buf.destroy(); + print(s); + return 0; +} + +function bed_merge(args) +{ + var buf = new Bytes(); + var file = args.length > 0? new File(args[0]) : new File(); + var ctg = null, st, en; + while (file.readline(buf) >= 0) { + var t = buf.toString().split("\t", 3); + var s = parseInt(t[1]); + var e = parseInt(t[2]); + if (ctg != t[0] || s > en) { // no overlap + if (ctg != null) print(ctg, st, en); + ctg = t[0], st = s, en = e; + } else if (s < st) throw Error("ERROR: input is not sorted by coordinate"); + else en = en > e? 
en : e; + } + if (ctg != null) print(ctg, st, en); + file.close(); + buf.destroy(); + return 0; +} + +function bed_sum1(args) +{ + var buf = new Bytes(); + var file = args.length == 0 || args[0] == '-'? new File() : new File(args[0]); + var ctg = null, st = 0, en = 0, sum = 0; + while (file.readline(buf) >= 0) { + var t = buf.toString().split("\t", 3); + var s = parseInt(t[1]); + var e = parseInt(t[2]); + if (ctg != t[0] || s > en) { // no overlap + sum += en - st; + if (ctg != null && ctg != t[0]) { + print(ctg, sum); + sum = 0; + } + ctg = t[0], st = s, en = e; + } else if (s < st) throw Error("ERROR: input is not sorted by coordinate"); + else en = en > e? en : e; + } + if (ctg != null) { + sum += en - st; + print(ctg, sum); + } + file.close(); + buf.destroy(); + return 0; +} + +function bed_gdist(args) +{ + if (args.length == 0) { + print("Usage: bedutils.js gdist <3-col-gmap.txt> "); + exit(1); + } + var file, buf = new Bytes(); + + var gmap = {}; + file = new File(args[0]); + var last_pos = 0, last_ctg = null, last_v = 0.0; + while (file.readline(buf) >= 0) { + var t = buf.toString().split("\t"); + var pos = parseInt(t[1]); + var v = parseFloat(t[2]); + if (last_ctg != t[0] && last_ctg != null) { + gmap[last_ctg].push([last_pos, 0x7fffffff, -1, last_v]); + last_pos = 0, last_v = 0.0; + } + if (gmap[t[0]] == null) gmap[t[0]] = []; + if (last_pos == pos) throw Error("Zero-length interval"); + gmap[t[0]].push([last_pos, pos, -1, last_v]); + last_pos = pos, last_ctg = t[0], last_v = v; + } + if (last_ctg != null) + gmap[last_ctg].push([last_pos, 0x7fffffff, -1, last_v]); + file.close(); + + for (var ctg in gmap) it_index(gmap[ctg]); + + file = args.length >= 2? 
new File(args[1]) : new File(); + while (file.readline(buf) >= 0) { + var t = buf.toString().split("\t"); + var st = parseInt(t[1]), en = parseInt(t[2]); + var v, g = gmap[t[0]]; + if (g == null) v = -1; + else if (st == en) v = 0; + else { + var as = it_overlap(g, st, st + 1); + var ae = it_overlap(g, en - 1, en); + if (as.length != 1 || ae.length != 1) + throw Error("Bug!"); + var is = as[0], ie = ae[0]; + var xs = g[is][3] + (is == g.length - 1? 0 : (g[is+1][3] - g[is][3]) / (g[is][1] - g[is][0]) * (st - g[is][0])); + var xe = g[ie][3] + (ie == g.length - 1? 0 : (g[ie+1][3] - g[ie][3]) / (g[ie][1] - g[ie][0]) * (en - g[ie][0])); + v = 1e6 * (xe - xs) / (en - st); + } + v = v <= 0? v : v.toFixed(15); + print(t[0], t[1], t[2], v); + } + file.close(); + buf.destroy(); +} + +function bed_window(args) +{ + var c, win_size = 1000000, skip = 500000, cnt_only = false, fn_len = null; + while ((c = getopt(args, "w:s:cl:")) != null) { + if (c == 'w') win_size = parseInt(getopt.arg); + else if (c == 's') skip = parseInt(getopt.arg); + else if (c == 'c') cnt_only = true; + else if (c == 'l') fn_len = getopt.arg; + } + + var lens = {}, file, buf = new Bytes(); + if (fn_len) { + file = new File(fn_len); + while (file.readline(buf) >= 0) { + var t = buf.toString().split("\t"); + if (t.length < 2) continue; + lens[t[0]] = parseInt(t[1]); + } + file.close(); + } + file = getopt.ind < args.length? new File(args[getopt.ind]) : new File(); + var bed = {}, ctgs = []; + while (file.readline(buf) >= 0) { + var t = buf.toString().split("\t"); + if (bed[t[0]] == null) { bed[t[0]] = []; ctgs.push(t[0]); } + bed[t[0]].push([parseInt(t[1]), parseInt(t[2]), -1]); + } + file.close(); + buf.destroy(); + + for (var ct = 0; ct < ctgs.length; ++ct) { + var ctg = ctgs[ct]; + it_index(bed[ctg]); + var a = bed[ctg]; + var max = 0; + for (var i = 0; i < a.length; ++i) + max = max > a[i][1]? 
max : a[i][1]; + if (lens[ctg] > 0 && max < lens[ctg]) max = lens[ctg]; + for (var x = 0; x < max; x += skip) { + var st = x - (win_size>>1), en = x + (win_size>>1); + if (st < 0) st = 0; + if (en > max) en = max; + var sum = 0, b = it_overlap(a, st, en); + if (cnt_only) { + sum = b.length; + } else { + for (var i = 0; i < b.length; ++i) { + var c = a[b[i]]; + var s = st > c[0]? st : c[0]; + var e = en < c[1]? en : c[1]; + sum += e - s; + } + } + print(ctg, x, sum/(en-st)*1e6); + } + } +} + +function bed_cov(args) +{ + if (args.length < 2) { + warn("Usage: bedutils.js cov "); + exit(1); + } + var file, buf = new Bytes(); + + file = new File(args[0]); + var bed = {}; + while (file.readline(buf) >= 0) { + var t = buf.toString().split("\t", 3); + if (bed[t[0]] == null) bed[t[0]] = []; + bed[t[0]].push([parseInt(t[1]), parseInt(t[2])]); + } + for (var ctg in bed) it_index(bed[ctg]); + file.close(); + + file = new File(args[1]); + while (file.readline(buf) >= 0) { + var t = buf.toString().split("\t", 3); + if (bed[t[0]] == null) { + print(t[0], t[1], t[2], 0, 0); + } else { + var st0 = parseInt(t[1]), en0 = parseInt(t[2]); + var b = bed[t[0]]; + var a = it_overlap(b, st0, en0); + var cov_st = 0, cov_en = 0, cov = 0; + for (var i = 0; i < a.length; ++i) { + var st1 = b[a[i]][0] > st0? b[a[i]][0] : st0; + var en1 = b[a[i]][1] < en0? b[a[i]][1] : en0; + if (st1 > cov_en) { + cov += cov_en - cov_st; + cov_st = st1, cov_en = en1; + } else cov_en = cov_en > en1? 
cov_en : en1; + } + cov += cov_en - cov_st; + print(t[0], t[1], t[2], a.length, cov); + } + } + file.close(); + + buf.destroy(); +} + +function main(args) +{ + if (args.length == 0) { + print("Usage: bedutils.js [arguments]"); + print("Commands:"); + print(" sum sum of BED regions (deprecated by bedtk)"); + print(" sum1 sum of BED regions for each contig"); + print(" sum2nd sum of the 2nd column"); + print(" merge merge overlapping regions in *sorted* BED (deprecated)"); + print(" cov breadth of coverage (deprecated by bedtk)"); + print(" gdist genetic distance from 3-col genetic map"); + print(" window window-based counting"); + exit(1); + } + + var cmd = args.shift(); + if (cmd == 'sum') bed_sum(args); + else if (cmd == 'sum2nd') bed_sum2nd(args); + else if (cmd == 'sum1') bed_sum1(args); + else if (cmd == 'merge') bed_merge(args); + else if (cmd == 'cov') bed_cov(args); + else if (cmd == 'gdist') bed_gdist(args); + else if (cmd == 'window') bed_window(args); + else throw Error("unrecognized command: " + cmd); +} + +main(arguments); diff --git a/tex/plots/chr-plot.js b/tex/plots/chr-plot.js new file mode 100755 index 0000000..cee19b8 --- /dev/null +++ b/tex/plots/chr-plot.js @@ -0,0 +1,130 @@ +#!/usr/bin/env k8 + +var getopt = function(args, ostr) { + var oli; // option letter list index + if (typeof(getopt.place) == 'undefined') + getopt.ind = 0, getopt.arg = null, getopt.place = -1; + if (getopt.place == -1) { // update scanning pointer + if (getopt.ind >= args.length || args[getopt.ind].charAt(getopt.place = 0) != '-') { + getopt.place = -1; + return null; + } + if (getopt.place + 1 < args[getopt.ind].length && args[getopt.ind].charAt(++getopt.place) == '-') { // found "--" + ++getopt.ind; + getopt.place = -1; + return null; + } + } + var optopt = args[getopt.ind].charAt(getopt.place++); // character checked for validity + if (optopt == ':' || (oli = ostr.indexOf(optopt)) < 0) { + if (optopt == '-') return null; // if the user didn't specify '-' as an option, 
assume it means null. + if (getopt.place < 0) ++getopt.ind; + return '?'; + } + if (oli+1 >= ostr.length || ostr.charAt(++oli) != ':') { // don't need argument + getopt.arg = null; + if (getopt.place < 0 || getopt.place >= args[getopt.ind].length) ++getopt.ind, getopt.place = -1; + } else { // need an argument + if (getopt.place >= 0 && getopt.place < args[getopt.ind].length) + getopt.arg = args[getopt.ind].substr(getopt.place); + else if (args.length <= ++getopt.ind) { // no arg + getopt.place = -1; + if (ostr.length > 0 && ostr.charAt(0) == ':') return ':'; + return '?'; + } else getopt.arg = args[getopt.ind]; // white space + getopt.place = -1; + ++getopt.ind; + } + return optopt; +} + +var c, width = 2, height = 1.5, y_max0 = null, y_min0 = null, fn_out = "chr-plot.eps", n = 1, fsize = 14; +while ((c = getopt(arguments, "w:h:x:i:o:n:f:")) != null) { + if (c == 'h') height = parseFloat(getopt.arg); + else if (c == 'n') n = parseInt(getopt.arg); + else if (c == 'w') width = parseFloat(getopt.arg); + else if (c == 'x') y_max0 = parseFloat(getopt.arg); + else if (c == 'i') y_min0 = parseFloat(getopt.arg); + else if (c == 'o') fn_out = getopt.arg; + else if (c == 'f') fsize = parseInt(getopt.arg); +} + +if (arguments.length - getopt.ind < 2) { + print("Usage: chr-plot.js [options] "); + print("Options:"); + print(" -n INT number of data points [" + n + "]"); + print(" -w FLOAT width of the plot [" + width + "]"); + print(" -h FLOAT height of the plot [" + height + "]"); + print(" -x FLOAT max y value [auto]"); + print(" -i FLOAT min y value [auto]"); + print(" -o FILE output file name [chr-plot.eps]"); + exit(1); +} + +var file, buf = new Bytes(); + +var chr_list = [], chr = {}, cen = [], max_len = 0; +file = new File(arguments[getopt.ind]); +while (file.readline(buf) >= 0) { + var t = buf.toString().split("\t"); + var len = parseInt(t[3]); + chr_list.push(t[0]); + cen.push([parseInt(t[1]), parseInt(t[2])]); + chr[t[0]] = len; + max_len = max_len > len? 
max_len : len; +} +file.close(); + +var y_max = -1e300, y_min = 1e300; +file = new File(arguments[getopt.ind+1]); +while (file.readline(buf) >= 0) { + var t = buf.toString().split("\t"); + if (chr[t[0]] == null) continue; + var y = parseFloat(t[2]); + y_max = y_max > y? y_max : y; + y_min = y_min < y? y_min : y; +} +file.close(); +if (y_max0 != null) y_max = y_max0; +if (y_min0 != null) y_min = y_min0; + +buf.destroy(); + +print('set t po eps co so enh "Helvetica,' + fsize + '"'); +print('set out "' + fn_out + '"'); +print('set size ' + width + ',' + (height + 0.02)); +print('set multiplot layout ' + chr_list.length + ',1'); +print('set lmargin screen ' + (fsize/2 * 0.01 + 0.005).toFixed(3)); +print('set border 0; unset xtics; unset ytics; set bmargin 0; set tmargin 0.02; set rmargin 0.02'); +print('set style line 1 lc rgb "#377eb8" lw 1'); +print('set style line 2 lc rgb "#e41a1c" lw 1'); +print('set style line 3 lc rgb "#4daf4a" lw 1'); +//print('set style fill transparent solid 0.5 noborder'); +print('set yran [' + y_min + ':' + y_max + ']'); +print(''); +var h = height / chr_list.length; +for (var i = 0; i < chr_list.length; ++i) { + var len = chr[chr_list[i]]; + print('set origin 0,' + (height - (i + 1) * h + 0.01)); + print('set xran [0:' + len * 1e-6 + ']'); + print('set size ' + (width*len/max_len) + ',' + h); + print('set style rect fc lt -1 fs solid 0.15 noborder'); + print('unset obj; unset label'); + print('set obj rect from ' + cen[i][0]*1e-6 + ', graph 0 to ' + cen[i][1]*1e-6 + ', graph 1'); + print('set label "' + chr_list[i] + '" at screen 0.01, graph 0.5'); + print('plot \\'); + for (var j = 0; j < n; ++j) { + var st, en, endl = j == n - 1? 
'' : ', \\'; + if (j > 0) { + st = en = ''; + for (var k = 0; k < j; ++k) { + st += '+$' + (k + 3); + en += '+$' + (k + 3); + } + en += '+$' + (j + 3); + st = st.replace(/^\+/, "(") + ")"; + en = en.replace(/^\+/, "(") + ")"; + } else st = '(0)', en = '($3)'; + print(' " $@ + +$(prefix).gap:$(prefix).fa + seqtk gap $< > $@ + +$(prefix).brnn.gz:$(prefix).fa + ~/dna-nn/dna-brnn -Ai ~/dna-nn/attcc-alpha.knm -t16 $< | htsbox bgzip > $@ + +$(prefix).etrf.gz:$(prefix).fa + ~/src/etrf/etrf $< | htsbox bgzip > $@ + +$(prefix).sdust.gz:$(prefix).fa + ~/minimap2/sdust $< | htsbox bgzip > $@ + +CHM13-f1-90.bb.paf.gz:CHM13-f1-90.bb.fa + minimap2 -cxasm20 -r2k --cs -t16 ~/ref/CHM13v1Y.fa $< 2> CHM13-f1-90.bb.paf.log | gzip > $@ + +GRCh38-f1-90.bb.paf.gz:GRCh38-f1-90.bb.fa + minimap2 -cxasm20 -r2k --cs -t16 ~/ref/hs38.fa $< 2> GRCh38-f1-90.bb.paf.log | gzip > $@