diff --git a/.github/workflows/linux-CI.yml b/.github/workflows/linux-CI.yml
new file mode 100644
index 0000000..ba2255a
--- /dev/null
+++ b/.github/workflows/linux-CI.yml
@@ -0,0 +1,28 @@
+name: linux
+
+on:
+  push:
+    branches: [ "main" ]
+  pull_request:
+    branches: [ "main" ]
+
+jobs:
+  build-linux:
+    runs-on: ubuntu-latest
+    steps:
+    - name: Preparing Repositories
+      uses: actions/checkout@v3
+      with:
+        submodules: recursive
+
+    - name: make
+      run: make
+
+    - name: Run Test Data
+      run: | 
+         ./bin/abpoa ./test_data/test.fa
+         ./bin/abpoa ./test_data/heter.fa -d2
+         ./bin/abpoa ./test_data/seq.fa -a1
+         ./bin/abpoa ./test_data/seq.fa -r5 -a1
+         ./bin/abpoa ./test_data/heter.fq -d2 -Q
+         ./bin/abpoa ./test_data/heter.fq -d2 -r2 -Q -a1
diff --git a/.github/workflows/macos-CI.yml b/.github/workflows/macos-CI.yml
new file mode 100644
index 0000000..c28b431
--- /dev/null
+++ b/.github/workflows/macos-CI.yml
@@ -0,0 +1,28 @@
+name: macos
+
+on:
+  push:
+    branches: [ "main" ]
+  pull_request:
+    branches: [ "main" ]
+
+jobs:
+  build-macos:
+    runs-on: macos-latest
+    steps:
+    - name: Preparing Repositories
+      uses: actions/checkout@v3
+      with:
+        submodules: recursive
+
+    - name: make
+      run: make
+
+    - name: Run Test Data
+      run: | 
+         ./bin/abpoa ./test_data/test.fa
+         ./bin/abpoa ./test_data/heter.fa -d2
+         ./bin/abpoa ./test_data/seq.fa -a1
+         ./bin/abpoa ./test_data/seq.fa -r5 -a1
+         ./bin/abpoa ./test_data/heter.fq -d2 -Q
+         ./bin/abpoa ./test_data/heter.fq -d2 -r2 -Q -a1
\ No newline at end of file
diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml
new file mode 100644
index 0000000..46cb499
--- /dev/null
+++ b/.github/workflows/wheels.yml
@@ -0,0 +1,82 @@
+name: Build and upload to PyPI
+
+# Build on every branch push, tag push, and pull request change:
+on: [push, pull_request]
+# Alternatively, to publish when a (published) GitHub Release is created, use the following:
+# on:
+#   push:
+#   pull_request:
+#   release:
+#     types:
+#       - published
+env:
+  CIBW_BUILD_VERBOSITY: 3
+  #CIBW_TEST_COMMAND: python -m unittest discover {project}/tests
+  # Disable building PyPy wheels on all platforms
+  CIBW_SKIP: pp*
+
+jobs:
+  build_wheels:
+    name: Build wheels for ${{ matrix.python }}-${{ matrix.buildplat[1] }}
+    runs-on: ${{ matrix.buildplat[0] }}
+    strategy:
+      # Ensure that a wheel builder finishes even if another fails
+      fail-fast: false
+      matrix:
+        buildplat:
+        - [ubuntu-latest, manylinux_x86_64, auto]
+        - [macos-latest, macosx_x86_64, x86_64]
+        # skip these for now, need more work
+        - [macos-latest, macosx_arm64, arm64]
+        # python: ["cp38", "cp39", "cp310", "cp311", "cp312"]
+        python: ["cp312"]
+
+    steps:
+      - uses: actions/checkout@v3
+        with:
+            submodules: 'true'
+
+      - name: Build wheels
+        uses: pypa/cibuildwheel@v2.15.0
+        env:
+          CIBW_ARCHS: ${{ matrix.buildplat[2] }}
+          CIBW_BUILD: ${{ matrix.python }}-${{ matrix.buildplat[1] }}
+
+      - uses: actions/upload-artifact@v3
+        with:
+          path: ./wheelhouse/*.whl
+
+  build_sdist:
+    name: Build source distribution
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+        with:
+            submodules: 'true'
+
+      - name: Build sdist
+        run: pipx run build --sdist
+
+      - uses: actions/upload-artifact@v3
+        with:
+          path: dist/*.tar.gz
+
+  upload_pypi:
+    needs: [build_wheels, build_sdist]
+    runs-on: ubuntu-latest
+    # upload to PyPI on every tag starting with 'v'
+    if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v')
+    # alternatively, to publish when a GitHub Release is created, use the following rule:
+    # if: github.event_name == 'release' && github.event.action == 'published'
+    steps:
+      - uses: actions/download-artifact@v3
+        with:
+          # unpacks default artifact into dist/
+          # if `name: artifact` is omitted, the action will create extra parent dir
+          name: artifact
+          path: dist
+
+      - uses: pypa/gh-action-pypi-publish@v1.5.0
+        with:
+          user: __token__
+          password: ${{ secrets.PYPI_API_TOKEN }}
diff --git a/Makefile b/Makefile
index 0b6df3f..208570a 100644
--- a/Makefile
+++ b/Makefile
@@ -4,20 +4,27 @@ default: abpoa
 #CC          = gcc
 OS          := $(shell uname)
 ARCH        := $(shell arch)
-EXTRA_FLAGS = -Wno-unused-function -Wno-misleading-indentation -DUSE_SIMDE -DSIMDE_ENABLE_NATIVE_ALIASES
-CFLAGS      = -Wall -O3 $(EXTRA_FLAGS)
+# add -fno-tree-vectorize to avoid certain vectorization errors in O3 optimization
+# right now, we are using -O3 for the best performance, and no vectorization errors were found
+EXTRA_FLAGS = -Wall -Wno-unused-function -Wno-misleading-indentation -DUSE_SIMDE -DSIMDE_ENABLE_NATIVE_ALIASES# -fno-tree-vectorize
 
 # for debug
 ifneq ($(debug),)
-	DFLAGS   =   -D __DEBUG__
+	EXTRA_FLAGS  += -D __DEBUG__
 endif
+ifneq ($(sdebug),)
+	EXTRA_FLAGS  += -D __SIMD_DEBUG__
+endif
+
 # for gdb
 ifneq ($(gdb),)
-	CFLAGS   = -Wall -g ${DFLAGS} $(EXTRA_FLAGS)
+	OPT_FLAGS = -g
 else
-	CFLAGS   = -Wall -O3 ${DFLAGS} $(EXTRA_FLAGS)
+	OPT_FLAGS = -O3
 endif
 
+CFLAGS        = $(OPT_FLAGS) $(EXTRA_FLAGS)
+
 # for gprof
 ifneq ($(pg),)
 	PG_FLAG  =   -pg
diff --git a/README.md b/README.md
index 258025f..7131b63 100644
--- a/README.md
+++ b/README.md
@@ -5,18 +5,19 @@
 [![PyPI](https://img.shields.io/pypi/dm/pyabpoa.svg?label=pip%20install)](https://pypi.python.org/pypi/pyabpoa)
 [![Published in Bioinformatics](https://img.shields.io/badge/Published%20in-Bioinformatics-blue.svg)](https://dx.doi.org/10.1093/bioinformatics/btaa963)
 [![GitHub Issues](https://img.shields.io/github/issues/yangao07/abPOA.svg?label=Issues)](https://github.com/yangao07/abPOA/issues)
-[![Build Status](https://img.shields.io/travis/yangao07/abPOA/main.svg?label=Main)](https://travis-ci.org/yangao07/abPOA)
+[![C/C++ CI](https://github.com/yangao07/abPOA/actions/workflows/linux-CI.yml/badge.svg)](https://github.com/yangao07/abPOA/actions/workflows/linux-CI.yml)
+[![C/C++ CI](https://github.com/yangao07/abPOA/actions/workflows/macos-CI.yml/badge.svg)](https://github.com/yangao07/abPOA/actions/workflows/macos-CI.yml)
 [![License](https://img.shields.io/badge/License-MIT-black.svg)](https://github.com/yangao07/abPOA/blob/main/LICENSE)
-<!-- [![PyPI](https://img.shields.io/pypi/v/pyabpoa.svg?style=flat)](https://pypi.python.org/pypi/pyabpoa) -->
-## Updates (v1.5.1)
+## Updates (v1.5.2)
 
-- Fix a memory allocation bug caused by int overflow when memery is large (>4G)
+- Fix a backtrack bug in seeding mode
+- Add consensus mode: most frequent base at each pos
 
 ## Getting started
 Download the [latest release](https://github.com/yangao07/abPOA/releases):
 ```
-wget https://github.com/yangao07/abPOA/releases/download/v1.5.1/abPOA-v1.5.1.tar.gz
-tar -zxvf abPOA-v1.5.1.tar.gz && cd abPOA-v1.5.1
+wget https://github.com/yangao07/abPOA/releases/download/v1.5.2/abPOA-v1.5.2.tar.gz
+tar -zxvf abPOA-v1.5.2.tar.gz && cd abPOA-v1.5.2
 ```
 Make from source and run with test data:
 ```
@@ -34,7 +35,7 @@ abpoa ./test_data/seq.fa > cons.fa
 ## Table of Contents
 
 - [abPOA: adaptive banded Partial Order Alignment](#abpoa-adaptive-banded-partial-order-alignment)
-  - [Updates (v1.5.1)](#updates-v151)
+  - [Updates (v1.5.2)](#updates-v152)
   - [Getting started](#getting-started)
   - [Table of Contents](#table-of-contents)
   - [Introduction](#introduction)
@@ -43,12 +44,13 @@ abpoa ./test_data/seq.fa > cons.fa
     - [Building abPOA from source files](#building-abpoa-from-source-files)
     - [Pre-built binary executable file for Linux/Unix or MacOS](#pre-built-binary-executable-file-for-linuxunix-or-macos)
   - [General usage](#general-usage)
-    - [To generate consensus sequence](#to-generate-consensus-sequence)
-    - [To generate multiple consensus sequences](#to-generate-multiple-consensus-sequences)
-    - [To generate row-column multiple sequence alignment in FASTA format](#to-generate-row-column-multiple-sequence-alignment-in-fasta-format)
-    - [To generate graph information in GFA format](#to-generate-graph-information-in-gfa-format)
-    - [To align sequence to an existing graph in GFA/MSA format](#to-align-sequence-to-an-existing-graph-in-gfamsa-format)
-    - [To generate a plot of the alignment graph](#to-generate-a-plot-of-the-alignment-graph)
+    - [Generate a consensus sequence](#generate-a-consensus-sequence)
+    - [Generate multiple consensus sequences](#generate-multiple-consensus-sequences)
+    - [Generate row-column multiple sequence alignment in FASTA format](#generate-row-column-multiple-sequence-alignment-in-fasta-format)
+    - [Generate graph information in GFA format](#generate-graph-information-in-gfa-format)
+    - [Align sequence to an existing graph in GFA/MSA format](#align-sequence-to-an-existing-graph-in-gfamsa-format)
+    - [Generate a consensus sequence for amino acid sequences](#generate-a-consensus-sequence-for-amino-acid-sequences)
+    - [Generate a plot of the alignment graph](#generate-a-plot-of-the-alignment-graph)
   - [Input](#input)
   - [Output](#output)
     - [Consensus sequence](#consensus-sequence)
@@ -79,22 +81,22 @@ It right now supports SSE2/SSE4.1/AVX2 vectorization.
 
 For more information, please refer to our [paper](https://dx.doi.org/10.1093/bioinformatics/btaa963) published in Bioinformatics.
 
-## <a name="install"></a>Installation
+## Installation
 
-### <a name="conda"></a>Installing abPOA via conda
+### Installing abPOA via conda
 On Linux/Unix and Mac OS, abPOA can be installed via
 ```
 conda install -c bioconda abpoa   # install abPOA program
 ```
 
-### <a name="build"></a>Building abPOA from source files
+### Building abPOA from source files
 You can also build abPOA from source files. 
 Make sure you have gcc (>=6.4.0) and zlib installed before compiling.
 It is recommended to download the [latest release](https://github.com/yangao07/abPOA/releases).
 ```
-wget https://github.com/yangao07/abPOA/releases/download/v1.5.1/abPOA-v1.5.1.tar.gz
-tar -zxvf abPOA-v1.5.1.tar.gz
-cd abPOA-v1.5.1; make
+wget https://github.com/yangao07/abPOA/releases/download/v1.5.2/abPOA-v1.5.2.tar.gz
+tar -zxvf abPOA-v1.5.2.tar.gz
+cd abPOA-v1.5.2; make
 ```
 Or, you can use `git clone` command to download the source code.
 This gives you the latest version of abPOA, which might be still under development.
@@ -103,39 +105,51 @@ git clone --recursive https://github.com/yangao07/abPOA.git
 cd abPOA; make
 ```
 
-### <a name="binary"></a>Pre-built binary executable file for Linux/Unix or MacOS
+### Pre-built binary executable file for Linux/Unix or MacOS
 If you meet any compiling issue, please try the pre-built binary file for linux:
 ```
-wget https://github.com/yangao07/abPOA/releases/download/v1.5.1/abPOA-v1.5.1_x64-linux.tar.gz
-tar -zxvf abPOA-v1.5.1_x64-linux.tar.gz
+wget https://github.com/yangao07/abPOA/releases/download/v1.5.2/abPOA-v1.5.2_x64-linux.tar.gz
+tar -zxvf abPOA-v1.5.2_x64-linux.tar.gz
 ```
 or for macos:
 ```
-wget https://github.com/yangao07/abPOA/releases/download/v1.5.1/abPOA-v1.5.1_arm64-macos.tar.gz
-tar -zxvf abPOA-v1.5.1_arm64-macos.tar.gz
+wget https://github.com/yangao07/abPOA/releases/download/v1.5.2/abPOA-v1.5.2_arm64-macos.tar.gz
+tar -zxvf abPOA-v1.5.2_arm64-macos.tar.gz
 ```
 
-## <a name="usage"></a>General usage
-### <a name="gen_1cons"></a>To generate consensus sequence
+## General usage
+### Generate a consensus sequence
 
 ```
 abpoa seq.fa > cons.fa
 ```
 
-### <a name="gen_mcons"></a>To generate multiple consensus sequences
+abPOA provides two conensus calling methods: 
+* heaviest bundlding (default): the path with largest weight along the partial order graph
+* most frequent bases: pick the most common base at each alignment position
+
+Sometimes these two methods will generate different consensus sequences [#67](https://github.com/yangao07/abPOA/issues/67)
+
+
+To use `most frequent bases` method:
+```
+abpoa seq.fa -a1 > cons.fa
+```
+
+### Generate multiple consensus sequences
 
 ```
 abpoa heter.fa -d2 > 2cons.fa
 ```
 
-### <a name="gen_msa"></a>To generate row-column multiple sequence alignment in FASTA format
+### Generate row-column multiple sequence alignment in FASTA format
 
 ```
 abpoa seq.fa -r1 > out.msa
 abpoa seq.fa -r2 > out_cons.msa
 ```
 
-### <a name="gen_gfa"></a>To generate graph information in [GFA](https://github.com/GFA-spec/GFA-spec/blob/master/GFA1.md) format
+### Generate graph information in [GFA](https://github.com/GFA-spec/GFA-spec/blob/master/GFA1.md) format
 
 ```
 abpoa seq.fa -r3 > out.gfa
@@ -145,7 +159,7 @@ To include the generated consensus sequence as a path in the GFA file:
 abpoa seq.fa -r4 > out.gfa
 ```
 
-### <a name="aln_to_gfa"></a>To align sequence to an existing graph in GFA/MSA format
+### Align sequence to an existing graph in GFA/MSA format
 ```
 abpoa -i in.gfa seq.fa -r3 > out.gfa
 abpoa -i in.msa seq.fa -r1 > out.msa
@@ -157,14 +171,22 @@ abpoa seq1.fa -r1 > seq1.msa
 abpoa -i seq1.msa seq2.fa > cons.fa
 ```
 
-### <a name="gen_plot"></a>To generate a plot of the alignment graph
+### Generate a consensus sequence for amino acid sequences
+```
+abpoa -c -t BLOSUM62.mtx input_aa.fa > output_aa_cons.fa
+```
+abPOA provides two score matrix files for amino acid sequences: `BLOSUM62.mtx`, `HOXD70.mtx`.
+
+You can also use any score matrix, as long as it has the same format as the above two.
+
+### Generate a plot of the alignment graph
 
 ```
 abpoa seq.fa -g poa.png > cons.fa
 ```
-See [Plot of alignment graph](#plot) for more details about the plot file.
+See [Plot of alignment graph](#plot-of-alignment-graph) for more details about the plot file.
 
-## <a name="input"></a>Input
+## Input
 abPOA works with FASTA, FASTQ, gzip'd FASTA(.fa.gz) and gzip'd FASTQ(.fq.gz) formats. The input file is 
 expected to contains multiple sequences which will be processed sequentially to perform the iterative 
 sequence-to-graph (partial order) alignment.
@@ -173,8 +195,8 @@ abPOA can also take a list of filenames as input with option `-l`, where each li
 file containing multiple sequences. Each sequence file is then individually aligned by abPOA to generate a
 consensus sequence.
 
-## <a name="output"></a>Output
-### <a name="cons"></a>Consensus sequence 
+## Output
+### Consensus sequence 
 By default, abPOA only outputs the consensus sequence generated from the final alignment graph.
 It is in FASTA format with the name field set as "Consensus_sequence".
 For example:
@@ -194,7 +216,7 @@ CCATTCCCACCATCCTTACCATCAACATCACCATCCCCACCATCCCCAACACCATTCCCACCATCCCTACCATCACCATC
 >Consensus_sequence_2
 CCATTCCCACCATCCTTACCATCAACATCACCATCCCCACCATCCCCAACACCATTCCCACCATCCCTACCATCACCATCACCATCCCCACCAACATCCCCACCACCATCCTCACTACCATCCCCACCACCATTTCCACCATTCCCACCACAGTCACCATCACCCCCACCATCCCCATCATCATCCGCACCATCCCCACCATCCCCACCACCATCTCCATTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATTTCCACCATTCCCACCATCATCCCCACCACCATCCTCGTTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATTTCCACCATTCCCACCATCATCCCCACCACCATCCCCATTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATTTCCACCATTCCCACCATCATCCCCACCACCATCCTCGTTACCATCCCCACCACCTTTTCCACCATTCCCACCATCATCCCCACCGCCATCCTCGTTACCATCCCCACCACCTTTTCCACCATTCCCACCATCTCCAACACCTCCCCCACCATCATCCCCACCATCCCCACCACCTTCTCCACCATCATTCTCACCATCCCCACCACCATCTCCACCACCATTCTCACCATCTCCACCAACATCCCCACCATCCCCACCCCCATGCCCACCAACATCCCCACCATCCCCACCCCCATGCCCACCATCATCCCCACCATCC
 ```
-### <a name="msa"></a>Row-column multiple sequence alignment
+### Row-column multiple sequence alignment
 abPOA can also output the row-column multiple sequence alignment (RC-MSA) of all the aligned sequences in FASTA format.
 For example:
 ```
@@ -209,7 +231,7 @@ ACGTGTACA--TTGAC
 ```
 The `-` in the sequence stands for alignment gap. 
 
-### <a name="gfa"></a>Full graph information
+### Full graph information
 abPOA can output the final alignment graph in GFA format.
 Each segment line (`S` line) represents one node and each link line (`L` line) represents one edge between two nodes.
 The original input sequences and the generated consensus sequence are described as paths in `P` lines.
@@ -219,7 +241,7 @@ abPOA outputs two graph-related numbers in the header line (`H` line):
 
 Please refer to the [GFA specification](https://github.com/GFA-spec/GFA-spec/blob/master/GFA1.md) for more details of the GFA format.
 
-### <a name="plot"></a>Plot of alignment graph
+### Plot of alignment graph
 
 abPOA can generate a plot of the final partial order alignment graph with the help of `graphviz dot`. 
 For example:
@@ -232,11 +254,11 @@ The numbers inside the nodes are the node IDs. The numbers on the edges are the
 Make sure you have `dot` installed before using abPOA to generate the plot.
 For Linux/Unix systems: `sudo apt-get install graphviz`.
 
-## <a name="description"></a>Algorithm description
-### <a name="banding"></a>Adaptive banding
+## Algorithm description
+### Adaptive banding
 To understand how the adaptive banding working, please refer to our [Bioinformatics paper](https://dx.doi.org/10.1093/bioinformatics/btaa963).
 
-### <a name="seeding"></a>Minimizer-based seeding mode
+### Minimizer-based seeding mode
 As abPOA always allocates quadratic size of memory, for very long input sequences (>10 kb), memory usage will be a challenge.
 
 To solve this issue, we develop a minimizer-based seeding and partition method to split the sequence and graph with a small window.
@@ -249,7 +271,7 @@ A second round of chaining is then performed on all the local minimizer chains t
 With this global chain, abPOA selects a series of minimizer hits as partition anchors which has at least a distance of 500 bp (by default, -n/--min-poa-win).
 Within each partitioned window, abPOA performs banded partial order alignment separately and combines all the alignment results at the end.
 
-### <a name="tree"></a>Minimizer-based progressive tree
+### Minimizer-based progressive tree
 Instead of aligning all the sequences in the original order, abPOA can alternatively build a progressive tree to guide the alignment order.
 The generation of the progressive tree is also based on minimizers.
 For each pair of sequences, abPOA calculates their similarity score which is the Jaccard similarity of the minimizers, i.e. the number of minimizer hits divided by the total number of all minimizers from the two sequences.
@@ -261,23 +283,23 @@ With all the similarity scores (minimizer-based Jaccard similarity), abPOA build
 
 Then, abPOA performs partial order alignment following the order of sequences in this progressive tree set.
 
-### <a name="mcons"></a>Multiple consensus sequences
+### Multiple consensus sequences
 abPOA supports generating multiple consensus sequences from the final alignment graph (set -d/--max-num-cons as >1).
 
 The general underlying idea is to group input sequences into multiple clusters based on the heterozygous bases in the graph,
 Then, one consensus sequence is separately generated for each cluster of input sequences.
 The minimum allele frequency for each heterozygous base is 0.25 (by default, -q/--min-freq). 
 
-## <a name="dev"></a>For development
+## For development
 abPOA is not only a stand-alone tool for MSA and consensus calling, it can also work as a programming library. [example.c](example.c) shows how to use the C APIs of abPOA to take a set of sequences as input and perform MSA and consensus calling. Basically, the library file `libabpoa.a` and two header files [abpoa.h](include/abpoa.h) and [simd_instruction.h](include/simd_instruction.h) are needed to make the abPOA library work in your program.
 
 abPOA also provides Python bindings to all the primary C APIs. Refer to [python/README.md](python/README.md) for more details.
 
-## <a name="eval"></a>Evaluation datasets
+## Evaluation datasets
 The evaluation datasets and scripts used in [abPOA paper](https://dx.doi.org/10.1093/bioinformatics/btaa963) can be found in [abPOA-v1.0.5](https://github.com/yangao07/abPOA/releases/tag/v1.0.5).
 
-## <a name="contact"></a>Contact
-Yan Gao gaoy1@chop.edu
+## Contact
+Yan Gao yangao@ds.dfci.harvard.edu
 
 Yi Xing xingyi@chop.edu
 
diff --git a/TODO.md b/TODO.md
new file mode 100644
index 0000000..3c7e96d
--- /dev/null
+++ b/TODO.md
@@ -0,0 +1,9 @@
+# To test after commit
+1. run through
+   1. SSE/AVX2/AVX512
+   2. 16/32bits
+   3. global/local
+   4. lg/ag/cg
+2. same result
+   1. SSE vs AVX2 vs AVX512
+   2. 16bits vs 32 bits
\ No newline at end of file
diff --git a/include/abpoa.h b/include/abpoa.h
index 87bbdcf..693a5bc 100644
--- a/include/abpoa.h
+++ b/include/abpoa.h
@@ -36,7 +36,12 @@
 #define ABPOA_OUT_CONS_FQ  5
 
 #define ABPOA_HB 0
-#define ABPOA_HC 1
+#define ABPOA_MC 1
+
+#define ABPOA_NONE_VERBOSE 0
+#define ABPOA_INFO_VERBOSE 1
+#define ABPOA_DEBUG_VERBOSE 2
+#define ABPOA_LONG_DEBUG_VERBOSE 3
 
 // NOTE: upper boundary of in_edge_n is pow(2,30)
 // for MATCH/MISMATCH: node_id << 34  | query_id << 4 | op
@@ -72,7 +77,7 @@ typedef struct {
     uint8_t ret_cigar:1, rev_cigar:1, out_msa:1, out_cons:1, out_gfa:1, out_fq:1, use_read_ids:1, amb_strand:1;
     uint8_t use_qv:1, disable_seeding:1, progressive_poa:1;
     char *incr_fn, *out_pog;
-    int align_mode, gap_mode, max_n_cons;
+    int align_mode, gap_mode, max_n_cons, cons_algrm; // consensus calling algorithm: 0: partial order graph, 1: majority voting
     double min_freq; // for multiploid data
     int verbose; // to control output msg
 
@@ -83,8 +88,8 @@ typedef struct {
 typedef struct {
     int node_id;
     int in_edge_n, in_edge_m, *in_id;
-    int out_edge_n, out_edge_m, *out_id; int *out_weight;
-    int *read_weight, n_read, m_read; // weight of each read, valid when use_qv=1
+    int out_edge_n, out_edge_m, *out_id; int *out_edge_weight; // out_edge_weight: edge-wise weight
+    int *read_weight, n_read, m_read; // read_weight: read-wise weight, valid when use_qv=1
     uint64_t **read_ids; int read_ids_n; // for each edge
 
     int aligned_node_n, aligned_node_m, *aligned_node_id; // mismatch; aligned node will have same rank
diff --git a/setup.py b/setup.py
index 52c0e7e..f2f0608 100644
--- a/setup.py
+++ b/setup.py
@@ -1,4 +1,4 @@
-import os, platform, sys
+import os
 
 try:
     from setuptools import setup, Extension
@@ -9,21 +9,24 @@
 
 simde = ['-DUSE_SIMDE', '-DSIMDE_ENABLE_NATIVE_ALIASES']
 
-if platform.system() == "Darwin":
+machine_system = os.popen("uname").readlines()[0].rsplit()[0]
+machine_arch = os.popen("uname -m").readlines()[0].rsplit()[0]
+
+if machine_system == "Darwin":
     # note: see https://github.com/pypa/wheel/issues/406
     simd_flag = ['-march=native', '-D__AVX2__', '-mmacosx-version-min=10.9']
-    if platform.machine() in ["aarch64", "arm64"]:
+    if machine_arch in ["aarch64", "arm64"]:
         os.environ['_PYTHON_HOST_PLATFORM'] = "macosx-10.9-arm64"
         os.environ['ARCHFLAGS'] = "-arch arm64"
-    else:
+    else: # x86_64
         os.environ['_PYTHON_HOST_PLATFORM'] = "macosx-10.9-x86_64"
         os.environ['ARCHFLAGS'] = "-arch x86_64"
 else:
-    if platform.machine() in ["aarch64", "arm64"]:
+    if machine_arch in ["aarch64", "arm64"]:
         simd_flag = ['-march=armv8-a+simd', '-D__AVX2__']
-    elif platform.machine() in ["aarch32"]:
+    elif machine_arch in ["aarch32"]:
         simd_flag = ['-march=armv8-a+simd', '-mfpu=auto -D__AVX2__']
-    else:
+    else: # x86_64
         simd_flag=['-march=native']
         if os.getenv('SSE4', False):
             simd_flag=['-msse4.1']
@@ -60,13 +63,13 @@
     description = "pyabpoa: SIMD-based partial order alignment using adaptive band",
     long_description = long_description,
     long_description_content_type="text/markdown",
-    version = "1.5.1",
+    version = "1.5.2",
     url = "https://github.com/yangao07/abPOA",
     author = "Yan Gao",
-    author_email = "gaoy1@chop.edu",
+    author_email = "yangao@ds.dfci.harvard.edu",
     license = "MIT",
     keywords = "multiple-sequence-alignment  partial-order-graph-alignment",
-    setup_requires=["cython"],
+    setup_requires=["cython<3"], # see https://github.com/cython/cython/issues/5568
     # Build instructions
     ext_modules = [
         Extension(
diff --git a/src/abpoa.c b/src/abpoa.c
index 64105c1..b0b3cd9 100644
--- a/src/abpoa.c
+++ b/src/abpoa.c
@@ -16,8 +16,8 @@ char PROG[20] = "abpoa";
 #define _bO BOLD UNDERLINE "O" NONE
 #define _bA BOLD UNDERLINE "A" NONE
 char DESCRIPTION[100] = _ba "daptive " _bb "anded " _bP "artial " _bO "rder " _bA "lignment";
-char VERSION[20] = "1.5.1";
-char CONTACT[30] = "gaoy1@chop.edu";
+char VERSION[20] = "1.5.2";
+char CONTACT[30] = "yangao@ds.dfci.harvard.edu";
 
 const struct option abpoa_long_opt [] = {
     { "align-mode", 1, NULL, 'm' },
@@ -49,11 +49,13 @@ const struct option abpoa_long_opt [] = {
     { "output", 1, NULL, 'o' },
     { "result", 1, NULL, 'r' },
     { "out-pog", 1, NULL, 'g' },
+    { "cons-algrm", 1, NULL, 'a'},
     { "max-num-cons", 1, NULL, 'd', },
     { "min-freq", 1, NULL, 'q', },
 
     { "help", 0, NULL, 'h' },
     { "version", 0, NULL, 'v' },
+    { "verbose", 1, NULL, 'V'},
 
     { 0, 0, 0, 0}
 };
@@ -64,14 +66,14 @@ int abpoa_usage(void)
     err_printf("%s: %s \n\n", PROG, DESCRIPTION);
     err_printf("Version: %s\t", VERSION);
 	err_printf("Contact: %s\n\n", CONTACT);
-    err_printf("Usage: %s [options] <in.fa/fq> > cons.fa/msa.out/abpoa.gfa\n\n", PROG);
+    err_printf("Usage: %s [options] <in.fa/fq> > cons.fa/msa.fa/abpoa.gfa\n\n", PROG);
     err_printf("Options:\n");
     err_printf("  Alignment:\n");
-    err_printf("    -m --aln-mode   INT     alignment mode [%d]\n", ABPOA_GLOBAL_MODE);
+    err_printf("    -m --aln-mode    INT    alignment mode [%d]\n", ABPOA_GLOBAL_MODE);
     err_printf("                              %d: global, %d: local, %d: extension\n", ABPOA_GLOBAL_MODE, ABPOA_LOCAL_MODE, ABPOA_EXTEND_MODE);
-    err_printf("    -M --match      INT     match score [%d]\n", ABPOA_MATCH);
-    err_printf("    -X --mismatch   INT     mismatch penalty [%d]\n", ABPOA_MISMATCH);
-    err_printf("    -t --matrix    FILE     scoring matrix file, \'-M\' and \'-X\' are not used when \'-t\' is used [Null]\n");
+    err_printf("    -M --match       INT    match score [%d]\n", ABPOA_MATCH);
+    err_printf("    -X --mismatch    INT    mismatch penalty [%d]\n", ABPOA_MISMATCH);
+    err_printf("    -t --matrix     FILE    scoring matrix file, \'-M\' and \'-X\' are not used when \'-t\' is used [Null]\n");
     err_printf("                            e.g., \'HOXD70.mtx, BLOSUM62.mtx\'\n");
     err_printf("    -O --gap-open INT(,INT) gap opening penalty (O1,O2) [%d,%d]\n", ABPOA_GAP_OPEN1, ABPOA_GAP_OPEN2);
     err_printf("    -E --gap-ext  INT(,INT) gap extension penalty (E1,E2) [%d,%d]\n", ABPOA_GAP_EXT1, ABPOA_GAP_EXT2);
@@ -83,9 +85,9 @@ int abpoa_usage(void)
     err_printf("                            for each input sequence, try the reverse complement if the current\n");
     err_printf("                            alignment score is too low, and pick the strand with a higher score\n");
     err_printf("  Adaptive banded DP:\n");
-    err_printf("    -b --extra-b    INT     first adaptive banding parameter [%d]\n", ABPOA_EXTRA_B);
+    err_printf("    -b --extra-b     INT    first adaptive banding parameter [%d]\n", ABPOA_EXTRA_B);
     err_printf("                            set b as < 0 to disable adaptive banded DP\n");
-    err_printf("    -f --extra-f  FLOAT     second adaptive banding parameter [%.2f]\n", ABPOA_EXTRA_F);
+    err_printf("    -f --extra-f   FLOAT    second adaptive banding parameter [%.2f]\n", ABPOA_EXTRA_F);
     err_printf("                            the number of extra bases added on both sites of the band is\n");
     err_printf("                            b+f*L, where L is the length of the aligned sequence\n");
     // err_printf("    -z --zdrop    INT       Z-drop score in extension alignment [-1]\n");
@@ -101,7 +103,8 @@ int abpoa_usage(void)
     // err_printf("    -n --par-size           minimal partition size [%d]\n", ABPOA_W);
 
     err_printf("  Input/Output:\n");
-    err_printf("    -Q --use-qual-weight    take base quality score from FASTQ input file as graph edge weight [False]\n");
+    err_printf("    -Q --use-qual-weight    take base quality score from FASTQ input file as graph edge weight for consensus calling [False]\n");
+    err_printf("                            effective only when input sequences are in FASTQ format and consensus calling with heaviest bundling\n");
     err_printf("    -c --amino-acid         input sequences are amino acid (default is nucleotide) [False]\n");
     err_printf("    -l --in-list            input file is a list of sequence file names [False]\n");
     err_printf("                            each line is one sequence file containing a set of sequences\n");
@@ -116,12 +119,16 @@ int abpoa_usage(void)
     err_printf("                            - %d: graph in GFA format\n", ABPOA_OUT_GFA);
     err_printf("                            - %d: graph with consensus path in GFA format\n", ABPOA_OUT_CONS_GFA);
     err_printf("                            - %d: consensus in FASTQ format\n", ABPOA_OUT_CONS_FQ);
+    err_printf("    -a --cons-algrm INT     consensus algorithm [%d]\n", ABPOA_HB);
+    err_printf("                            - %d: heaviest bundling path in partial order graph\n", ABPOA_HB);
+    err_printf("                            - %d: most frequent bases at each position\n", ABPOA_MC);
     err_printf("    -d --maxnum-cons INT    max. number of consensus sequence to generate [1]\n");
     err_printf("    -q --min-freq  FLOAT    min. frequency of each consensus sequence (only effective when -d/--num-cons > 1) [%.2f]\n", MULTIP_MIN_FREQ);
     err_printf("    -g --out-pog    FILE    dump final alignment graph to FILE (.pdf/.png) [Null]\n\n");
 
     err_printf("    -h --help               print this help usage information\n");
     err_printf("    -v --version            show version number\n");
+    err_printf("    -V --verbose     INT    verbose level (0-2). 0: none, 1: information, 2: debug [0]\n");
 
 
     err_printf("\n");
@@ -149,7 +156,7 @@ int abpoa_main(char *file_fn, int is_list, abpoa_para_t *abpt){
 
 int main(int argc, char **argv) {
     int c, m, in_list=0; char *s; abpoa_para_t *abpt = abpoa_init_para();
-    while ((c = getopt_long(argc, argv, "m:M:X:t:O:E:b:f:z:e:QSk:w:n:i:clpso:r:g:d:q:hvV:", abpoa_long_opt, NULL)) >= 0) {
+    while ((c = getopt_long(argc, argv, "m:M:X:t:O:E:b:f:z:e:QSk:w:n:i:clpso:r:g:a:d:q:hvV:", abpoa_long_opt, NULL)) >= 0) {
         switch(c)
         {
             case 'm': m = atoi(optarg);
@@ -192,6 +199,7 @@ int main(int argc, char **argv) {
                       break;
             case 'g': abpt->out_pog= strdup(optarg); break;
 
+            case 'a': abpt->cons_algrm = atoi(optarg); break;
             case 'd': abpt->max_n_cons = atoi(optarg); break; 
             case 'q': abpt->min_freq = atof(optarg); break;
 
diff --git a/src/abpoa.h b/src/abpoa.h
index 87bbdcf..693a5bc 100644
--- a/src/abpoa.h
+++ b/src/abpoa.h
@@ -36,7 +36,12 @@
 #define ABPOA_OUT_CONS_FQ  5
 
 #define ABPOA_HB 0
-#define ABPOA_HC 1
+#define ABPOA_MC 1
+
+#define ABPOA_NONE_VERBOSE 0
+#define ABPOA_INFO_VERBOSE 1
+#define ABPOA_DEBUG_VERBOSE 2
+#define ABPOA_LONG_DEBUG_VERBOSE 3
 
 // NOTE: upper boundary of in_edge_n is pow(2,30)
 // for MATCH/MISMATCH: node_id << 34  | query_id << 4 | op
@@ -72,7 +77,7 @@ typedef struct {
     uint8_t ret_cigar:1, rev_cigar:1, out_msa:1, out_cons:1, out_gfa:1, out_fq:1, use_read_ids:1, amb_strand:1;
     uint8_t use_qv:1, disable_seeding:1, progressive_poa:1;
     char *incr_fn, *out_pog;
-    int align_mode, gap_mode, max_n_cons;
+    int align_mode, gap_mode, max_n_cons, cons_algrm; // consensus calling algorithm: 0: partial order graph, 1: majority voting
     double min_freq; // for multiploid data
     int verbose; // to control output msg
 
@@ -83,8 +88,8 @@ typedef struct {
 typedef struct {
     int node_id;
     int in_edge_n, in_edge_m, *in_id;
-    int out_edge_n, out_edge_m, *out_id; int *out_weight;
-    int *read_weight, n_read, m_read; // weight of each read, valid when use_qv=1
+    int out_edge_n, out_edge_m, *out_id; int *out_edge_weight; // out_edge_weight: edge-wise weight
+    int *read_weight, n_read, m_read; // read_weight: read-wise weight, valid when use_qv=1
     uint64_t **read_ids; int read_ids_n; // for each edge
 
     int aligned_node_n, aligned_node_m, *aligned_node_id; // mismatch; aligned node will have same rank
diff --git a/src/abpoa_align.c b/src/abpoa_align.c
index 7eca502..cd427d5 100644
--- a/src/abpoa_align.c
+++ b/src/abpoa_align.c
@@ -106,6 +106,7 @@ abpoa_para_t *abpoa_init_para(void) {
     abpt->out_fq = 0;     // output consensus sequence in fastq
     abpt->out_gfa = 0;    // out graph in GFA format
     abpt->out_msa = 0;    // output msa
+    abpt->cons_algrm = ABPOA_HB; // consensus calling algorithm
     abpt->max_n_cons = 1; // number of max. generated consensus sequence
     abpt->min_freq = MULTIP_MIN_FREQ; 
     abpt->use_read_ids = 0;
@@ -133,7 +134,7 @@ abpoa_para_t *abpoa_init_para(void) {
     abpt->min_w = ABPOA_MIN_POA_WIN;
     abpt->progressive_poa = 0; // progressive partial order alignment
 
-    abpt->verbose = 0;
+    abpt->verbose = ABPOA_NONE_VERBOSE;
 
     // abpt->simd_flag = simd_check();
 
@@ -142,10 +143,10 @@ abpoa_para_t *abpoa_init_para(void) {
 
 void abpoa_post_set_para(abpoa_para_t *abpt) {
     abpoa_set_gap_mode(abpt);
-    if (abpt->out_msa || abpt->out_gfa || abpt->max_n_cons > 1) {
+    if (abpt->out_msa || abpt->out_gfa || abpt->max_n_cons > 1 || abpt->cons_algrm == ABPOA_MC) {
         abpt->use_read_ids = 1;
-        set_65536_table();
-        if (abpt->max_n_cons > 1) set_bit_table16();
+        if (abpt->out_msa || abpt->out_gfa) set_65536_table();
+        if (abpt->max_n_cons > 1 || abpt->cons_algrm == ABPOA_MC) set_bit_table16();
     }
     if (abpt->align_mode == ABPOA_LOCAL_MODE) abpt->wb = -1;
     int i;
@@ -199,9 +200,7 @@ int abpoa_anchor_poa(abpoa_t *ab, abpoa_para_t *abpt, uint8_t **seqs, int **weig
     // uint8_t *seq1;
     for (_i = 0; _i < n_seq; ++_i) {
         i = read_id_map[_i]; read_id = exist_n_seq + i; qlen = seq_lens[i]; whole_res.n_cigar = 0, whole_res.m_cigar = 0, whole_res.graph_cigar = 0;
-#ifdef __DEBUG__
-        fprintf(stderr, "seq: # %d\n", i);
-#endif
+        if (abpt->verbose >= ABPOA_DEBUG_VERBOSE) fprintf(stderr, "seq: # %d\n", i);
         // seq-to-graph alignment and add alignment within each split window
         if (_i == 0) ai = 0; else ai = par_c[_i-1];
 
@@ -252,10 +251,7 @@ int abpoa_anchor_poa(abpoa_t *ab, abpoa_para_t *abpt, uint8_t **seqs, int **weig
         for (; ai < par_c[_i]; ++ai) {
             end_tpos = ((par_anchors.a[ai] >> 32) & 0x7fffffff) - k + 1; end_id = tpos_to_node_id[end_tpos];
             end_qpos = (uint32_t)par_anchors.a[ai] - k + 1;
-
-#ifdef __DEBUG__
-            fprintf(stderr, "\tanchor: t: %d (id: %d), q: %d\n", end_tpos, end_id, end_qpos);
-#endif
+            if (abpt->verbose >= ABPOA_DEBUG_VERBOSE) fprintf(stderr, "\tanchor: t: %d (id: %d), q: %d\n", end_tpos, end_id, end_qpos);
 
             res.graph_cigar = 0; res.n_cigar = 0;
             abpoa_align_sequence_to_subgraph(ab, abpt, beg_id, end_id, qseq+beg_qpos, end_qpos-beg_qpos, &res);
@@ -277,9 +273,7 @@ int abpoa_anchor_poa(abpoa_t *ab, abpoa_para_t *abpt, uint8_t **seqs, int **weig
         }
         end_id = ABPOA_SINK_NODE_ID; end_qpos = seq_lens[i];
 
-#ifdef __DEBUG__
-            fprintf(stderr, "\tanchor: t: %d (id: %d), q: %d\n", end_tpos, end_id, end_qpos);
-#endif
+        if (abpt->verbose >= ABPOA_DEBUG_VERBOSE) fprintf(stderr, "\tanchor: t: %d (id: %d), q: %d\n", end_tpos, end_id, end_qpos);
         res.graph_cigar = 0; res.n_cigar = 0;
         abpoa_align_sequence_to_subgraph(ab, abpt, beg_id, end_id, qseq+beg_qpos, end_qpos-beg_qpos, &res);
         abpoa_push_whole_cigar(&whole_res.n_cigar, &whole_res.m_cigar, &whole_res.graph_cigar, res.n_cigar, res.graph_cigar);
@@ -307,9 +301,7 @@ int abpoa_poa(abpoa_t *ab, abpoa_para_t *abpt, uint8_t **seqs, int **weights, in
     // uint8_t *seq1;
     for (i = 0; i < n_seq; ++i) {
         qlen = seq_lens[i]; qseq = seqs[i]; weight = weights[i]; read_id = exist_n_seq + i;
-#ifdef __DEBUG__
-        fprintf(stderr, "seq: # %d\n", i);
-#endif
+        if (abpt->verbose >= ABPOA_DEBUG_VERBOSE) fprintf(stderr, "seq: # %d\n", i);
         res.graph_cigar = 0; res.n_cigar = 0;
         if (abpoa_align_sequence_to_graph(ab, abpt, qseq, qlen, &res) >= 0) {
             if (abpt->amb_strand && (res.best_score < MIN_OF_TWO(qlen, ab->abg->node_n-2) * abpt->max_mat * .3333)) { // TODO .3333
diff --git a/src/abpoa_graph.c b/src/abpoa_graph.c
index 1fe83c4..e05ba04 100644
--- a/src/abpoa_graph.c
+++ b/src/abpoa_graph.c
@@ -28,7 +28,7 @@ void abpoa_free_node(abpoa_node_t *node, int n) {
     for (i = 0; i < n; ++i) {
         if (node[i].in_edge_m > 0) free(node[i].in_id);
         if (node[i].out_edge_m > 0) {
-            free(node[i].out_id); free(node[i].out_weight);
+            free(node[i].out_id); free(node[i].out_edge_weight);
             if (node[i].read_ids_n > 0) {
                 for (j = 0; j < node[i].out_edge_m; ++j) {
                     free(node[i].read_ids[j]);
@@ -51,7 +51,7 @@ abpoa_graph_t *abpoa_realloc_graph_edge(abpoa_graph_t *abg, int io, int id, int
         if (edge_m <= 0) {
             abg->node[id].out_edge_m = MAX_OF_TWO(abg->node[id].out_edge_n, 1);
             abg->node[id].out_id = (int*)_err_malloc(abg->node[id].out_edge_m * sizeof(int));
-            abg->node[id].out_weight = (int*)_err_malloc(abg->node[id].out_edge_m * sizeof(int));
+            abg->node[id].out_edge_weight = (int*)_err_malloc(abg->node[id].out_edge_m * sizeof(int));
             if (use_read_ids || abg->node[id].read_ids_n > 0) {
                 abg->node[id].read_ids = (uint64_t**)_err_malloc(abg->node[id].out_edge_m * sizeof(uint64_t*));
                 if (abg->node[id].read_ids_n > 0) {
@@ -64,7 +64,7 @@ abpoa_graph_t *abpoa_realloc_graph_edge(abpoa_graph_t *abg, int io, int id, int
         } else if (abg->node[id].out_edge_n >= edge_m) {
             abg->node[id].out_edge_m = abg->node[id].out_edge_n+1; kroundup32(abg->node[id].out_edge_m);
             abg->node[id].out_id = (int*)_err_realloc(abg->node[id].out_id, abg->node[id].out_edge_m * sizeof(int));
-            abg->node[id].out_weight = (int*)_err_realloc(abg->node[id].out_weight, abg->node[id].out_edge_m * sizeof(int));
+            abg->node[id].out_edge_weight = (int*)_err_realloc(abg->node[id].out_edge_weight, abg->node[id].out_edge_m * sizeof(int));
             if (use_read_ids || abg->node[id].read_ids_n > 0) {
                 abg->node[id].read_ids = (uint64_t**)_err_realloc(abg->node[id].read_ids, abg->node[id].out_edge_m * sizeof(uint64_t*));
                 if (abg->node[id].read_ids_n > 0) {
@@ -253,8 +253,8 @@ void abpoa_BFS_set_node_remain(abpoa_graph_t *abg, int src_id, int sink_id) {
             int max_w=-1, max_id=-1;
             for (i = 0; i < abg->node[cur_id].out_edge_n; ++i) {
                 out_id = abg->node[cur_id].out_id[i];
-                if (abg->node[cur_id].out_weight[i] > max_w) {
-                    max_w = abg->node[cur_id].out_weight[i];
+                if (abg->node[cur_id].out_edge_weight[i] > max_w) {
+                    max_w = abg->node[cur_id].out_edge_weight[i];
                     max_id = out_id;
                 }
             }
@@ -287,7 +287,7 @@ void abpoa_topological_sort(abpoa_graph_t *abg, abpoa_para_t *abpt) {
         // fprintf(stderr, "node_n: %d, index_rank_m: %d\n", node_n, abg->index_rank_m);
         abg->index_to_node_id = (int*)_err_realloc(abg->index_to_node_id, abg->index_rank_m * sizeof(int));
         abg->node_id_to_index = (int*)_err_realloc(abg->node_id_to_index, abg->index_rank_m * sizeof(int));
-        if (abpt->out_msa || abpt->max_n_cons > 1) 
+        if (abpt->out_msa || abpt->max_n_cons > 1 || abpt->cons_algrm == ABPOA_MC) 
             abg->node_id_to_msa_rank = (int*)_err_realloc(abg->node_id_to_msa_rank, abg->index_rank_m * sizeof(int));
         if (abpt->wb >= 0) {
             abg->node_id_to_max_pos_left = (int*)_err_realloc(abg->node_id_to_max_pos_left, abg->index_rank_m * sizeof(int));
@@ -426,7 +426,7 @@ int abpoa_add_graph_edge(abpoa_graph_t *abg, int from_id, int to_id, int check_e
         int i;
         for (i = 0; i < out_edge_n; ++i) {
             if (abg->node[from_id].out_id[i] == to_id) { // edge exists
-                abg->node[from_id].out_weight[i] += w; // update weight on existing edge
+                abg->node[from_id].out_edge_weight[i] += w; // update weight on existing edge
                 // update label id
                 edge_exist = 1;
                 out_edge_i = i;
@@ -444,7 +444,7 @@ int abpoa_add_graph_edge(abpoa_graph_t *abg, int from_id, int to_id, int check_e
         /// out edge
         abpoa_realloc_graph_edge(abg, 1, from_id, add_read_id);
         abg->node[from_id].out_id[out_edge_n] = to_id;
-        abg->node[from_id].out_weight[out_edge_n] = w; // initial weight for new edge
+        abg->node[from_id].out_edge_weight[out_edge_n] = w; // initial weight for new edge
         out_edge_i = out_edge_n;
         ++abg->node[from_id].out_edge_n;
     }
@@ -699,7 +699,7 @@ void abpoa_reset(abpoa_t *ab, abpoa_para_t *abpt, int qlen) {
         abg->node_m = abg->index_rank_m = node_m;
         abg->index_to_node_id = (int*)_err_realloc(abg->index_to_node_id, node_m * sizeof(int));
         abg->node_id_to_index = (int*)_err_realloc(abg->node_id_to_index, node_m * sizeof(int));
-        if (abpt->out_msa || abpt->max_n_cons > 1) 
+        if (abpt->out_msa || abpt->max_n_cons > 1 || abpt->cons_algrm == ABPOA_MC) 
             abg->node_id_to_msa_rank = (int*)_err_realloc(abg->node_id_to_msa_rank, node_m * sizeof(int));
         if (abpt->wb >= 0) {
             abg->node_id_to_max_pos_left = (int*)_err_realloc(abg->node_id_to_max_pos_left, node_m * sizeof(int));
diff --git a/src/abpoa_output.c b/src/abpoa_output.c
index 697962d..1c08c2d 100644
--- a/src/abpoa_output.c
+++ b/src/abpoa_output.c
@@ -275,51 +275,75 @@ int abpoa_cons_phred_score(int n_cov, int n_seq) {
     return (33 + (int)(-10 * log10(p) + 0.499));
 }
 
-int get_read_ids_clu_count(uint64_t *cur_read_ids, int read_ids_n, uint64_t *clu_read_ids) {
+int get_edge_inclu_read_count(int edge_i, int cons_i, abpoa_node_t *node, uint64_t **clu_read_ids) {
     int n = 0, i; uint64_t b;
-    for (i = 0; i < read_ids_n; ++i) {
-        b = cur_read_ids[i] & clu_read_ids[i];
+    for (i = 0; i < node->read_ids_n; ++i) {
+        b = node->read_ids[edge_i][i] & clu_read_ids[cons_i][i];
         n += get_bit_cnt4(ab_bit_table16, b);
     }
     return n;
 }
 
-int get_read_ids_clu_weight(uint64_t *cur_read_ids, int read_ids_n, uint64_t *clu_read_ids, uint8_t use_qv, int *read_weight, int m_read) {
-    if (use_qv == 0) return get_read_ids_clu_count(cur_read_ids, read_ids_n, clu_read_ids);
-    int w = 0, i; uint64_t b;
-    for (i = 0; i < read_ids_n; ++i) {
-        b = cur_read_ids[i] & clu_read_ids[i];
-
-        w += get_bit_cnt4(ab_bit_table16, b);
-    }
-    uint64_t one = 1;
-    for (i = 0; i < m_read; ++i) {
-        if (read_weight[i] > 0) {
+// for n_clu >= 2, get weight of reads within cons_i'th clu
+int get_inclu_edge_weight(int edge_i, int cons_i, abpoa_node_t *node, uint64_t **clu_read_ids, int use_qv) {
+    // collect read-wise weight
+    if (use_qv == 0) return get_edge_inclu_read_count(edge_i, cons_i, node, clu_read_ids);
+    int w = 0, i; uint64_t c, one = 1;
+    for (i = 0; i < node->m_read; ++i) {
+        if (node->read_weight[i] > 0) {
             int n = i / 64, b = i & 0x3f;
-            if ((cur_read_ids[n] & clu_read_ids[n] & (one << b)) > 0)
-                w += read_weight[i];
+            c = node->read_ids[edge_i][n] & clu_read_ids[cons_i][n];
+            if ((c & (one << b)) > 0)
+                w += node->read_weight[i];
         }
     }
     return w;
 }
 
-int abpoa_consensus_cov(abpoa_graph_t *abg, int id, uint64_t *clu_read_ids) {
-    int i, j, in_id, left_n, right_n;
+int get_edge_weight(int edge_i, int cons_i, abpoa_node_t *node, uint64_t **clu_read_ids, int use_qv, int n_clu) {
+    if (n_clu == 1) { // use weight directly
+        return node->out_edge_weight[edge_i];
+    } else { // collect weight of reads within cons_i'th clu
+        return get_inclu_edge_weight(edge_i, cons_i, node, clu_read_ids, use_qv);
+    }
+}
+
+int get_node_weight(int n_clu, int cons_i, abpoa_node_t *node, uint64_t **clu_read_ids, int use_qv) {
+    int w = 0, i;
+    for (i = 0; i < node->out_edge_n; ++i) {
+        w += get_edge_weight(i, cons_i, node, clu_read_ids, use_qv, n_clu);
+    }
+    return w;
+}
+
+// get base coverage for node[id]
+int abpoa_node_out_cov(abpoa_node_t *node, int id, uint64_t **clu_read_ids, int cons_i, int n_cons) {
+    if (n_cons == 1) return node[id].n_read;
+    int i, out_cov = 0;
+    for (i = 0; i < node[id].out_edge_n; ++i) {
+        out_cov += get_edge_inclu_read_count(i, cons_i, node+id, clu_read_ids);
+    }
+    return out_cov;
+}
+
+int abpoa_node_in_cov(abpoa_node_t *node, int id, uint64_t **clu_read_ids, int cons_i, int n_cons) {
+    if (n_cons == 1) return node[id].n_read;
+    int i, j, in_id, in_cov = 0;
     // for each id: get max{left_weigth, right_weight}
-    left_n = right_n = 0;
-    for (i = 0; i < abg->node[id].in_edge_n; ++i) {
-        in_id = abg->node[id].in_id[i];
-        for (j = 0; j < abg->node[in_id].out_edge_n; ++j) {
-            if (abg->node[in_id].out_id[j] == id) {
-                left_n += get_read_ids_clu_count(abg->node[in_id].read_ids[j], abg->node[in_id].read_ids_n, clu_read_ids);
+    for (i = 0; i < node->in_edge_n; ++i) {
+        in_id = node[id].in_id[i];
+        for (j = 0; j < node[in_id].out_edge_n; ++j) {
+            if (node[in_id].out_id[j] == id) {
+                in_cov += get_edge_inclu_read_count(j, cons_i, node+in_id, clu_read_ids);
                 break;
             }
         }
     }
-    for (i = 0; i < abg->node[id].out_edge_n; ++i) {
-        right_n += get_read_ids_clu_count(abg->node[id].read_ids[i], abg->node[id].read_ids_n, clu_read_ids);
-    }
-    return MAX_OF_TWO(left_n, right_n);
+    return in_cov;
+}
+int abpoa_node_cov(abpoa_node_t *node, int id, uint64_t **clu_read_ids, int cons_i, int n_cons) {
+    if (n_cons == 1) return node[id].n_read;
+    return MAX_OF_TWO(abpoa_node_in_cov(node, id, clu_read_ids, cons_i, n_cons), abpoa_node_out_cov(node, id, clu_read_ids, cons_i, n_cons));
 }
 
 void abpoa_set_hb_cons(abpoa_graph_t *abg, int **max_out_id, int n_cons, uint64_t **clu_read_ids, int src_id, int sink_id, abpoa_cons_t *abc) {
@@ -331,7 +355,7 @@ void abpoa_set_hb_cons(abpoa_graph_t *abg, int **max_out_id, int n_cons, uint64_
         while (cur_id != sink_id) {
             abc->cons_node_ids[i][j] = cur_id;
             abc->cons_base[i][j] = abg->node[cur_id].base;
-            abc->cons_cov[i][j] = abpoa_consensus_cov(abg, cur_id, clu_read_ids[i]);
+            abc->cons_cov[i][j] = abpoa_node_cov(abg->node, cur_id, clu_read_ids, i, n_cons);
             abc->cons_phred_score[i][j] = abpoa_cons_phred_score(abc->cons_cov[i][j], abc->clu_n_seq[i]);
             ++j;
             cur_id = max_out_id[i][cur_id];
@@ -340,78 +364,59 @@ void abpoa_set_hb_cons(abpoa_graph_t *abg, int **max_out_id, int n_cons, uint64_
     }
 }
 
-void abpoa_set_hb_cons1(abpoa_graph_t *abg, int *max_out_id, int src_id, int sink_id, abpoa_cons_t *abc) {
-    int i = 0, cur_id;
-    abc->n_cons = 1;
-    cur_id = max_out_id[src_id];
-    while (cur_id != sink_id) {
-        abc->cons_node_ids[0][i] = cur_id;
-        abc->cons_base[0][i] = abg->node[cur_id].base;
-        abc->cons_cov[0][i] = abg->node[cur_id].n_read;
-        abc->cons_phred_score[0][i] = abpoa_cons_phred_score(abc->cons_cov[0][i], abc->n_seq);
-        cur_id = max_out_id[cur_id];
-        ++i;
+void abpoa_set_major_voting_cons(abpoa_graph_t *abg, int m, int ***row_column_count, int **msa_node_id, int src_id, int sink_id, int msa_l, abpoa_cons_t *abc) {
+    int cur_id, cons_i, i, j, max_c, max_base, gap_c, c;
+    int cons_l;
+    for (cons_i = 0; cons_i < abc->n_cons; ++cons_i) {
+        cons_l = 0;
+        for (i = 0; i < msa_l; ++i) {
+            max_c = 0, max_base = m, gap_c = abc->clu_n_seq[cons_i];
+            for (j = 0; j < m-1; ++j) {
+                c = row_column_count[cons_i][i][j];
+                if (c > max_c) {
+                    max_c = c;
+                    max_base = j;
+                }
+                gap_c -= c;
+            }
+            if (max_c >= gap_c) { // append consensus base to abc
+                cur_id = msa_node_id[i][max_base];
+                abc->cons_node_ids[cons_i][cons_l] = cur_id;
+                abc->cons_base[cons_i][cons_l] = max_base;
+                abc->cons_cov[cons_i][cons_l] = max_c;
+                abc->cons_phred_score[cons_i][cons_l] = abpoa_cons_phred_score(abc->cons_cov[cons_i][cons_l], abc->clu_n_seq[cons_i]);
+                cons_l++;
+            }
+        }
+        abc->cons_len[cons_i] = cons_l;
     }
-    abc->cons_len[0] = i;
 }
 
-// heaviest_bundling
-// 1. argmax{cur->weight}
-// 2. argmax{out_node->weight}
-void abpoa_heaviest_bundling(abpoa_graph_t *abg, int src_id, int sink_id, int *out_degree, abpoa_cons_t *abc) {
-    int *id, i, cur_id, in_id, out_id, max_id; int max_w, out_w;
-    int *score = (int*)_err_malloc(abg->node_n * sizeof(int));
-    int *max_out_id = (int*)_err_malloc(abg->node_n * sizeof(int));
-
-    kdq_int_t *q = kdq_init_int();
-    kdq_push_int(q, sink_id);
-    // reverse Breadth-First-Search
-    while ((id = kdq_shift_int(q)) != 0) {
-        cur_id = *id;
-        if (cur_id == sink_id) {
-            max_out_id[cur_id] = -1;
-            score[cur_id] = 0;
-        } else {
-            max_id = -1;
-            if (cur_id == src_id) {
-                int path_score = -1, path_max_w = -1;
-                for (i = 0; i < abg->node[cur_id].out_edge_n; ++i) {
-                    out_id = abg->node[cur_id].out_id[i];
-                    out_w = abg->node[cur_id].out_weight[i];
-                    if (out_w > path_max_w || (out_w == path_max_w && score[out_id] > path_score)) {
-                        max_id = out_id;
-                        path_score = score[out_id];
-                        path_max_w = out_w;
-                    }
-                }
-                max_out_id[cur_id] = max_id;
-                kdq_destroy_int(q);
-                break;
-            } else {
-                max_w = INT32_MIN;
-                for (i = 0; i < abg->node[cur_id].out_edge_n; ++i) {
-                    out_id = abg->node[cur_id].out_id[i];
-                    out_w = abg->node[cur_id].out_weight[i];
-                    if (max_w < out_w) {
-                        max_w = out_w; max_id = out_id;
-                    } else if (max_w == out_w && score[max_id] <= score[out_id]) {
-                        max_id = out_id;
-                    }
-                }
-                score[cur_id] = max_w + score[max_id];
-                max_out_id[cur_id] = max_id;
-            }
+void abpoa_set_row_column_weight(abpoa_graph_t *abg, int n_clu, int m, int ***rc_weight, uint64_t **clu_read_ids, int **msa_node_id) {
+    int i, cons_i, k, rank, aligned_id;
+    int node_w;
+    for (i = 2; i < abg->node_n; ++i) {
+        // get msa rank
+        rank = abpoa_graph_node_id_to_msa_rank(abg, i);
+        for (k = 0; k < abg->node[i].aligned_node_n; ++k) {
+            aligned_id = abg->node[i].aligned_node_id[k];
+            rank = MAX_OF_TWO(rank, abpoa_graph_node_id_to_msa_rank(abg, aligned_id));
         }
-        for (i = 0; i < abg->node[cur_id].in_edge_n; ++i) {
-            in_id = abg->node[cur_id].in_id[i];
-            if (--out_degree[in_id] == 0) kdq_push_int(q, in_id);
+        msa_node_id[rank-1][abg->node[i].base] = i;
+        // assign seq
+        for (cons_i = 0; cons_i < n_clu; ++cons_i) {
+            node_w = abpoa_node_out_cov(abg->node, i, clu_read_ids, cons_i, n_clu);
+            rc_weight[cons_i][rank-1][abg->node[i].base] = node_w;
+            // for (k = 0; k < abg->node[i].read_ids_n; ++k) {
+            //     for (j = 0; j < abg->node[i].out_edge_n; ++j) {
+            //         if (n_clu > 1) b = abg->node[i].read_ids[j][k] & clu_read_ids[cons_i][k];
+            //         else b = abg->node[i].read_ids[j][k];
+            //         rc_count[cons_i][rank-1][abg->node[i].base] += get_bit_cnt4(ab_bit_table16, b);
+            //     }
+            // }
+            rc_weight[cons_i][rank-1][m-1] -= rc_weight[cons_i][rank-1][abg->node[i].base];
         }
     }
-    abc->clu_n_seq[0] = abc->n_seq;
-    // set cons read ids
-    for (i = 0; i < abc->n_seq; ++i) abc->clu_read_ids[0][i] = i;
-    abpoa_set_hb_cons1(abg, max_out_id, src_id, sink_id, abc);
-    free(score); free(max_out_id);
 }
 
 void set_clu_read_ids(abpoa_cons_t *abc, uint64_t **read_ids, int cons_i, int n_seq) {
@@ -426,18 +431,26 @@ void set_clu_read_ids(abpoa_cons_t *abc, uint64_t **read_ids, int cons_i, int n_
         err_fatal(__func__, "Error in set cluster read ids. (%d, %d)", n, abc->clu_n_seq[cons_i]);
 }
 
-void abpoa_multip_heaviest_bundling(abpoa_graph_t *abg, abpoa_para_t *abpt, int src_id, int sink_id, int *out_degree, int n_clu, int read_ids_n, uint64_t **clu_read_ids, abpoa_cons_t *abc) {
+// heaviest_bundling
+// 1. argmax{cur->weight}
+// 2. argmax{out_node->weight}
+void abpoa_heaviest_bundling(abpoa_graph_t *abg, abpoa_para_t *abpt, int src_id, int sink_id, int *out_degree, int n_clu, int read_ids_n, uint64_t **clu_read_ids, abpoa_cons_t *abc) {
     int *id, cons_i, i, cur_id, in_id, out_id, max_id; int max_w, out_w;
 
     int *_out_degree = (int*)_err_malloc(abg->node_n * sizeof(int));
     int *score = (int*)_err_malloc(abg->node_n * sizeof(int));
     int **max_out_id = (int**)_err_malloc(n_clu * sizeof(int*));
     for (i = 0; i < n_clu; ++i) max_out_id[i] = (int*)_err_malloc(abg->node_n * sizeof(int));
+    if (n_clu == 1) abc->clu_n_seq[0] = abc->n_seq;
+    else {
+        for (cons_i = 0; cons_i < n_clu; cons_i++) {
+            abc->clu_n_seq[cons_i] = get_read_cnt(clu_read_ids[cons_i], read_ids_n);
+            set_clu_read_ids(abc, clu_read_ids, cons_i, abc->n_seq);
+        }
+    }
 
     for (cons_i = 0; cons_i < n_clu; cons_i++) {
         for (i = 0; i < abg->node_n; ++i) _out_degree[i] = out_degree[i];
-        abc->clu_n_seq[cons_i] = get_read_cnt(clu_read_ids[cons_i], read_ids_n);
-        set_clu_read_ids(abc, clu_read_ids, cons_i, abc->n_seq);
         kdq_int_t *q = kdq_init_int();
         kdq_push_int(q, sink_id);
         // reverse Breadth-First-Search
@@ -452,8 +465,7 @@ void abpoa_multip_heaviest_bundling(abpoa_graph_t *abg, abpoa_para_t *abpt, int
                     int path_score = -1, path_max_w = -1;
                     for (i = 0; i < abg->node[cur_id].out_edge_n; ++i) {
                         out_id = abg->node[cur_id].out_id[i];
-                        out_w = get_read_ids_clu_weight(abg->node[cur_id].read_ids[i], abg->node[cur_id].read_ids_n, clu_read_ids[cons_i], abpt->use_qv, abg->node[cur_id].read_weight, abg->node[cur_id].m_read);
-                        // out_w = abg->node[cur_id].out_weight[i];
+                        out_w = get_edge_weight(i, cons_i, abg->node+cur_id, clu_read_ids, abpt->use_qv, n_clu);
                         if (out_w > path_max_w || (out_w == path_max_w && score[out_id] > path_score)) {
                             max_id = out_id;
                             path_score = score[out_id];
@@ -467,7 +479,7 @@ void abpoa_multip_heaviest_bundling(abpoa_graph_t *abg, abpoa_para_t *abpt, int
                     max_w = INT32_MIN;
                     for (i = 0; i < abg->node[cur_id].out_edge_n; ++i) {
                         out_id = abg->node[cur_id].out_id[i];
-                        out_w = get_read_ids_clu_weight(abg->node[cur_id].read_ids[i], abg->node[cur_id].read_ids_n, clu_read_ids[cons_i], abpt->use_qv, abg->node[cur_id].read_weight, abg->node[cur_id].m_read);
+                        out_w = get_edge_weight(i, cons_i, abg->node+cur_id, clu_read_ids, abpt->use_qv, n_clu);
                         if (max_w < out_w) {
                             max_w = out_w;
                             max_id = out_id;
@@ -492,6 +504,42 @@ void abpoa_multip_heaviest_bundling(abpoa_graph_t *abg, abpoa_para_t *abpt, int
     for (i = 0; i < n_clu; ++i) free(max_out_id[i]); free(max_out_id);
 }
 
+void abpoa_major_voting(abpoa_graph_t *abg, abpoa_para_t *abpt, int src_id, int sink_id, int *out_degree, int n_clu, int read_ids_n, uint64_t **clu_read_ids, abpoa_cons_t *abc) {
+    abpoa_set_msa_rank(abg, src_id, sink_id);
+    int i, cons_i, msa_l = abg->node_id_to_msa_rank[sink_id]-1;
+    int ***row_column_weight = (int***)_err_malloc(n_clu * sizeof(int**));
+    int **msa_node_id = (int**)_err_malloc(msa_l * sizeof(int*));
+    // init row_column_weight
+    for (cons_i = 0; cons_i < n_clu; ++cons_i) {
+        row_column_weight[cons_i] = (int**)_err_malloc(msa_l * sizeof(int*));
+        for (i = 0; i < msa_l; ++i) {
+            row_column_weight[cons_i][i] = (int*)_err_calloc(abpt->m, sizeof(int));
+            row_column_weight[cons_i][i][abpt->m-1] = abc->clu_n_seq[cons_i];
+        }
+    }
+    for (i = 0; i < msa_l; ++i) msa_node_id[i] = (int*)_err_calloc(abpt->m, sizeof(int));
+    abc->n_cons = n_clu;
+    if (n_clu == 1) abc->clu_n_seq[0] = abc->n_seq;
+    else {
+        for (cons_i = 0; cons_i < n_clu; ++cons_i) {
+            abc->clu_n_seq[cons_i] = get_read_cnt(clu_read_ids[cons_i], read_ids_n);
+            set_clu_read_ids(abc, clu_read_ids, cons_i, abc->n_seq);
+        }
+    }
+    // no quality weight used for now, only read count
+    abpoa_set_row_column_weight(abg, n_clu, abpt->m, row_column_weight, clu_read_ids, msa_node_id);
+    abpoa_set_major_voting_cons(abg, abpt->m, row_column_weight, msa_node_id, src_id, sink_id, msa_l, abc);
+    for (cons_i = 0; cons_i < n_clu; ++cons_i) {
+        for (i = 0; i < msa_l; ++i) {
+            free(row_column_weight[cons_i][i]);
+        } free(row_column_weight[cons_i]);
+    }
+    for (i = 0; i < msa_l; ++i) {
+        free(msa_node_id[i]);
+    } 
+    free(row_column_weight); free(msa_node_id);
+}
+
 void abpoa_output_fx_consensus(abpoa_t *ab, abpoa_para_t *abpt, FILE *out_fp) {
     if (out_fp == NULL) return;
     int cons_i, j;
@@ -728,7 +776,7 @@ int reassign_hap(int **clu_haps, int *clu_size, uint64_t **clu_read_ids, int rea
 
 // read_weight is NOT used here, no matter use_qv is set or not.
 // collect minimized set of het bases
-int abpoa_set_het_row_column_ids_weight(abpoa_graph_t *abg, uint64_t ***read_ids, int *het_poss, int **rc_weight, int msa_l, int n_seq, int m, int min_w, int read_ids_n) {
+int abpoa_set_het_row_column_ids_weight(abpoa_graph_t *abg, uint64_t ***read_ids, int *het_poss, int **rc_weight, int msa_l, int n_seq, int m, int min_w, int read_ids_n, int verbose) {
     int i, j, k, n, rank;
     uint64_t b, one = 1, *whole_read_ids = (uint64_t*)_err_calloc(read_ids_n, sizeof(uint64_t));
     for (i = 0; i < n_seq; ++i) {
@@ -782,12 +830,12 @@ int abpoa_set_het_row_column_ids_weight(abpoa_graph_t *abg, uint64_t ***read_ids
             if (iden == 1) continue;
 
             het_poss[n_het_pos++] = rank;
-#ifdef __DEBUG__
-            fprintf(stderr, "%d\t", rank);
-            for (j = 0; j < m; ++j) {
-                fprintf(stderr, "%c: %d\t", "ACGT-"[j], rc_weight[rank][j]);
-            } fprintf(stderr, "\n");
-#endif
+            if (verbose >= ABPOA_LONG_DEBUG_VERBOSE) {
+                fprintf(stderr, "%d\t", rank);
+                for (j = 0; j < m; ++j) {
+                    fprintf(stderr, "%c: %d\t", "ACGT-"[j], rc_weight[rank][j]);
+                } fprintf(stderr, "\n");
+            }
         }
     }
     free(n_branch); free(node_map);
@@ -796,7 +844,8 @@ int abpoa_set_het_row_column_ids_weight(abpoa_graph_t *abg, uint64_t ***read_ids
 
 // group read into clusters based on all het bases
 // initial cluster size could be > max_n_cons
-int abpoa_collect_clu_hap_read_ids(int *het_poss, int n_het_pos, uint64_t ***read_ids, int read_ids_n, int n_seq, int m, int min_w, int max_n_cons, uint64_t ***clu_read_ids, int *_m_clu) {
+int abpoa_collect_clu_hap_read_ids(int *het_poss, int n_het_pos, uint64_t ***read_ids, int read_ids_n, 
+                                   int n_seq, int m, int min_w, int max_n_cons, uint64_t ***clu_read_ids, int *_m_clu, int verbose) {
     if (n_het_pos == 0) return 1;
     int i, j, k, n_clu = 0, m_clu = 2;
     int **clu_haps = (int**)_err_malloc(2 * sizeof(int*));
@@ -832,25 +881,25 @@ int abpoa_collect_clu_hap_read_ids(int *het_poss, int n_het_pos, uint64_t ***rea
         }
     }
     if (n_clu < 2) err_fatal(__func__, "# haplotypes: %d\n", n_clu);
-#ifdef __DEBUG__
-    fprintf(stderr, "n_clu: %d\n", n_clu);
-    for (i = 0; i < n_clu; ++i) {
-        for (j = 0; j < n_het_pos; ++j) {
-            fprintf(stderr, "%d\t", clu_haps[i][j]);
+    if (verbose >= ABPOA_LONG_DEBUG_VERBOSE) {
+        fprintf(stderr, "n_clu: %d\n", n_clu);
+        for (i = 0; i < n_clu; ++i) {
+            for (j = 0; j < n_het_pos; ++j) {
+                fprintf(stderr, "%d\t", clu_haps[i][j]);
+            }
+            fprintf(stderr, "\tsize: %d\n", clu_size[i]);
         }
-        fprintf(stderr, "\tsize: %d\n", clu_size[i]);
     }
-#endif
 
     // assign haplotype with reads < min_w to haplotype with reads >= min_w
     // keep at most _max_n_cons_ haps and read ids, weight need to >= min_w
     n_clu = reassign_hap(clu_haps, clu_size, *clu_read_ids, read_ids_n, n_clu, min_w, max_n_cons, n_het_pos);
-#ifdef __DEBUG__
-    fprintf(stderr, "After re-assign: n_clu: %d\n", n_clu);
-    for (i = 0; i < n_clu; ++i) {
-        fprintf(stderr, "%d:\tsize: %d\n", i, clu_size[i]);
+    if (verbose >= ABPOA_LONG_DEBUG_VERBOSE) {
+        fprintf(stderr, "After re-assign: n_clu: %d\n", n_clu);
+        for (i = 0; i < n_clu; ++i) {
+            fprintf(stderr, "%d:\tsize: %d\n", i, clu_size[i]);
+        }
     }
-#endif
     for (i = 0; i < m_clu; ++i) free(clu_haps[i]); free(clu_haps); free(clu_size);
     *_m_clu = m_clu;
     return n_clu;
@@ -858,7 +907,7 @@ int abpoa_collect_clu_hap_read_ids(int *het_poss, int n_het_pos, uint64_t ***rea
 
 // read_weight is NOT used here
 // cluster reads into _n_clu_ groups based on heterogeneous bases
-int abpoa_multip_read_clu(abpoa_graph_t *abg, int src_id, int sink_id, int n_seq, int m, int max_n_cons, double min_freq, uint64_t ***clu_read_ids, int *_m_clu) {
+int abpoa_multip_read_clu(abpoa_graph_t *abg, int src_id, int sink_id, int n_seq, int m, int max_n_cons, double min_freq, uint64_t ***clu_read_ids, int *_m_clu, int verbose) {
     abpoa_set_msa_rank(abg, src_id, sink_id);
     int i, j, n_clu, m_clu, read_ids_n = (n_seq-1)/64+1;
     int msa_l = abg->node_id_to_msa_rank[sink_id]-1, min_w = MAX_OF_TWO(1, n_seq * min_freq); // TODO fastq-qual weight
@@ -878,11 +927,11 @@ int abpoa_multip_read_clu(abpoa_graph_t *abg, int src_id, int sink_id, int n_seq
     } 
     // find min set of het nodes
     int *het_poss = (int*)_err_calloc(msa_l, sizeof(int));
-    int n_het_pos = abpoa_set_het_row_column_ids_weight(abg, read_ids, het_poss, rc_weight, msa_l, n_seq, m, min_w, read_ids_n);
+    int n_het_pos = abpoa_set_het_row_column_ids_weight(abg, read_ids, het_poss, rc_weight, msa_l, n_seq, m, min_w, read_ids_n, verbose);
     
     if (n_het_pos < 1) n_clu = 1;
     // collect at most _max_n_cons_ haplotypes and corresponding read ids
-    else n_clu = abpoa_collect_clu_hap_read_ids(het_poss, n_het_pos, read_ids, read_ids_n, n_seq, m, min_w, max_n_cons, clu_read_ids, &m_clu);
+    else n_clu = abpoa_collect_clu_hap_read_ids(het_poss, n_het_pos, read_ids, read_ids_n, n_seq, m, min_w, max_n_cons, clu_read_ids, &m_clu, verbose);
 
     for (i = 0; i < msa_l; ++i) {
         for (j = 0; j < m; ++j) free(read_ids[i][j]);
@@ -903,19 +952,21 @@ void abpoa_generate_consensus(abpoa_t *ab, abpoa_para_t *abpt) {
         out_degree[i] = abg->node[i].out_edge_n;
     }
 
-    int n_clu, m_clu, n_seq = ab->abs->n_seq; uint64_t **clu_read_ids;
+    int n_clu, m_clu=0, n_seq = ab->abs->n_seq; uint64_t **clu_read_ids=NULL;
     int read_ids_n = (n_seq-1)/64+1;
 
-    if (abpt->max_n_cons > 1) n_clu = abpoa_multip_read_clu(abg, ABPOA_SRC_NODE_ID, ABPOA_SINK_NODE_ID, n_seq, abpt->m, abpt->max_n_cons, abpt->min_freq, &clu_read_ids, &m_clu);
-    else n_clu = 1;
+    n_clu = 1;
+    if (abpt->max_n_cons > 1) 
+        n_clu = abpoa_multip_read_clu(abg, ABPOA_SRC_NODE_ID, ABPOA_SINK_NODE_ID, n_seq, abpt->m, abpt->max_n_cons, abpt->min_freq, &clu_read_ids, &m_clu, abpt->verbose);
 
     abpoa_cons_t *abc = ab->abc;
     abpoa_allocate_cons(abc, abg->node_n, ab->abs->n_seq, n_clu);
-    if (n_clu > 1) {
-         abpoa_multip_heaviest_bundling(abg, abpt, ABPOA_SRC_NODE_ID, ABPOA_SINK_NODE_ID, out_degree, n_clu, read_ids_n, clu_read_ids, abc);
+    if (abpt->cons_algrm == ABPOA_HB)
+        abpoa_heaviest_bundling(abg, abpt, ABPOA_SRC_NODE_ID, ABPOA_SINK_NODE_ID, out_degree, n_clu, read_ids_n, clu_read_ids, abc);
+    else
+        abpoa_major_voting(abg, abpt, ABPOA_SRC_NODE_ID, ABPOA_SINK_NODE_ID, out_degree, n_clu, read_ids_n, clu_read_ids, abc);
+    if (m_clu > 0) {
         for (i = 0; i < m_clu; ++i) free(clu_read_ids[i]); free(clu_read_ids);
-    } else {
-        abpoa_heaviest_bundling(abg, ABPOA_SRC_NODE_ID, ABPOA_SINK_NODE_ID, out_degree, abc);
     }
     abg->is_called_cons = 1; free(out_degree);
 }
diff --git a/src/abpoa_plot.c b/src/abpoa_plot.c
index d35fbcf..3fbbffa 100644
--- a/src/abpoa_plot.c
+++ b/src/abpoa_plot.c
@@ -84,7 +84,7 @@ void abpoa_dump_pog(abpoa_t *ab, abpoa_para_t *abpt) {
         // out_edge
         for (j = 0; j < abg->node[id].out_edge_n; ++j) {
             out_id = abg->node[id].out_id[j];
-            fprintf(fp, "\t%s -> %s [label=\"%d\", penwidth=%d]\n", node_label[id], node_label[out_id], abg->node[id].out_weight[j], abg->node[id].out_weight[j]+1);
+            fprintf(fp, "\t%s -> %s [label=\"%d\", penwidth=%d]\n", node_label[id], node_label[out_id], abg->node[id].out_edge_weight[j], abg->node[id].out_edge_weight[j]+1);
         }
         if (abg->node[id].aligned_node_n > 0) {
             fprintf(fp, "\t{rank=same; %s ", node_label[id]);
diff --git a/src/abpoa_seed.c b/src/abpoa_seed.c
index 13cbb05..277413e 100644
--- a/src/abpoa_seed.c
+++ b/src/abpoa_seed.c
@@ -231,7 +231,7 @@ void mm_aa_sketch(void *km, const uint8_t *str, int len, int w, int k, uint32_t
 int abpoa_build_guide_tree(abpoa_para_t *abpt, int n_seq, ab_u128_v *mm, int *tree_id_map) {
     if (mm->n == 0) return 0;
 
-    if (abpt->verbose > 0) fprintf(stderr, "[%s] Building progressive guide tree ... ", __func__);
+    if (abpt->verbose >= ABPOA_INFO_VERBOSE) fprintf(stderr, "[%s] Building progressive guide tree ... ", __func__);
     size_t i, _i, j; int rid1, rid2;                                          // mm_hit_n: mimizer hits between each two sequences
                                                                               // 0: 0
                                                                               // 1: 0 1
@@ -319,7 +319,7 @@ int abpoa_build_guide_tree(abpoa_para_t *abpt, int n_seq, ab_u128_v *mm, int *tr
     }
 
     free(mm_hit_n); free(jac_sim);
-    if (abpt->verbose > 0) fprintf(stderr, "done!\n");
+    if (abpt->verbose >= ABPOA_INFO_VERBOSE) fprintf(stderr, "done!\n");
     return 0;
 }
 
@@ -382,7 +382,9 @@ int get_local_chain_score(int j_end_tpos, int j_end_qpos, int i_end_anchor_i, ab
 // local chains:
 //   x: strand | end_tpos | end_qpos
 //   y: end_anchor_i | start_anchor_i
-int abpoa_dp_chaining_of_local_chains(void *km, ab_u128_t *local_chains, int n_local_chains, ab_u64_v *anchors, int *score, int *pre_id, ab_u64_v *par_anchors, int min_w, int tlen, int qlen) {
+int abpoa_dp_chaining_of_local_chains(void *km, ab_u128_t *local_chains, int n_local_chains, 
+                                      ab_u64_v *anchors, int *score, int *pre_id, ab_u64_v *par_anchors, 
+                                      int min_w, int tlen, int qlen, int verbose) {
     int i, j, st, score1, global_max_score=INT32_MIN, global_max_i=-1;
     int *chain_score = (int*)kmalloc(km, n_local_chains * 4), *pre_chain_id = (int*)kmalloc(km, n_local_chains * 4);
     size_t _n = par_anchors->n;
@@ -452,13 +454,13 @@ int abpoa_dp_chaining_of_local_chains(void *km, ab_u128_t *local_chains, int n_l
         par_anchors->a[par_anchors->n-i-1] = tmp;
     }
 
-#ifdef __DEBUG__
-    for (i = _n; i < par_anchors->n; ++i) {
-        uint64_t ia = par_anchors->a[i];
-        // strand, rpos, qpos
-        fprintf(stderr, "%c\t%ld\t%d\n", "+-"[ia >> 63], (ia>>32) & 0x7fffffff, ((uint32_t)ia));
+    if (verbose >= ABPOA_LONG_DEBUG_VERBOSE) {
+        for (i = _n; i < par_anchors->n; ++i) {
+            uint64_t ia = par_anchors->a[i];
+            // strand, rpos, qpos
+            fprintf(stderr, "%c\t%ld\t%d\n", "+-"[ia >> 63], (ia>>32) & 0x7fffffff, ((uint32_t)ia));
+        }
     }
-#endif
     kfree(km, chain_score), kfree(km, pre_chain_id);
     return 0;
 }
@@ -482,7 +484,7 @@ static int get_chain_score(int max_bw, int *score, int i_qpos, int i_tpos, int j
 // Dynamic Programming-based Chaining for global alignment mode
 // anchors:
 //          strand<<63 | tpos<<32 | qpos
-int abpoa_dp_chaining(void *km, ab_u64_v *anchors, ab_u64_v *par_anchors, abpoa_para_t *abpt, int tlen, int qlen) {
+int abpoa_dp_chaining(void *km, ab_u64_v *anchors, ab_u64_v *par_anchors, abpoa_para_t *abpt, int tlen, int qlen, int verbose) {
     int i, j, st, n_a = anchors->n;
     int *score = (int32_t*)kmalloc(km, n_a * 4), *pre_id = (int32_t*)kmalloc(km, n_a * 4), *end_pos = (int32_t*)kmalloc(km, n_a * 4);
     memset(end_pos, 0, n_a * 4);
@@ -516,9 +518,7 @@ int abpoa_dp_chaining(void *km, ab_u64_v *anchors, ab_u64_v *par_anchors, abpoa_
 
             if (pre_id[j] >= 0) end_pos[pre_id[j]] = i;
         }
-#ifdef __DEBUG__
-        fprintf(stderr, "%d pre_id: %d, score: %d, tpos: %d, qpos: %d\n", i, max_j, max_score, i_tpos, i_qpos);
-#endif
+        if (verbose >= ABPOA_LONG_DEBUG_VERBOSE) fprintf(stderr, "%d pre_id: %d, score: %d, tpos: %d, qpos: %d\n", i, max_j, max_score, i_tpos, i_qpos);
         score[i] = max_score, pre_id[i] = max_j;
     }
 
@@ -570,7 +570,8 @@ int abpoa_dp_chaining(void *km, ab_u64_v *anchors, ab_u64_v *par_anchors, abpoa_
     }
 
     radix_sort_ab_128x(local_chains+tot_chain_i+1, local_chains + n_local_chains);
-    abpoa_dp_chaining_of_local_chains(km, local_chains+tot_chain_i+1, n_local_chains-1-tot_chain_i, anchors, score, pre_id, par_anchors, min_w, tlen, qlen);
+    abpoa_dp_chaining_of_local_chains(km, local_chains+tot_chain_i+1, n_local_chains-1-tot_chain_i, 
+                                      anchors, score, pre_id, par_anchors, min_w, tlen, qlen, abpt->verbose);
 
     kfree(km, score); kfree(km, pre_id); kfree(km, end_pos); kfree(km, local_chains);
     return 0;
@@ -630,7 +631,7 @@ int LIS(void *km, int tot_n, uint64_t *rank, int n) {
 // output:
 //   anchor list size: n
 //   list of anchors: anchors
-int LIS_chaining(void *km, ab_u64_v *anchors, ab_u64_v *par_anchors, int min_w) {
+int LIS_chaining(void *km, ab_u64_v *anchors, ab_u64_v *par_anchors, int min_w, int verbose) {
     size_t i, j, n_a = anchors->n, n_for, n_rev;
     uint64_t *for_rank = (uint64_t*)kmalloc(km, sizeof(uint64_t) * n_a);
     uint64_t *rev_rank = (uint64_t*)kmalloc(km, sizeof(uint64_t) * n_a);
@@ -663,10 +664,8 @@ int LIS_chaining(void *km, ab_u64_v *anchors, ab_u64_v *par_anchors, int min_w)
         n = n_rev; rank = rev_rank; kfree(km, for_rank);
     }
     // filter anchors
-    int last_tpos = -1, last_qpos = -1, cur_tpos, cur_qpos;
-#ifdef __DEBUG__
-    size_t _n = par_anchors->n;
-#endif
+    int last_tpos = -1, last_qpos = -1, cur_tpos, cur_qpos; size_t _n = 0;
+    if (verbose >= ABPOA_LONG_DEBUG_VERBOSE) _n = par_anchors->n;
     for (i = 0; i < n; ++i) {
         j = (int)rank[i]-1;
         cur_tpos = (anchors->a[j] >> 32) & 0x7fffffff;
@@ -677,18 +676,19 @@ int LIS_chaining(void *km, ab_u64_v *anchors, ab_u64_v *par_anchors, int min_w)
         kv_push(uint64_t, 0, *par_anchors, anchors->a[j]); // store LIS anchors into par_anchors
         last_tpos = cur_tpos; last_qpos = cur_qpos;
     }
-#ifdef __DEBUG__
-    for (i = _n; i < par_anchors->n; ++i) {
-        uint64_t ia = par_anchors->a[i];
-        // strand, rpos, qpos
-        fprintf(stderr, "%c\t%ld\t%d\n", "+-"[ia >> 63], (ia>>32) & 0x7fffffff, ((uint32_t)ia));
+
+    if (verbose >= ABPOA_LONG_DEBUG_VERBOSE) {
+        for (i = _n; i < par_anchors->n; ++i) {
+            uint64_t ia = par_anchors->a[i];
+            // strand, rpos, qpos
+            fprintf(stderr, "%c\t%ld\t%d\n", "+-"[ia >> 63], (ia>>32) & 0x7fffffff, ((uint32_t)ia));
+        }
     }
-#endif
     return 0;
 }
 
 int abpoa_collect_mm(void *km, uint8_t **seqs, int *seq_lens, int n_seq, abpoa_para_t *abpt, ab_u128_v *mm, int *mm_c) {
-    if (abpt->verbose > 0) fprintf(stderr, "[%s] Collecting minimizers ... ", __func__);
+    if (abpt->verbose >= ABPOA_INFO_VERBOSE) fprintf(stderr, "[%s] Collecting minimizers ... ", __func__);
     int i;
     mm_c[0] = 0;
     for (i = 0; i < n_seq; ++i) { // collect minimizers
@@ -696,7 +696,7 @@ int abpoa_collect_mm(void *km, uint8_t **seqs, int *seq_lens, int n_seq, abpoa_p
         else mm_sketch(km, seqs[i], seq_lens[i], abpt->w, abpt->k, i, 0, abpt->amb_strand, mm);
         mm_c[i+1] = mm->n;
     }
-    if (abpt->verbose > 0) fprintf(stderr, "done!\n");
+    if (abpt->verbose >= ABPOA_INFO_VERBOSE) fprintf(stderr, "done!\n");
     return mm->n;
 }
 
@@ -731,11 +731,9 @@ int abpoa_build_guide_tree_partition(uint8_t **seqs, int *seq_lens, int n_seq, a
         // collect minimizer hit anchors between t and q
         collect_anchors1(km, &anchors, mm1, mm_c, tid, qid, seq_lens[qid], abpt->k);
         // filtering and only keep LIS anchors
-#ifdef __DEBUG__
-        fprintf(stderr, "%d vs %d (tot_n: %ld)\n", tid, qid, anchors.n);
-#endif
+        if (abpt->verbose >= ABPOA_LONG_DEBUG_VERBOSE) fprintf(stderr, "%d vs %d (tot_n: %ld)\n", tid, qid, anchors.n);
         // alignment mode: different chaining result for global/local/extend
-        abpoa_dp_chaining(km, &anchors, par_anchors, abpt, seq_lens[tid], seq_lens[qid]);
+        abpoa_dp_chaining(km, &anchors, par_anchors, abpt, seq_lens[tid], seq_lens[qid], abpt->verbose);
         par_c[i] = par_anchors->n;
         kfree(km, anchors.a);
     }
diff --git a/src/simd_abpoa_align.c b/src/simd_abpoa_align.c
index 9c2363c..1db098b 100644
--- a/src/simd_abpoa_align.c
+++ b/src/simd_abpoa_align.c
@@ -35,6 +35,16 @@ SIMD_para_t _simd_p64 = {128, 64, 1,  2, 16, -1};
 #define SIMDTotalBytes 16
 #endif
 
+void simd_output_pre_nodes(int *pre_index, int pre_n, int dp_i, int dp_j, int cur_op, int verbose) {
+    if (verbose >= ABPOA_LONG_DEBUG_VERBOSE) {
+        int k;
+        fprintf(stderr, "%d, %d, %d\t", dp_i, dp_j, cur_op);
+        for (k = 0; k < pre_n; ++k) { /* print all pre node for node i*/
+            fprintf(stderr, "%d, ", pre_index[k]);
+        } fprintf(stderr, "\n");
+    }
+}
+
 #define print_simd(s, str, score_t) {                                \
     int _i; score_t *_a = (score_t*)(s);                             \
     fprintf(stderr, "%s\t", str);                                    \
@@ -43,7 +53,6 @@ SIMD_para_t _simd_p64 = {128, 64, 1,  2, 16, -1};
     } fprintf(stderr, "\n");                                         \
 }
 
-
 #define simd_abpoa_print_lg_matrix(score_t, beg_index, end_index) { \
     for (j = 0; j < end_index-beg_index; ++j) {                     \
         fprintf(stderr, "index: %d\t", j);                          \
@@ -56,7 +65,7 @@ SIMD_para_t _simd_p64 = {128, 64, 1,  2, 16, -1};
 }
 
 #define simd_abpoa_print_ag_matrix(score_t, beg_index, end_index) {  \
-    for (j = beg_index; j < end_index; ++j) {                        \
+    for (j = 0; j < end_index-beg_index; ++j) {                      \
         fprintf(stderr, "index: %d\t", j);                           \
         dp_h = DP_HEF + j * 3 * dp_sn; dp_e1 = dp_h + dp_sn;         \
         _dp_h = (score_t*)dp_h, _dp_e1 = (score_t*)dp_e1;            \
@@ -70,7 +79,7 @@ SIMD_para_t _simd_p64 = {128, 64, 1,  2, 16, -1};
     score_t *_dp_h = (score_t*)dp_h, *_dp_e1 = (score_t*)dp_e1;                                          \
     score_t *_dp_e2 = (score_t*)dp_e2, *_dp_f1 = (score_t*)dp_f1, *_dp_f2 = (score_t*)dp_f2;             \
     fprintf(stderr, "%s\tindex: %d\t", str, index_i);                                                    \
-    for (i = dp_beg[index_i]; i <= (dp_end[index_i]/16+1)*16-1; ++i) {                                   \
+    for (i = dp_beg[index_i]; i <= dp_end[index_i]; ++i) {                                               \
         fprintf(stderr, "%d:(%d,%d,%d,%d,%d)\t", i, _dp_h[i], _dp_e1[i],_dp_e2[i], _dp_f1[i],_dp_f2[i]); \
     } fprintf(stderr, "\n");                                                                             \
 }
@@ -169,7 +178,7 @@ SIMD_para_t _simd_p64 = {128, 64, 1,  2, 16, -1};
             }                                                                                               \
         }                                                                                                   \
         if (hit == 0) err_fatal_simple("Error in lg_backtrack.");                                           \
-     /* fprintf(stderr, "%d, %d, (%d)\n", i, j, indel_first); */                                            \
+        simd_output_pre_nodes(pre_index[i], pre_n[i], i, j, 0, abpt->verbose);                              \
     }                                                                                                       \
     if (j > 0) cigar = abpoa_push_cigar(&n_c, &m_c, cigar, ABPOA_CINS, j, -1, j-1);                         \
     /* reverse cigar */                                                                                     \
@@ -273,7 +282,7 @@ SIMD_para_t _simd_p64 = {128, 64, 1,  2, 16, -1};
             }                                                                                               \
         }                                                                                                   \
         if (hit == 0) err_fatal_simple("Error in ag_backtrack.");                                           \
-     /* fprintf(stderr, "%d, %d, %d (indel_first: %d)\n", i, j, cur_op, indel_first); */                    \
+        simd_output_pre_nodes(pre_index[i], pre_n[i], i, j, cur_op, abpt->verbose);                         \
     }                                                                                                       \
     if (j > 0) cigar = abpoa_push_cigar(&n_c, &m_c, cigar, ABPOA_CINS, j, -1, j-1);                         \
     /* reverse cigar */                                                                                     \
@@ -290,7 +299,7 @@ SIMD_para_t _simd_p64 = {128, 64, 1,  2, 16, -1};
     abpoa_cigar_t *cigar = 0;                                                                               \
     i = best_i, j = best_j, _start_i = best_i, _start_j = best_j;                                           \
     id = abpoa_graph_index_to_node_id(graph, i+beg_index);                                                  \
-    if (best_j < qlen) cigar = abpoa_push_cigar(&n_c, &m_c, cigar, ABPOA_CINS, qlen-j, -1, qlen-1);         \
+    if (best_j < qlen) cigar = abpoa_push_cigar(&n_c, &m_c, cigar, ABPOA_CINS, qlen-best_j, -1, qlen-1);    \
     SIMDi *dp_h = DP_H2E2F + dp_sn * i * 5; _dp_h = (score_t*)dp_h;                                         \
     int indel_first = 1; /* prefer to keep gaps at the end */                                               \
     while (i > 0 && j > 0) {                                                                                \
@@ -305,24 +314,25 @@ SIMD_para_t _simd_p64 = {128, 64, 1,  2, 16, -1};
                 if (j-1 < dp_beg[pre_i] || j-1 > dp_end[pre_i]) continue;                                   \
                 _pre_dp_h = (score_t*)(DP_H2E2F + dp_sn * pre_i * 5);                                       \
                 if (_pre_dp_h[j-1] + s == _dp_h[j]) {                                                       \
-                    cur_op = ABPOA_ALL_OP; hit = 1;                                                         \
                     cigar = abpoa_push_cigar(&n_c, &m_c, cigar, ABPOA_CMATCH, 1, id, j-1);                  \
                     i = pre_i; --j; id = abpoa_graph_index_to_node_id(graph, i+beg_index); hit = 1;         \
                     dp_h = DP_H2E2F + dp_sn * i * 5; _dp_h = (score_t*)dp_h;                                \
+                    cur_op = ABPOA_ALL_OP;                                                                  \
                     ++res->n_aln_bases; res->n_matched_bases += is_match ? 1 : 0;                           \
                     break;                                                                                  \
                 }                                                                                           \
             }                                                                                               \
         }                                                                                                   \
         if (hit == 0 && cur_op & ABPOA_E_OP) { /* deletion */                                               \
+            _dp_e1 = (score_t*)(dp_h + dp_sn); _dp_e2 = (score_t*)(dp_h + dp_sn * 2);                       \
             for (k = 0; k < pre_n[i]; ++k) {                                                                \
                 pre_i = pre_index_i[k];                                                                     \
                 if (j < dp_beg[pre_i] || j > dp_end[pre_i]) continue;                                       \
+                _pre_dp_h = (score_t*)(DP_H2E2F + dp_sn * pre_i * 5);                                       \
                 if (cur_op & ABPOA_E1_OP) {                                                                 \
                     _pre_dp_e1 = (score_t*)(DP_H2E2F + dp_sn * (pre_i * 5 + 1));                            \
                     if (cur_op & ABPOA_M_OP) {                                                              \
                         if (_dp_h[j] == _pre_dp_e1[j]) {                                                    \
-                            _pre_dp_h = (score_t*)(DP_H2E2F + dp_sn * pre_i * 5);                           \
                             if (_pre_dp_h[j] - gap_oe1 == _pre_dp_e1[j]) cur_op = ABPOA_M_OP | ABPOA_F_OP;  \
                             else cur_op = ABPOA_E1_OP;                                                      \
                             hit = 1; cigar = abpoa_push_cigar(&n_c, &m_c, cigar, ABPOA_CDEL, 1, id, j-1);   \
@@ -331,9 +341,7 @@ SIMD_para_t _simd_p64 = {128, 64, 1,  2, 16, -1};
                             break;                                                                          \
                         }                                                                                   \
                     } else {                                                                                \
-                        _dp_e1 = (score_t*)(dp_h + dp_sn);                                                  \
                         if (_dp_e1[j] == _pre_dp_e1[j] - gap_ext1) {                                        \
-                            _pre_dp_h = (score_t*)(DP_H2E2F + dp_sn * pre_i * 5);                           \
                             if (_pre_dp_h[j] - gap_oe1 == _pre_dp_e1[j]) cur_op = ABPOA_M_OP | ABPOA_F_OP;  \
                             else cur_op = ABPOA_E1_OP;                                                      \
                             hit = 1; cigar = abpoa_push_cigar(&n_c, &m_c, cigar, ABPOA_CDEL, 1, id, j-1);   \
@@ -347,7 +355,6 @@ SIMD_para_t _simd_p64 = {128, 64, 1,  2, 16, -1};
                     _pre_dp_e2 = (score_t*)(DP_H2E2F + dp_sn * (pre_i * 5 + 2));                            \
                     if (cur_op & ABPOA_M_OP) {                                                              \
                         if (_dp_h[j] == _pre_dp_e2[j]) {                                                    \
-                            _pre_dp_h = (score_t*)(DP_H2E2F + dp_sn * pre_i * 5);                           \
                             if (_pre_dp_h[j] - gap_oe2 == _pre_dp_e2[j]) cur_op = ABPOA_M_OP | ABPOA_F_OP;  \
                             else cur_op = ABPOA_E2_OP;                                                      \
                             hit = 1; cigar = abpoa_push_cigar(&n_c, &m_c, cigar, ABPOA_CDEL, 1, id, j-1);   \
@@ -356,9 +363,7 @@ SIMD_para_t _simd_p64 = {128, 64, 1,  2, 16, -1};
                             break;                                                                          \
                         }                                                                                   \
                     } else {                                                                                \
-                        _dp_e2 = (score_t*)(dp_h + dp_sn * 2);                                              \
                         if (_dp_e2[j] == _pre_dp_e2[j] - gap_ext2) {                                        \
-                            _pre_dp_h = (score_t*)(DP_H2E2F + dp_sn * pre_i * 5);                           \
                             if (_pre_dp_h[j] - gap_oe2 == _pre_dp_e2[j]) cur_op = ABPOA_M_OP | ABPOA_F_OP;  \
                             else cur_op = ABPOA_E2_OP;                                                      \
                             hit = 1; cigar = abpoa_push_cigar(&n_c, &m_c, cigar, ABPOA_CDEL, 1, id, j-1);   \
@@ -400,16 +405,16 @@ SIMD_para_t _simd_p64 = {128, 64, 1,  2, 16, -1};
                 ++res->n_aln_bases;                                                                         \
             }                                                                                               \
         }                                                                                                   \
-        if (hit == 0 && cur_op & ABPOA_M_OP && indel_first == 1) { /* match/mismatch */                     \
+        if (hit == 0 && (cur_op & ABPOA_M_OP) && (indel_first == 1)) { /* match/mismatch */                 \
             for (k = 0; k < pre_n[i]; ++k) {                                                                \
                 pre_i = pre_index_i[k];                                                                     \
                 if (j-1 < dp_beg[pre_i] || j-1 > dp_end[pre_i]) continue;                                   \
                 _pre_dp_h = (score_t*)(DP_H2E2F + dp_sn * pre_i * 5);                                       \
                 if (_pre_dp_h[j-1] + s == _dp_h[j]) {                                                       \
-                    cur_op = ABPOA_ALL_OP; hit = 1;                                                         \
                     cigar = abpoa_push_cigar(&n_c, &m_c, cigar, ABPOA_CMATCH, 1, id, j-1);                  \
                     i = pre_i; --j; id = abpoa_graph_index_to_node_id(graph, i+beg_index); hit = 1;         \
                     dp_h = DP_H2E2F + dp_sn * i * 5; _dp_h = (score_t*)dp_h;                                \
+                    cur_op = ABPOA_ALL_OP;                                                                  \
                     ++res->n_aln_bases; res->n_matched_bases += is_match ? 1 : 0;                           \
                     indel_first = 0;                                                                        \
                     break;                                                                                  \
@@ -417,7 +422,7 @@ SIMD_para_t _simd_p64 = {128, 64, 1,  2, 16, -1};
             }                                                                                               \
         }                                                                                                   \
         if (hit == 0) err_fatal_simple("Error in cg_backtrack.");                                           \
-     /* fprintf(stderr, "%d, %d, %d\n", i, j, cur_op); */                                                   \
+        simd_output_pre_nodes(pre_index[i], pre_n[i], i, j, cur_op, abpt->verbose);                         \
     }                                                                                                       \
     if (j > 0) cigar = abpoa_push_cigar(&n_c, &m_c, cigar, ABPOA_CINS, j, -1, j-1);                         \
     /* reverse cigar */                                                                                     \
@@ -434,16 +439,16 @@ SIMD_para_t _simd_p64 = {128, 64, 1,  2, 16, -1};
 #define simd_abpoa_var(score_t, sp, SIMDSetOne, SIMDShiftOneN)                                      \
     /* int tot_dp_sn = 0; */                                                                        \
     abpoa_graph_t *graph = ab->abg; abpoa_simd_matrix_t *abm = ab->abm;                             \
-    int matrix_row_n = end_index-beg_index+1, matrix_col_n = qlen + 1;                              \
+    int64_t matrix_row_n = end_index-beg_index+1, matrix_col_n = qlen + 1;                          \
     int **pre_index, *pre_n, _pre_index, _pre_n, pre_i;                                             \
     int i, j, k, *dp_beg, *dp_beg_sn, *dp_end, *dp_end_sn, node_id, index_i, dp_i;                  \
     int beg, end, beg_sn, end_sn, _beg_sn, _end_sn, pre_beg_sn, pre_end, sn_i;                      \
-    int pn, log_n, size, qp_sn, dp_sn; /* pn: value per SIMDi, qp_sn/dp_sn/d_sn: segmented length*/ \
+    int pn, log_n, size; int64_t qp_sn, dp_sn; /* pn: # value per SIMDi, qp_sn/dp_sn: segmented length*/ \
     SIMDi *dp_h, *pre_dp_h, *qp, *qi=NULL;                                                          \
     score_t *_dp_h=NULL, *_qi, best_score = sp.inf_min, inf_min = sp.inf_min;                       \
     int *mat = abpt->mat, m = abpt->m; score_t gap_ext1 = abpt->gap_ext1;                           \
     int w = abpt->wb < 0 ? qlen : abpt->wb+(int)(abpt->wf*qlen); /* when w < 0, do whole global */  \
-    int best_i = 0, best_j = 0, best_id = 0, max, max_i=-1;                                         \
+    int best_i = 0, best_j = 0, best_id = 0, max, left_max_i=-1, right_max_i=-1;                    \
     SIMDi zero = SIMDSetZeroi(), SIMD_INF_MIN = SIMDSetOne(inf_min);                                \
     pn = sp.num_of_value; qp_sn = dp_sn = (matrix_col_n + pn - 1) / pn;                             \
     log_n = sp.log_num, size = sp.size; qp = abm->s_mem;                                            \
@@ -564,7 +569,6 @@ SIMD_para_t _simd_p64 = {128, 64, 1,  2, 16, -1};
         dp_beg[0] = 0, dp_end[0] = qlen;                                                                \
     }                                                                                                   \
     dp_beg_sn[0] = (dp_beg[0])/pn; dp_end_sn[0] = (dp_end[0])/pn;                                       \
-    dp_beg[0] = dp_beg_sn[0] * pn; dp_end[0] = (dp_end_sn[0]+1)*pn-1;                                   \
     dp_h = DP_H; _end_sn = MIN_OF_TWO(dp_end_sn[0]+1, dp_sn-1);                                         \
 }
 
@@ -582,7 +586,6 @@ SIMD_para_t _simd_p64 = {128, 64, 1,  2, 16, -1};
         dp_beg[0] = 0, dp_end[0] = qlen;                                                                \
     }                                                                                                   \
     dp_beg_sn[0] = (dp_beg[0])/pn; dp_end_sn[0] = (dp_end[0])/pn;                                       \
-    dp_beg[0] = dp_beg_sn[0] * pn; dp_end[0] = (dp_end_sn[0]+1)*pn-1;                                   \
     dp_h = DP_HEF; dp_e1 = dp_h + dp_sn; dp_f1 = dp_e1 + dp_sn;                                         \
     _end_sn = MIN_OF_TWO(dp_end_sn[0]+1, dp_sn-1);                                                      \
 }
@@ -601,7 +604,6 @@ SIMD_para_t _simd_p64 = {128, 64, 1,  2, 16, -1};
         dp_beg[0] = 0, dp_end[0] = qlen;                                                                \
     }                                                                                                   \
     dp_beg_sn[0] = (dp_beg[0])/pn; dp_end_sn[0] = (dp_end[0])/pn;                                       \
-    dp_beg[0] = dp_beg_sn[0] * pn; dp_end[0] = (dp_end_sn[0]+1)*pn-1;                                   \
     dp_h = DP_H2E2F; dp_e1 = dp_h+dp_sn; dp_e2 = dp_e1+dp_sn; dp_f1 = dp_e2+dp_sn; dp_f2 = dp_f1+dp_sn; \
     _end_sn = MIN_OF_TWO(dp_end_sn[0]+1, dp_sn-1);                                                      \
 }
@@ -609,7 +611,7 @@ SIMD_para_t _simd_p64 = {128, 64, 1,  2, 16, -1};
 #define simd_abpoa_lg_first_dp(score_t) {                                   \
     simd_abpoa_lg_first_row;                                                \
     if (abpt->align_mode == ABPOA_LOCAL_MODE) {                             \
-        for (i = 0; i <= _end_sn; ++i)                                       \
+        for (i = 0; i <= _end_sn; ++i)                                      \
             dp_h[i] = zero;                                                 \
     } else {                                                                \
         for (i = 0; i <= _end_sn; ++i) {                                    \
@@ -625,7 +627,7 @@ SIMD_para_t _simd_p64 = {128, 64, 1,  2, 16, -1};
 #define simd_abpoa_ag_first_dp(score_t) {                                   \
     simd_abpoa_ag_first_row;                                                \
     if (abpt->align_mode == ABPOA_LOCAL_MODE) {                             \
-        for (i = 0; i <= _end_sn; ++i)                                       \
+        for (i = 0; i <= _end_sn; ++i)                                      \
             dp_h[i] = dp_e1[i] = dp_f1[i] = zero;                           \
     } else {                                                                \
         for (i = 0; i <= _end_sn; ++i) {                                    \
@@ -643,7 +645,7 @@ SIMD_para_t _simd_p64 = {128, 64, 1,  2, 16, -1};
 #define simd_abpoa_cg_first_dp(score_t) {                                             \
     simd_abpoa_cg_first_row;                                                          \
     if (abpt->align_mode == ABPOA_LOCAL_MODE) {                                       \
-        for (i = 0; i <= _end_sn; ++i)                                                 \
+        for (i = 0; i <= _end_sn; ++i)                                                \
             dp_h[i] = dp_e1[i] = dp_e2[i] = dp_f1[i] = dp_f2[i] = zero;               \
     } else {                                                                          \
         for (i = 0; i <= _end_sn; ++i) {                                              \
@@ -698,47 +700,48 @@ SIMD_para_t _simd_p64 = {128, 64, 1,  2, 16, -1};
     }                                                                                                                                                              \
 }
 
-#define simd_abpoa_lg_dp(score_t, SIMDShiftOneN, SIMDMax, SIMDAdd, SIMDSub) {                       \
-    node_id = abpoa_graph_index_to_node_id(graph, index_i);                                         \
-    SIMDi *q = qp + graph->node[node_id].base * qp_sn, first, remain;                               \
-    dp_h = &DP_H[dp_i * dp_sn]; _dp_h = (score_t*)dp_h;                                             \
-    int min_pre_beg_sn, max_pre_end_sn;                                                             \
-    if (abpt->wb < 0) {                                                                             \
-        beg = dp_beg[dp_i] = 0, end = dp_end[dp_i] = qlen;                                          \
-        beg_sn = dp_beg_sn[dp_i] = (dp_beg[dp_i])/pn; end_sn = dp_end_sn[dp_i] = (dp_end[dp_i])/pn; \
-        min_pre_beg_sn = 0, max_pre_end_sn = end_sn;                                                \
-    } else {                                                                                        \
-        beg = GET_AD_DP_BEGIN(graph, w, node_id, end_node_id, qlen), end = GET_AD_DP_END(graph, w, node_id, end_node_id, qlen); \
-        beg_sn = beg / pn; min_pre_beg_sn = INT32_MAX, max_pre_end_sn = -1;                         \
-        for (i = 0; i < pre_n[dp_i]; ++i) {                                                         \
-            pre_i = pre_index[dp_i][i];                                                             \
-            if (min_pre_beg_sn > dp_beg_sn[pre_i]) min_pre_beg_sn = dp_beg_sn[pre_i];               \
-            if (max_pre_end_sn < dp_end_sn[pre_i]) max_pre_end_sn = dp_end_sn[pre_i];               \
-        } if (beg_sn < min_pre_beg_sn) beg_sn = min_pre_beg_sn;                                     \
-        dp_beg_sn[dp_i] = beg_sn; beg = dp_beg[dp_i] = dp_beg_sn[dp_i] * pn;                        \
-        end_sn = dp_end_sn[dp_i] = end/pn; end = dp_end[dp_i] = (dp_end_sn[dp_i]+1)*pn-1;           \
-    }                                                                                               \
-    /* loop query */                                                                                                         \
-    /* first pre_node */                                                                                                     \
-    pre_i = pre_index[dp_i][0];                                                                                              \
-    pre_dp_h = DP_H + pre_i * dp_sn;                                                                                         \
-    pre_end = dp_end[pre_i];                                                                                                 \
-    pre_beg_sn = dp_beg_sn[pre_i];                                                                                           \
-    /* set M from (pre_i, q_i-1), E from (pre_i, q_i) */                                                                     \
-    if (abpt->align_mode == ABPOA_LOCAL_MODE) {                                                                              \
-        _beg_sn = 0, _end_sn = end_sn; first = SIMDShiftRight(zero, SIMDTotalBytes-SIMDShiftOneN);                           \
-    } else {                                                                                                                 \
-        if (pre_beg_sn < beg_sn) _beg_sn = beg_sn, first = SIMDShiftRight(pre_dp_h[beg_sn-1], SIMDTotalBytes-SIMDShiftOneN); \
-        else _beg_sn = pre_beg_sn, first = SIMDShiftRight(SIMD_INF_MIN, SIMDTotalBytes-SIMDShiftOneN);                       \
-        _end_sn = MIN_OF_THREE((pre_end+1)/pn, end_sn, dp_sn-1);                                                             \
-        for (i = beg_sn; i < _beg_sn; ++i) dp_h[i] = SIMD_INF_MIN;                                                           \
-        for (i = _end_sn+1; i <= MIN_OF_TWO(end_sn+1, dp_sn-1); ++i) dp_h[i] = SIMD_INF_MIN;                                 \
-    }                                                                                                                        \
-    for (sn_i = _beg_sn; sn_i <= _end_sn; ++sn_i) { /* SIMD parallelization */                                               \
-        remain = SIMDShiftLeft(pre_dp_h[sn_i], SIMDShiftOneN);                                                               \
-        dp_h[sn_i] = SIMDMax(SIMDAdd(SIMDOri(first, remain), q[sn_i]), SIMDSub(pre_dp_h[sn_i], GAP_E1));                     \
-        first = SIMDShiftRight(pre_dp_h[sn_i], SIMDTotalBytes-SIMDShiftOneN);                                                \
-    }                                                                                                                        \
+#define simd_abpoa_lg_dp(score_t, SIMDShiftOneN, SIMDMax, SIMDAdd, SIMDSub) {                                                     \
+    node_id = abpoa_graph_index_to_node_id(graph, index_i);                                                                       \
+    SIMDi *q = qp + graph->node[node_id].base * qp_sn, first, remain;                                                             \
+    dp_h = &DP_H[dp_i * dp_sn]; _dp_h = (score_t*)dp_h;                                                                           \
+    int min_pre_beg, min_pre_beg_sn, max_pre_end_sn;                                                                              \
+    if (abpt->wb < 0) {                                                                                                           \
+        beg = dp_beg[dp_i] = 0, end = dp_end[dp_i] = qlen;                                                                        \
+        beg_sn = dp_beg_sn[dp_i] = (dp_beg[dp_i])/pn; end_sn = dp_end_sn[dp_i] = (dp_end[dp_i])/pn;                               \
+        min_pre_beg_sn = 0, max_pre_end_sn = end_sn;                                                                              \
+    } else {                                                                                                                      \
+        beg = GET_AD_DP_BEGIN(graph, w, node_id, end_node_id, qlen), end = GET_AD_DP_END(graph, w, node_id, end_node_id, qlen);   \
+        beg_sn = beg / pn; min_pre_beg = min_pre_beg_sn = INT32_MAX, max_pre_end_sn = -1;                                         \
+        for (i = 0; i < pre_n[dp_i]; ++i) {                                                                                       \
+            pre_i = pre_index[dp_i][i];                                                                                           \
+            if (min_pre_beg > dp_beg[pre_i]) min_pre_beg = dp_beg[pre_i], min_pre_beg_sn = dp_beg_sn[pre_i];                      \
+            if (max_pre_end_sn < dp_end_sn[pre_i]) max_pre_end_sn = dp_end_sn[pre_i];                                             \
+        }                                                                                                                         \
+        if (beg_sn < min_pre_beg_sn) {                                                                                            \
+            beg = min_pre_beg; beg_sn = min_pre_beg_sn;                                                                           \
+        }                                                                                                                         \
+        dp_beg_sn[dp_i] = beg_sn; dp_beg[dp_i] = beg; end_sn = dp_end_sn[dp_i] = end/pn; dp_end[dp_i] = end;                      \
+    }                                                                                                                             \
+    /* loop query */                                                                                                              \
+    /* first pre_node */                                                                                                          \
+    pre_i = pre_index[dp_i][0];                                                                                                   \
+    pre_dp_h = DP_H + pre_i * dp_sn;                                                                                              \
+    pre_end = dp_end[pre_i]; pre_beg_sn = dp_beg_sn[pre_i];                                                                       \
+    /* set M from (pre_i, q_i-1), E from (pre_i, q_i) */                                                                          \
+    if (abpt->align_mode == ABPOA_LOCAL_MODE) {                                                                                   \
+        _beg_sn = 0, _end_sn = end_sn; first = SIMDShiftRight(zero, SIMDTotalBytes-SIMDShiftOneN);                                \
+    } else {                                                                                                                      \
+        if (pre_beg_sn < beg_sn) _beg_sn = beg_sn, first = SIMDShiftRight(pre_dp_h[beg_sn-1], SIMDTotalBytes-SIMDShiftOneN);      \
+        else _beg_sn = pre_beg_sn, first = SIMDShiftRight(SIMD_INF_MIN, SIMDTotalBytes-SIMDShiftOneN);                            \
+        _end_sn = MIN_OF_THREE((pre_end+1)/pn, end_sn, dp_sn-1);                                                                  \
+        for (i = beg_sn; i < _beg_sn; ++i) dp_h[i] = SIMD_INF_MIN;                                                                \
+        for (i = _end_sn+1; i <= MIN_OF_TWO(end_sn+1, dp_sn-1); ++i) dp_h[i] = SIMD_INF_MIN;                                      \
+    }                                                                                                                             \
+    for (sn_i = _beg_sn; sn_i <= _end_sn; ++sn_i) { /* SIMD parallelization */                                                    \
+        remain = SIMDShiftLeft(pre_dp_h[sn_i], SIMDShiftOneN);                                                                    \
+        dp_h[sn_i] = SIMDMax(SIMDAdd(SIMDOri(first, remain), q[sn_i]), SIMDSub(pre_dp_h[sn_i], GAP_E1));                          \
+        first = SIMDShiftRight(pre_dp_h[sn_i], SIMDTotalBytes-SIMDShiftOneN);                                                     \
+    }                                                                                                                             \
     /* get max m and e */                                                                                                         \
     for (i = 1; i < pre_n[dp_i]; ++i) {                                                                                           \
         pre_i = pre_index[dp_i][i];                                                                                               \
@@ -759,23 +762,26 @@ SIMD_para_t _simd_p64 = {128, 64, 1,  2, 16, -1};
             first = SIMDShiftRight(pre_dp_h[sn_i], SIMDTotalBytes-SIMDShiftOneN);                                                 \
         } /* now we have max(h,e) stored at dp_h */                                                                               \
     }                                                                                                                             \
-    /* new F start */                                                                                                                  \
-    first = SIMDOri(SIMDAndi(dp_h[beg_sn], PRE_MASK[0]), SUF_MIN[0]);                                                                  \
-    for (sn_i = beg_sn; sn_i <= end_sn; ++sn_i) {                                                                                      \
-        if (abpt->align_mode == ABPOA_LOCAL_MODE) {                                                                                    \
-            set_num = pn;                                                                                                              \
-        } else {                                                                                                                       \
-            if (sn_i < min_pre_beg_sn) {                                                                                               \
-                _err_fatal_simple(__func__, "sn_i < min_pre_beg_sn\n");                                                                \
-            } else if (sn_i > max_pre_end_sn) {                                                                                        \
-                set_num = sn_i == max_pre_end_sn+1 ? 1 : 0;                                                                            \
-            } else set_num = pn;                                                                                                       \
-        }                                                                                                                              \
-        dp_h[sn_i] = SIMDMax(dp_h[sn_i], first);                                                                                       \
-        SIMD_SET_F(dp_h[sn_i], log_n, set_num, PRE_MIN, PRE_MASK, SUF_MIN, GAP_E1S, SIMDMax, SIMDAdd, SIMDSub, SIMDShiftOneN);         \
+    /* new F start */                                                                                                             \
+    for (i = beg_sn * pn; i < beg; ++i) _dp_h[i] = inf_min;                                                                       \
+    for (i = end+1; i < (end_sn+1)*pn; ++i) _dp_h[i] = inf_min;                                                                   \
+    first = SIMDOri(SIMDAndi(dp_h[beg_sn], PRE_MASK[0]), SUF_MIN[0]);                                                             \
+    for (sn_i = beg_sn; sn_i <= end_sn; ++sn_i) {                                                                                 \
+        if (abpt->align_mode == ABPOA_LOCAL_MODE) {                                                                               \
+            set_num = pn;                                                                                                         \
+        } else {                                                                                                                  \
+            if (sn_i < min_pre_beg_sn) {                                                                                          \
+                _err_fatal_simple(__func__, "sn_i < min_pre_beg_sn\n");                                                           \
+            } else if (sn_i > max_pre_end_sn) {                                                                                   \
+                set_num = sn_i == max_pre_end_sn+1 ? 1 : 0;                                                                       \
+            } else set_num = pn;                                                                                                  \
+        }                                                                                                                         \
+        dp_h[sn_i] = SIMDMax(dp_h[sn_i], first);                                                                                  \
+        if (sn_i == end_sn) for (i = end+1; i < (end_sn+1)*pn; ++i) _dp_h[i] = inf_min;                                           \
+        SIMD_SET_F(dp_h[sn_i], log_n, set_num, PRE_MIN, PRE_MASK, SUF_MIN, GAP_E1S, SIMDMax, SIMDAdd, SIMDSub, SIMDShiftOneN);    \
         first = SIMDOri(SIMDAndi(SIMDShiftRight(SIMDSub(dp_h[sn_i], GAP_E1), SIMDTotalBytes-SIMDShiftOneN), PRE_MASK[0]), SUF_MIN[0]); \
-    }                                                                                                                                  \
-    if (abpt->align_mode == ABPOA_LOCAL_MODE) for (sn_i = 0; sn_i <= end_sn; ++sn_i) dp_h[sn_i] = SIMDMax(zero, dp_h[sn_i]);           \
+    }                                                                                                                             \
+    if (abpt->align_mode == ABPOA_LOCAL_MODE) for (sn_i = 0; sn_i <= end_sn; ++sn_i) dp_h[sn_i] = SIMDMax(zero, dp_h[sn_i]);      \
 }
 
 #define simd_abpoa_ag_dp(score_t, SIMDShiftOneN, SIMDMax, SIMDAdd, SIMDSub, SIMDGetIfGreater, SIMDSetIfGreater, SIMDSetIfEqual) { \
@@ -783,21 +789,23 @@ SIMD_para_t _simd_p64 = {128, 64, 1,  2, 16, -1};
     SIMDi *q = qp + graph->node[node_id].base * qp_sn, first, remain;                                                             \
     dp_h = DP_HEF + dp_i * 3 * dp_sn; dp_e1 = dp_h + dp_sn; dp_f1 = dp_e1 + dp_sn;                                                \
     _dp_h = (score_t*)dp_h, _dp_e1 = (score_t*)dp_e1, _dp_f1 = (score_t*)dp_f1;                                                   \
-    int min_pre_beg_sn, max_pre_end_sn;                                                                                           \
+    int min_pre_beg, min_pre_beg_sn, max_pre_end_sn;                                                                              \
     if (abpt->wb < 0) {                                                                                                           \
         beg = dp_beg[dp_i] = 0, end = dp_end[dp_i] = qlen;                                                                        \
         beg_sn = dp_beg_sn[dp_i] = (dp_beg[dp_i])/pn; end_sn = dp_end_sn[dp_i] = (dp_end[dp_i])/pn;                               \
         min_pre_beg_sn = 0, max_pre_end_sn = end_sn;                                                                              \
     } else {                                                                                                                      \
         beg = GET_AD_DP_BEGIN(graph, w, node_id, end_node_id, qlen), end = GET_AD_DP_END(graph, w, node_id, end_node_id, qlen);   \
-        beg_sn = beg / pn; min_pre_beg_sn = INT32_MAX, max_pre_end_sn = -1;                                                       \
+        beg_sn = beg / pn; min_pre_beg = min_pre_beg_sn = INT32_MAX, max_pre_end_sn = -1;                                         \
         for (i = 0; i < pre_n[dp_i]; ++i) {                                                                                       \
             pre_i = pre_index[dp_i][i];                                                                                           \
-            if (min_pre_beg_sn > dp_beg_sn[pre_i]) min_pre_beg_sn = dp_beg_sn[pre_i];                                             \
+            if (min_pre_beg > dp_beg[pre_i]) min_pre_beg = dp_beg[pre_i], min_pre_beg_sn = dp_beg_sn[pre_i];                      \
             if (max_pre_end_sn < dp_end_sn[pre_i]) max_pre_end_sn = dp_end_sn[pre_i];                                             \
-        } if (beg_sn < min_pre_beg_sn) beg_sn = min_pre_beg_sn;                                                                   \
-        dp_beg_sn[dp_i] = beg_sn; beg = dp_beg[dp_i] = dp_beg_sn[dp_i] * pn;                                                      \
-        end_sn = dp_end_sn[dp_i] = end/pn; end = dp_end[dp_i] = (dp_end_sn[dp_i]+1)*pn-1;                                         \
+        }                                                                                                                         \
+        if (beg_sn < min_pre_beg_sn) {                                                                                            \
+            beg_sn = min_pre_beg_sn; beg = min_pre_beg;                                                                           \
+        }                                                                                                                         \
+        dp_beg_sn[dp_i] = beg_sn; dp_beg[dp_i] = beg; end_sn = dp_end_sn[dp_i] = end/pn; dp_end[dp_i] = end;                      \
     }                                                                                                                             \
     /* loop query */                                                                                                              \
     /* first pre_node */                                                                                                          \
@@ -854,6 +862,8 @@ SIMD_para_t _simd_p64 = {128, 64, 1,  2, 16, -1};
     for (sn_i = beg_sn; sn_i <= end_sn; ++sn_i) { /* SIMD parallelization */                                                      \
         dp_h[sn_i] = SIMDAdd(dp_h[sn_i], q[sn_i]);                                                                                \
     }                                                                                                                             \
+    for (i = beg_sn * pn; i < beg; ++i) _dp_h[i] = _dp_e1[i] = inf_min;                                                           \
+    for (i = end+1; i < (end_sn+1)*pn; ++i) _dp_h[i] = _dp_e1[i] = inf_min;                                                       \
     /* new F start */                                                                                                             \
     first = SIMDShiftRight(SIMDShiftLeft(dp_h[beg_sn], SIMDTotalBytes-SIMDShiftOneN), SIMDTotalBytes-SIMDShiftOneN);              \
     for (sn_i = beg_sn; sn_i <= end_sn; ++sn_i) {                                                                                 \
@@ -876,9 +886,11 @@ SIMD_para_t _simd_p64 = {128, 64, 1,  2, 16, -1};
         dp_h[sn_i] = SIMDMax(dp_h[sn_i], dp_e1[sn_i]); SIMDi tmp = dp_h[sn_i];                                                    \
         if (abpt->align_mode == ABPOA_LOCAL_MODE) {                                                                               \
             dp_h[sn_i] = SIMDMax(zero, SIMDMax(dp_h[sn_i], dp_f1[sn_i]));                                                         \
+            if (sn_i == end_sn) for (i = end+1; i < (end_sn+1)*pn; ++i) _dp_h[i] = _dp_e1[i] = inf_min;                           \
             SIMDSetIfEqual(dp_e1[sn_i], dp_h[sn_i],tmp, SIMDMax(SIMDSub(dp_e1[sn_i],GAP_E1), SIMDSub(dp_h[sn_i],GAP_OE1)),zero);  \
         } else {                                                                                                                  \
             dp_h[sn_i] = SIMDMax(dp_h[sn_i], dp_f1[sn_i]);                                                                        \
+            if (sn_i == end_sn) for (i = end+1; i < (end_sn+1)*pn; ++i) _dp_h[i] = _dp_e1[i] = inf_min;                           \
             SIMDSetIfEqual(dp_e1[sn_i], dp_h[sn_i],tmp, SIMDMax(SIMDSub(dp_e1[sn_i],GAP_E1), SIMDSub(dp_h[sn_i],GAP_OE1)),SIMD_INF_MIN); \
         }                                                                                                                         \
     }                                                                                                                             \
@@ -889,25 +901,31 @@ SIMD_para_t _simd_p64 = {128, 64, 1,  2, 16, -1};
     SIMDi *q = qp + graph->node[node_id].base * qp_sn, first, remain;                                                             \
     dp_h = DP_H2E2F+dp_i*5*dp_sn; dp_e1 = dp_h+dp_sn; dp_e2 = dp_e1+dp_sn; dp_f1 = dp_e2+dp_sn; dp_f2 = dp_f1+dp_sn;              \
     _dp_h=(score_t*)dp_h, _dp_e1=(score_t*)dp_e1, _dp_e2=(score_t*)dp_e2, _dp_f1=(score_t*)dp_f1, _dp_f2=(score_t*)dp_f2;         \
-    int min_pre_beg_sn, max_pre_end_sn;                                                                                           \
+    int min_pre_beg, min_pre_beg_sn, max_pre_end_sn;                                                                              \
     if (abpt->wb < 0) {                                                                                                           \
         beg = dp_beg[dp_i] = 0, end = dp_end[dp_i] = qlen;                                                                        \
         beg_sn = dp_beg_sn[dp_i] = beg/pn; end_sn = dp_end_sn[dp_i] = end/pn;                                                     \
         min_pre_beg_sn = 0, max_pre_end_sn = end_sn;                                                                              \
     } else {                                                                                                                      \
         beg = GET_AD_DP_BEGIN(graph, w, node_id, end_node_id, qlen), end = GET_AD_DP_END(graph, w, node_id, end_node_id, qlen);   \
-        beg_sn = beg / pn; min_pre_beg_sn = INT32_MAX, max_pre_end_sn = -1;                                                       \
+        if (abpt->verbose >= ABPOA_LONG_DEBUG_VERBOSE)                                                                            \
+            fprintf(stderr, "index: %d (node: %d): raw beg: %d, end: %d\n", index_i-beg_index, node_id, beg, end);                \
+        beg_sn = beg / pn; min_pre_beg = min_pre_beg_sn = INT32_MAX, max_pre_end_sn = -1;                                         \
         for (i = 0; i < pre_n[dp_i]; ++i) {                                                                                       \
             pre_i = pre_index[dp_i][i];                                                                                           \
-            if (min_pre_beg_sn > dp_beg_sn[pre_i]) min_pre_beg_sn = dp_beg_sn[pre_i];                                             \
+            if (abpt->verbose >= ABPOA_LONG_DEBUG_VERBOSE)                                                                        \
+                fprintf(stderr, "\tpre_i: %d, pre_dp_beg: %d, pre_dp_beg_sn: %d\n", pre_i, dp_beg[pre_i], dp_beg_sn[pre_i]);      \
+            if (min_pre_beg > dp_beg[pre_i]) min_pre_beg = dp_beg[pre_i], min_pre_beg_sn = dp_beg_sn[pre_i];                      \
             if (max_pre_end_sn < dp_end_sn[pre_i]) max_pre_end_sn = dp_end_sn[pre_i];                                             \
-        } if (beg_sn < min_pre_beg_sn) beg_sn = min_pre_beg_sn;                                                                   \
-        dp_beg_sn[dp_i] = beg_sn; beg = dp_beg[dp_i] = dp_beg_sn[dp_i] * pn;                                                      \
-        end_sn = dp_end_sn[dp_i] = end/pn; end = dp_end[dp_i] = (dp_end_sn[dp_i]+1)*pn-1;                                         \
-     /* fprintf(stderr, "index: %d, beg: %d, end: %d, beg_sn: %d, end_sn: %d\n", index_i, beg, end, beg_sn, end_sn); */           \
+        }                                                                                                                         \
+        if (beg_sn < min_pre_beg_sn) {                                                                                            \
+            assert(beg < min_pre_beg); beg = min_pre_beg; beg_sn = min_pre_beg_sn;                                                \
+        }                                                                                                                         \
+        dp_beg_sn[dp_i] = beg_sn; dp_beg[dp_i] = beg; end_sn = dp_end_sn[dp_i] = end/pn; dp_end[dp_i] = end;                      \
+        if (abpt->verbose >= ABPOA_LONG_DEBUG_VERBOSE) {                                                                          \
+            fprintf(stderr, "index: %d (node: %d): beg: %d, end: %d, beg_sn: %d, end_sn: %d, max_left: %d, max_right: %d\n", index_i-beg_index, node_id, beg, end, beg_sn, end_sn, abpoa_graph_node_id_to_max_pos_left(graph, node_id), abpoa_graph_node_id_to_max_pos_right(graph, node_id)); \
+        }                                                                                                                         \
     }                                                                                                                             \
- /* fprintf(stderr, "%d: beg, end: %d, %d\n", index_i, beg, end); */                                                              \
-    /* tot_dp_sn += (end_sn - beg_sn + 1); */                                                                                     \
     /* loop query */                                                                                                              \
     /* first pre_node */                                                                                                          \
     pre_i = pre_index[dp_i][0];                                                                                                   \
@@ -968,10 +986,12 @@ SIMD_para_t _simd_p64 = {128, 64, 1,  2, 16, -1};
         }                                                                                                                         \
     }                                                                                                                             \
     /* compare M, E, and F */                                                                                                     \
- /* fprintf(stderr, "5 index_i: %d, beg_sn: %d, end_sn: %d\n", index_i, _beg_sn, _end_sn); */                                     \
+ /* fprintf(stderr, "5 index_i: %d, beg_sn: %d, end_sn: %d\n", index_i, beg_sn, end_sn); */                                       \
     for (sn_i = beg_sn; sn_i <= end_sn; ++sn_i) { /* SIMD parallelization */                                                      \
         dp_h[sn_i] = SIMDAdd(dp_h[sn_i], q[sn_i]);                                                                                \
     }                                                                                                                             \
+    for (i = beg_sn * pn; i < beg; ++i) _dp_h[i] = _dp_e1[i] = _dp_e2[i] = inf_min;                                               \
+    for (i = end+1; i < (end_sn+1)*pn; ++i) _dp_h[i] = _dp_e1[i] = _dp_e2[i] = inf_min;                                           \
     /* new F start */                                                                                                             \
     first = SIMDShiftRight(SIMDShiftLeft(dp_h[beg_sn], SIMDTotalBytes-SIMDShiftOneN), SIMDTotalBytes-SIMDShiftOneN);              \
     SIMDi first2 = first;                                                                                                         \
@@ -997,11 +1017,13 @@ SIMD_para_t _simd_p64 = {128, 64, 1,  2, 16, -1};
         first2 = SIMDShiftRight(SIMDMax(dp_h[sn_i], SIMDAdd(dp_f2[sn_i], GAP_O2)), SIMDTotalBytes-SIMDShiftOneN);                 \
         if (abpt->align_mode == ABPOA_LOCAL_MODE) {                                                                               \
             dp_h[sn_i] = SIMDMax(zero, SIMDMax(dp_h[sn_i], SIMDMax(dp_f1[sn_i], dp_f2[sn_i])));                                   \
+            if (sn_i == end_sn) for (i = end+1; i < (end_sn+1)*pn; ++i) _dp_h[i] = _dp_e1[i] = _dp_e2[i] = inf_min;               \
             dp_e1[sn_i] = SIMDMax(zero,SIMDMax(SIMDSub(dp_e1[sn_i],GAP_E1),SIMDSub(dp_h[sn_i],GAP_OE1)));                         \
             dp_e2[sn_i] = SIMDMax(zero,SIMDMax(SIMDSub(dp_e2[sn_i],GAP_E2),SIMDSub(dp_h[sn_i],GAP_OE2)));                         \
         } else {                                                                                                                  \
             /* H = max{H, F}    */                                                                                                \
             dp_h[sn_i] = SIMDMax(dp_h[sn_i], SIMDMax(dp_f1[sn_i], dp_f2[sn_i]));                                                  \
+            if (sn_i == end_sn) for (i = end+1; i < (end_sn+1)*pn; ++i) _dp_h[i] = _dp_e1[i] = _dp_e2[i] = inf_min;               \
             /* e for next cell */                                                                                                 \
             dp_e1[sn_i] = SIMDMax(SIMDSub(dp_e1[sn_i],GAP_E1),SIMDSub(dp_h[sn_i],GAP_OE1));                                       \
             dp_e2[sn_i] = SIMDMax(SIMDSub(dp_e2[sn_i],GAP_E2),SIMDSub(dp_h[sn_i],GAP_OE2));                                       \
@@ -1042,28 +1064,27 @@ SIMD_para_t _simd_p64 = {128, 64, 1,  2, 16, -1};
 
 #define simd_abpoa_max_in_row(score_t, SIMDSetIfGreater, SIMDGetIfGreater) { \
     /* select max dp_h */                                                    \
-    max = inf_min, max_i = -1;                                               \
-    SIMDi a = dp_h[end_sn], b = qi[end_sn];                                  \
-    if (end_sn == qlen / pn) SIMDSetIfGreater(a, zero, b, SIMD_INF_MIN, a);  \
-    for (i = beg_sn; i < end_sn; ++i) {                                      \
-        SIMDGetIfGreater(b, a, dp_h[i], a, qi[i], b);                        \
-    }                                                                        \
-    _dp_h = (score_t*)&a, _qi = (score_t*)&b;                                \
-    for (i = 0; i < pn; ++i) {                                               \
+    max = inf_min, left_max_i = right_max_i = -1;                            \
+    _dp_h = (score_t*)dp_h, _qi = (score_t*)qi;                              \
+    for (i = beg; i <= end; ++i) {                                           \
         if (_dp_h[i] > max) {                                                \
-            max = _dp_h[i]; max_i = _qi[i];                                  \
+            max = _dp_h[i];                                                  \
+            left_max_i = right_max_i = _qi[i];                               \
+        } else if (_dp_h[i] == max) {                                        \
+            right_max_i = _qi[i];                                            \
         }                                                                    \
     }                                                                        \
 }
 
-#define simd_abpoa_ada_max_i   {                                                                                        \
-    /* set max_pos_left/right for next nodes */                                                                         \
-    int out_i = max_i + 1;                                                                                              \
-    for (i = 0; i < graph->node[node_id].out_edge_n; ++i) {                                                             \
-        int out_node_id = graph->node[node_id].out_id[i];                                                               \
-        if (out_i > graph->node_id_to_max_pos_right[out_node_id]) graph->node_id_to_max_pos_right[out_node_id] = out_i; \
-        if (out_i < graph->node_id_to_max_pos_left[out_node_id]) graph->node_id_to_max_pos_left[out_node_id] = out_i;   \
-    }                                                                                                                   \
+#define simd_abpoa_ada_max_i   {                                             \
+    /* set max_pos_left/right for next nodes */                              \
+    for (i = 0; i < graph->node[node_id].out_edge_n; ++i) {                  \
+        int out_node_id = graph->node[node_id].out_id[i];                    \
+        if (right_max_i+1 > graph->node_id_to_max_pos_right[out_node_id])    \
+            graph->node_id_to_max_pos_right[out_node_id] = right_max_i+1;    \
+        if (left_max_i+1 < graph->node_id_to_max_pos_left[out_node_id])      \
+            graph->node_id_to_max_pos_left[out_node_id] = left_max_i+1;      \
+    }                                                                        \
 }
 
 // TODO end_bonus for extension
@@ -1077,11 +1098,11 @@ SIMD_para_t _simd_p64 = {128, 64, 1,  2, 16, -1};
         simd_abpoa_lg_dp(score_t, SIMDShiftOneN, SIMDMax, SIMDAdd, SIMDSub);                    \
         if (abpt->align_mode == ABPOA_LOCAL_MODE) {                                             \
             simd_abpoa_max_in_row(score_t, SIMDSetIfGreater, SIMDGetIfGreater);                 \
-            set_global_max_score(max, dp_i, max_i);                                             \
+            set_global_max_score(max, dp_i, left_max_i);                                        \
         }                                                                                       \
         if (abpt->align_mode == ABPOA_EXTEND_MODE) {                                            \
             simd_abpoa_max_in_row(score_t, SIMDSetIfGreater, SIMDGetIfGreater);                 \
-            set_extend_max_score(max, dp_i, max_i);                                             \
+            set_extend_max_score(max, dp_i, right_max_i);                                       \
         }                                                                                       \
         if (abpt->wb >= 0) {                                                                    \
             if (abpt->align_mode == ABPOA_GLOBAL_MODE) {                                        \
@@ -1092,7 +1113,11 @@ SIMD_para_t _simd_p64 = {128, 64, 1,  2, 16, -1};
     }                                                                                           \
     if (abpt->align_mode == ABPOA_GLOBAL_MODE) simd_abpoa_global_get_max(score_t, DP_H, dp_sn); \
     res->best_score = best_score;                                                               \
- /* simd_abpoa_print_lg_matrix(score_t, beg_index, end_index); printf("best_score: (%d, %d) -> %d\n", best_i, best_j, best_score); */                          \
+    if (abpt->verbose >= ABPOA_DEBUG_VERBOSE) {                                                 \
+        if (abpt->verbose >= ABPOA_LONG_DEBUG_VERBOSE)                                          \
+            simd_abpoa_print_lg_matrix(score_t, beg_index, end_index);                          \
+        fprintf(stderr, "best_score: (%d, %d) -> %d\n", best_i, best_j, best_score);            \
+    }                                                                                           \
     if (abpt->ret_cigar) simd_abpoa_lg_backtrack(score_t);                                      \
     simd_abpoa_free_var; SIMDFree(GAP_E1S);                                                     \
 }
@@ -1107,10 +1132,10 @@ SIMD_para_t _simd_p64 = {128, 64, 1,  2, 16, -1};
         simd_abpoa_ag_dp(score_t, SIMDShiftOneN, SIMDMax, SIMDAdd, SIMDSub, SIMDGetIfGreater, SIMDSetIfGreater, SIMDSetIfEqual); \
         if (abpt->align_mode == ABPOA_LOCAL_MODE) {                                                 \
             simd_abpoa_max_in_row(score_t, SIMDSetIfGreater, SIMDGetIfGreater);                     \
-            set_global_max_score(max, dp_i, max_i);                                                 \
+            set_global_max_score(max, dp_i, left_max_i);                                            \
         } else if (abpt->align_mode == ABPOA_EXTEND_MODE) {                                         \
             simd_abpoa_max_in_row(score_t, SIMDSetIfGreater, SIMDGetIfGreater);                     \
-            set_extend_max_score(max, dp_i, max_i);                                                 \
+            set_extend_max_score(max, dp_i, right_max_i);                                           \
         }                                                                                           \
         if (abpt->wb >= 0) {                                                                        \
             if (abpt->align_mode == ABPOA_GLOBAL_MODE) {                                            \
@@ -1121,7 +1146,11 @@ SIMD_para_t _simd_p64 = {128, 64, 1,  2, 16, -1};
     }                                                                                               \
     if (abpt->align_mode == ABPOA_GLOBAL_MODE) simd_abpoa_global_get_max(score_t, DP_HEF, 3*dp_sn); \
     res->best_score = best_score;                                                                   \
- /* simd_abpoa_print_ag_matrix(score_t, beg_index, end_index); fprintf(stderr, "best_score: (%d, %d) -> %d\n", best_i, best_j, best_score); */ \
+    if (abpt->verbose >= ABPOA_DEBUG_VERBOSE) {                                                     \
+        if (abpt->verbose >= ABPOA_LONG_DEBUG_VERBOSE)                                              \
+            simd_abpoa_print_ag_matrix(score_t, beg_index, end_index);                              \
+        fprintf(stderr, "best_score: (%d, %d) -> %d\n", best_i, best_j, best_score);                \
+    }                                                                                               \
     if (abpt->ret_cigar) simd_abpoa_ag_backtrack(score_t);                                          \
     simd_abpoa_free_var; SIMDFree(GAP_E1S);                                                         \
 }
@@ -1136,10 +1165,10 @@ SIMD_para_t _simd_p64 = {128, 64, 1,  2, 16, -1};
         simd_abpoa_cg_dp(score_t, SIMDShiftOneN, SIMDMax, SIMDAdd, SIMDSub, SIMDGetIfGreater, SIMDSetIfGreater, SIMDSetIfEqual); \
         if (abpt->align_mode == ABPOA_LOCAL_MODE) {                                                   \
             simd_abpoa_max_in_row(score_t, SIMDSetIfGreater, SIMDGetIfGreater);                       \
-            set_global_max_score(max, dp_i, max_i);                                                   \
+            set_global_max_score(max, dp_i, left_max_i);                                              \
         } else if (abpt->align_mode == ABPOA_EXTEND_MODE) {                                           \
             simd_abpoa_max_in_row(score_t, SIMDSetIfGreater, SIMDGetIfGreater);                       \
-            set_extend_max_score(max, dp_i, max_i);                                                   \
+            set_extend_max_score(max, dp_i, right_max_i);                                             \
         }                                                                                             \
         if (abpt->wb >= 0) {                                                                          \
             if (abpt->align_mode == ABPOA_GLOBAL_MODE) {                                              \
@@ -1148,11 +1177,13 @@ SIMD_para_t _simd_p64 = {128, 64, 1,  2, 16, -1};
             simd_abpoa_ada_max_i;                                                                     \
         }                                                                                             \
     }                                                                                                 \
- /* printf("dp_sn: %d\n", tot_dp_sn); */                                                              \
     if (abpt->align_mode == ABPOA_GLOBAL_MODE) simd_abpoa_global_get_max(score_t, DP_H2E2F, 5*dp_sn); \
     res->best_score = best_score;                                                                     \
- /* simd_abpoa_print_cg_matrix(score_t, beg_index, end_index); */                                     \
- /* fprintf(stderr,"best_score: (%d, %d) -> %d\n",best_i,best_j,best_score); */                       \
+    if (abpt->verbose >= ABPOA_DEBUG_VERBOSE) {                                                       \
+        if (abpt->verbose >= ABPOA_LONG_DEBUG_VERBOSE)                                                \
+            simd_abpoa_print_cg_matrix(score_t, beg_index, end_index);                                \
+        fprintf(stderr,"best_score: (%d, %d) -> %d\n",best_i, best_j, best_score);                    \
+    }                                                                                                 \
     if (abpt->ret_cigar) simd_abpoa_cg_backtrack(score_t);                                            \
     simd_abpoa_free_var; SIMDFree(GAP_E1S); SIMDFree(GAP_E2S);                                        \
 }
@@ -1191,7 +1222,8 @@ int simd_abpoa_realloc(abpoa_t *ab, int gn, int qlen, abpoa_para_t *abpt, SIMD_p
         // err_func_format_printf(__func__, "Warning: Graph is too large or query is too long.\n");
         // return 1;
     // }
-    // fprintf(stderr, "%lld, %lld, %lld\n", (long long)gn, (long long)ab->abm->s_msize, (long long)s_msize);
+    if (abpt->verbose >= ABPOA_DEBUG_VERBOSE)
+        fprintf(stderr, "realloc: graph_node %lld, qlen: %d, (%lld, %lld)\n", (long long)gn, qlen, (long long)ab->abm->s_msize, (long long)s_msize);
     if (s_msize > ab->abm->s_msize) {
         if (ab->abm->s_mem) SIMDFree(ab->abm->s_mem);
         kroundup64(s_msize); ab->abm->s_msize = s_msize;
@@ -1208,7 +1240,7 @@ int simd_abpoa_realloc(abpoa_t *ab, int gn, int qlen, abpoa_para_t *abpt, SIMD_p
     return 0;
 }
 
-void abpoa_init_var(abpoa_para_t *abpt, uint8_t *query, int qlen, SIMDi *qp, SIMDi *qi, int *mat, int qp_sn, int pn, SIMDi SIMD_INF_MIN) {
+void abpoa_init_var(abpoa_para_t *abpt, uint8_t *query, int qlen, SIMDi *qp, SIMDi *qi, int *mat, int64_t qp_sn, int pn, SIMDi SIMD_INF_MIN) {
     int i, j, k; int32_t *_qi;
     /* generate the query profile */
     for (i = 0; i < qp_sn * abpt->m; ++i) qp[i] = SIMD_INF_MIN;
@@ -1225,7 +1257,10 @@ void abpoa_init_var(abpoa_para_t *abpt, uint8_t *query, int qlen, SIMDi *qp, SIM
     }
 }
 
-void abpoa_cg_first_dp(abpoa_para_t *abpt, abpoa_graph_t *graph, uint8_t *index_map, int beg_node_id, int end_node_id, int *dp_beg, int *dp_end, int *dp_beg_sn, int *dp_end_sn, int pn, int qlen, int w, int dp_sn, SIMDi *DP_H2E2F, SIMDi SIMD_INF_MIN, int32_t inf_min, int gap_open1, int gap_ext1, int gap_open2, int gap_ext2, int gap_oe1, int gap_oe2) {
+void abpoa_cg_first_dp(abpoa_para_t *abpt, abpoa_graph_t *graph, uint8_t *index_map, int beg_node_id, int end_node_id, 
+                       int *dp_beg, int *dp_end, int *dp_beg_sn, int *dp_end_sn, int pn, int qlen, int w, int64_t dp_sn, 
+                       SIMDi *DP_H2E2F, SIMDi SIMD_INF_MIN, int32_t inf_min, 
+                       int gap_open1, int gap_ext1, int gap_open2, int gap_ext2, int gap_oe1, int gap_oe2) {
     int i, _end_sn;
     if (abpt->wb >= 0) {
         graph->node_id_to_max_pos_left[beg_node_id] = graph->node_id_to_max_pos_right[beg_node_id] = 0;
@@ -1234,12 +1269,12 @@ void abpoa_cg_first_dp(abpoa_para_t *abpt, abpoa_graph_t *graph, uint8_t *index_
             if (index_map[abpoa_graph_node_id_to_index(graph, out_id)])
                 graph->node_id_to_max_pos_left[out_id] = graph->node_id_to_max_pos_right[out_id] = 1;
         }
-        dp_beg[0] = GET_AD_DP_BEGIN(graph, w, beg_node_id, end_node_id, qlen), dp_end[0] = GET_AD_DP_END(graph, w, beg_node_id, end_node_id, qlen);
+        dp_beg[0] = 0; // GET_AD_DP_BEGIN(graph, w, beg_node_id, end_node_id, qlen)
+        dp_end[0] = GET_AD_DP_END(graph, w, beg_node_id, end_node_id, qlen);
     } else {
         dp_beg[0] = 0, dp_end[0] = qlen;
     }
     dp_beg_sn[0] = (dp_beg[0])/pn; dp_end_sn[0] = (dp_end[0])/pn;
-    dp_beg[0] = dp_beg_sn[0] * pn; dp_end[0] = (dp_end_sn[0]+1)*pn-1;
     SIMDi *dp_h = DP_H2E2F; SIMDi *dp_e1 = dp_h + dp_sn; SIMDi *dp_e2 = dp_e1 + dp_sn, *dp_f1 = dp_e2 + dp_sn, *dp_f2 = dp_f1 + dp_sn;
     _end_sn = MIN_OF_TWO(dp_end_sn[0]+1, dp_sn-1);
 
@@ -1249,40 +1284,37 @@ void abpoa_cg_first_dp(abpoa_para_t *abpt, abpoa_graph_t *graph, uint8_t *index_
     int32_t *_dp_h = (int32_t*)dp_h, *_dp_e1 = (int32_t*)dp_e1, *_dp_e2 = (int32_t*)dp_e2, *_dp_f1 = (int32_t*)dp_f1, *_dp_f2 = (int32_t*)dp_f2;
     _dp_h[0] = 0; _dp_e1[0] = -(gap_oe1); _dp_e2[0] = -(gap_oe2); _dp_f1[0] = _dp_f2[0] = inf_min;
     for (i = 1; i <= dp_end[0]; ++i) { /* no SIMD parallelization */
-        _dp_f1[i] = -(gap_open1 + gap_ext1 * i);
-        _dp_f2[i] = -(gap_open2 + gap_ext2 * i);
-        _dp_h[i] = MAX_OF_TWO(_dp_f1[i], _dp_f2[i]); // -MIN_OF_TWO(gap_open1+gap_ext1*i, gap_open2+gap_ext2*i);
+        _dp_f1[i] = -(gap_open1 + gap_ext1 * i); _dp_f2[i] = -(gap_open2 + gap_ext2 * i);
+        _dp_h[i] = MAX_OF_TWO(_dp_f1[i], _dp_f2[i]);
     }
 }
 
-int abpoa_max(SIMDi SIMD_INF_MIN, SIMDi zero, int inf_min, SIMDi *dp_h, SIMDi *qi, int qlen, int pn, int beg_sn, int end_sn) {
+void abpoa_max(SIMDi SIMD_INF_MIN, SIMDi zero, int inf_min, SIMDi *dp_h, SIMDi *qi, int qlen, int pn, int beg, int end, int *left_max_i, int *right_max_i) {
     /* select max dp_h */
-    int max = inf_min, max_i = -1, i;
-    SIMDi a = dp_h[end_sn], b = qi[end_sn];
-    if (end_sn == qlen / pn) SIMDSetIfGreateri32(a, zero, b, SIMD_INF_MIN, a);
-    for (i = beg_sn; i < end_sn; ++i) {
-        SIMDGetIfGreateri32(b, a, dp_h[i], a, qi[i], b);
-    }
-    int32_t *_dp_h = (int32_t*)&a, *_qi = (int32_t*)&b;
-    for (i = 0; i < pn; ++i) {
+    int max = inf_min, i; *left_max_i = *right_max_i = -1;
+    int32_t *_dp_h = (int32_t*)dp_h, *_qi = (int32_t*)qi;
+    for (i = beg; i <= end; ++i) {
         if (_dp_h[i] > max) {
-            max = _dp_h[i]; max_i = _qi[i];
+            max = _dp_h[i]; *left_max_i = *right_max_i = _qi[i];
+        } else if (_dp_h[i] == max) {
+            *right_max_i = _qi[i];
         }
     }
-    return max_i;
 }
 
-void abpoa_ada_max_i(int max_i, abpoa_graph_t *graph, int node_id) {
+void abpoa_ada_max_i(int left_max_i, int right_max_i, abpoa_graph_t *graph, int node_id) {
     /* set max_pos_left/right for next nodes */
-    int out_i = max_i + 1; int i;
+    int i;
     for (i = 0; i < graph->node[node_id].out_edge_n; ++i) {
         int out_node_id = graph->node[node_id].out_id[i];
-        if (out_i > graph->node_id_to_max_pos_right[out_node_id]) graph->node_id_to_max_pos_right[out_node_id] = out_i;
-        if (out_i < graph->node_id_to_max_pos_left[out_node_id]) graph->node_id_to_max_pos_left[out_node_id] = out_i;
+        if (right_max_i+1 > graph->node_id_to_max_pos_right[out_node_id])
+            graph->node_id_to_max_pos_right[out_node_id] = right_max_i+1;
+        if (left_max_i+1 < graph->node_id_to_max_pos_left[out_node_id])
+            graph->node_id_to_max_pos_left[out_node_id] = left_max_i+1;
     }
 }
 
-void abpoa_global_get_max(abpoa_graph_t *graph, int beg_index, int end_node_id, uint8_t *index_map, SIMDi *DP_H_HE, int dp_sn, int qlen, int *dp_end, int32_t *best_score, int *best_i, int *best_j) {
+void abpoa_global_get_max(abpoa_graph_t *graph, int beg_index, int end_node_id, uint8_t *index_map, SIMDi *DP_H_HE, int64_t dp_sn, int qlen, int *dp_end, int32_t *best_score, int *best_i, int *best_j) {
     int in_id, in_index, dp_i, i;
     for (i = 0; i < graph->node[end_node_id].in_edge_n; ++i) {
         in_id = graph->node[end_node_id].in_id[i];
@@ -1300,26 +1332,37 @@ void abpoa_global_get_max(abpoa_graph_t *graph, int beg_index, int end_node_id,
     }
 }
 
-int abpoa_cg_dp(SIMDi *q, SIMDi *dp_h, SIMDi *dp_e1, SIMDi *dp_e2, SIMDi *dp_f1, SIMDi *dp_f2, int **pre_index, int *pre_n, int index_i, int dp_i, abpoa_graph_t *graph, abpoa_para_t *abpt, int dp_sn, int pn, int qlen, int w, SIMDi *DP_H2E2F, SIMDi SIMD_INF_MIN, SIMDi GAP_O1, SIMDi GAP_O2, SIMDi GAP_E1, SIMDi GAP_E2, SIMDi GAP_OE1, SIMDi GAP_OE2, SIMDi* GAP_E1S, SIMDi* GAP_E2S, SIMDi *PRE_MIN, SIMDi *PRE_MASK, SIMDi *SUF_MIN, int log_n, int *dp_beg, int *dp_end, int *dp_beg_sn, int *dp_end_sn, int end_node_id) {
+int abpoa_cg_dp(SIMDi *q, SIMDi *dp_h, SIMDi *dp_e1, SIMDi *dp_e2, SIMDi *dp_f1, SIMDi *dp_f2, int inf_min,
+                int **pre_index, int *pre_n, int beg_index, int index_i, int dp_i, abpoa_graph_t *graph, abpoa_para_t *abpt, 
+                int64_t dp_sn, int pn, int qlen, int w, SIMDi *DP_H2E2F, SIMDi SIMD_INF_MIN, 
+                SIMDi GAP_O1, SIMDi GAP_O2, SIMDi GAP_E1, SIMDi GAP_E2, SIMDi GAP_OE1, SIMDi GAP_OE2, SIMDi* GAP_E1S, SIMDi* GAP_E2S, 
+                SIMDi *PRE_MIN, SIMDi *PRE_MASK, SIMDi *SUF_MIN, int log_n, 
+                int *dp_beg, int *dp_end, int *dp_beg_sn, int *dp_end_sn, int end_node_id) {
     int tot_dp_sn = 0, i, pre_i, node_id = abpoa_graph_index_to_node_id(graph, index_i);
-    int min_pre_beg_sn, max_pre_end_sn, beg, end, beg_sn, end_sn, pre_end, pre_end_sn, pre_beg_sn, sn_i;
+    int min_pre_beg, min_pre_beg_sn, max_pre_end_sn, beg, end, beg_sn, end_sn, pre_end, pre_end_sn, pre_beg_sn, sn_i;
     if (abpt->wb < 0) {
         beg = dp_beg[dp_i] = 0, end = dp_end[dp_i] = qlen;
         beg_sn = dp_beg_sn[dp_i] = beg/pn; end_sn = dp_end_sn[dp_i] = end/pn;
         min_pre_beg_sn = 0, max_pre_end_sn = end_sn;
     } else {
         beg = GET_AD_DP_BEGIN(graph, w, node_id, end_node_id, qlen), end = GET_AD_DP_END(graph, w, node_id, end_node_id, qlen);
-        beg_sn = beg / pn; min_pre_beg_sn = INT32_MAX, max_pre_end_sn = -1;
+        if (abpt->verbose >= ABPOA_LONG_DEBUG_VERBOSE)
+            fprintf(stderr, "index: %d (node: %d): raw beg: %d, end: %d\n", index_i-beg_index, node_id, beg, end);
+        beg_sn = beg / pn; min_pre_beg = min_pre_beg_sn = INT32_MAX, max_pre_end_sn = -1;
         for (i = 0; i < pre_n[dp_i]; ++i) {
             pre_i = pre_index[dp_i][i];
-            if (min_pre_beg_sn > dp_beg_sn[pre_i]) min_pre_beg_sn = dp_beg_sn[pre_i];
+            if (abpt->verbose >= ABPOA_LONG_DEBUG_VERBOSE)
+                fprintf(stderr, "\tpre_i: %d, pre_dp_beg: %d, pre_dp_beg_sn: %d\n", pre_i, dp_beg[pre_i], dp_beg_sn[pre_i]);
+            if (min_pre_beg > dp_beg[pre_i]) min_pre_beg = dp_beg[pre_i], min_pre_beg_sn = dp_beg_sn[pre_i];
             if (max_pre_end_sn < dp_end_sn[pre_i]) max_pre_end_sn = dp_end_sn[pre_i];
-        } if (beg_sn < min_pre_beg_sn) beg_sn = min_pre_beg_sn;
-        dp_beg_sn[dp_i] = beg_sn; beg = dp_beg[dp_i] = dp_beg_sn[dp_i] * pn;
-        end_sn = dp_end_sn[dp_i] = end/pn; end = dp_end[dp_i] = (dp_end_sn[dp_i]+1)*pn-1;
-#ifdef __DEBUG__
-    fprintf(stderr, "index: %d (node: %d): beg: %d, end: %d, beg_sn: %d, end_sn: %d\n", index_i, node_id, beg, end, beg_sn, end_sn);
-#endif
+        } 
+        if (beg_sn < min_pre_beg_sn) {
+            assert(beg < min_pre_beg); beg_sn = min_pre_beg_sn; beg = min_pre_beg;
+        }
+        // NOT extend dp_beg/end based on dp_beg/end_sn
+        dp_beg_sn[dp_i] = beg_sn; dp_beg[dp_i] = beg; end_sn = dp_end_sn[dp_i] = end/pn; dp_end[dp_i] = end;
+        if (abpt->verbose >= ABPOA_LONG_DEBUG_VERBOSE)
+            fprintf(stderr, "index: %d (node: %d): beg: %d, end: %d, beg_sn: %d, end_sn: %d, max_left: %d, max_right: %d\n", index_i-beg_index, node_id, beg, end, beg_sn, end_sn, graph->node_id_to_max_pos_left[node_id], graph->node_id_to_max_pos_right[node_id]);
     }
     tot_dp_sn += (end_sn - beg_sn + 1);
     /* loop query */
@@ -1333,7 +1376,7 @@ int abpoa_cg_dp(SIMDi *q, SIMDi *dp_h, SIMDi *dp_e1, SIMDi *dp_e2, SIMDi *dp_f1,
     /* set M from (pre_i, q_i-1) */
     if (pre_beg_sn < beg_sn) _beg_sn = beg_sn, first = SIMDShiftRight(pre_dp_h[beg_sn-1], SIMDTotalBytes-SIMDShiftOneNi32);
     else _beg_sn = pre_beg_sn, first = SIMDShiftRight(SIMD_INF_MIN, SIMDTotalBytes-SIMDShiftOneNi32);
-    _end_sn = MIN_OF_THREE((pre_end+1)/pn, end_sn, dp_sn-1);
+    _end_sn = MIN_OF_THREE((pre_end+1)/pn, end_sn, dp_sn-1); // pre_end+1: match/mismatch operations
     for (i = beg_sn; i < _beg_sn; ++i) dp_h[i] = SIMD_INF_MIN;
     for (i = _end_sn+1; i <= MIN_OF_TWO(end_sn+1, dp_sn-1); ++i) dp_h[i] = SIMD_INF_MIN;
     for (sn_i = _beg_sn; sn_i <= _end_sn; ++sn_i) { /* SIMD parallelization */
@@ -1349,7 +1392,7 @@ int abpoa_cg_dp(SIMDi *q, SIMDi *dp_h, SIMDi *dp_e1, SIMDi *dp_e2, SIMDi *dp_f1,
         dp_e1[sn_i] = pre_dp_e1[sn_i];
         dp_e2[sn_i] = pre_dp_e2[sn_i];
     }
-    // if (index_i == 13095) debug_simd_abpoa_print_cg_matrix_row("1", int32_t, index_i);
+    // debug_simd_abpoa_print_cg_matrix_row("1", int32_t, index_i);
     // new init end
     /* get max m and e */
     for (i = 1; i < pre_n[dp_i]; ++i) {
@@ -1381,6 +1424,11 @@ int abpoa_cg_dp(SIMDi *q, SIMDi *dp_h, SIMDi *dp_e1, SIMDi *dp_e2, SIMDi *dp_f1,
     /* new F start */
     first = SIMDShiftRight(SIMDShiftLeft(dp_h[beg_sn], SIMDTotalBytes-SIMDShiftOneNi32), SIMDTotalBytes-SIMDShiftOneNi32);
     int set_num; SIMDi first2 = first;//, tmp;
+    int32_t *_dp_h = (int32_t*)dp_h, *_dp_e1 = (int32_t*)dp_e1, *_dp_e2 = (int32_t*)dp_e2;
+    // debug_simd_abpoa_print_cg_matrix_row("4", int32_t, index_i);
+    // set h/e as inf_min for cells out of range: < dp_beg or > dp_end
+    for (i = beg_sn * pn; i < beg; ++i) _dp_h[i] = _dp_e1[i] = _dp_e2[i] = inf_min;
+    for (i = end+1; i < (end_sn+1)*pn; ++i) _dp_h[i] = _dp_e1[i] = _dp_e2[i] = inf_min;
     for (sn_i = beg_sn; sn_i <= end_sn; ++sn_i) {
         if (sn_i < min_pre_beg_sn) {
             _err_fatal_simple(__func__, "sn_i < min_pre_beg_sn\n");
@@ -1403,17 +1451,23 @@ int abpoa_cg_dp(SIMDi *q, SIMDi *dp_h, SIMDi *dp_e1, SIMDi *dp_e2, SIMDi *dp_f1,
         first2 = SIMDShiftRight(SIMDMaxi32(dp_h[sn_i], SIMDAddi32(dp_f2[sn_i], GAP_O2)), SIMDTotalBytes-SIMDShiftOneNi32);
         /* H = max{H, F}    */
         dp_h[sn_i] = SIMDMaxi32(SIMDMaxi32(dp_h[sn_i], dp_f1[sn_i]), dp_f2[sn_i]);
+        if (sn_i == end_sn) for (i = end+1; i < (end_sn+1)*pn; ++i) _dp_h[i] = _dp_e1[i] = _dp_e2[i] = inf_min;
         // if (sn_i==beg_sn) debug_simd_abpoa_print_cg_matrix_row("4.4", int32_t, index_i);
         /* e for next cell */
         // SIMDSetIfEquali32(dp_e1[sn_i], dp_h[sn_i], tmp, SIMDMaxi32(SIMDSubi32(dp_e1[sn_i], GAP_E1), SIMDSubi32(dp_h[sn_i], GAP_OE1)), SIMD_INF_MIN);
         dp_e1[sn_i] = SIMDMaxi32(SIMDSubi32(dp_e1[sn_i], GAP_E1), SIMDSubi32(dp_h[sn_i], GAP_OE1));
         // SIMDSetIfEquali32(dp_e2[sn_i], dp_h[sn_i], tmp, SIMDMaxi32(SIMDSubi32(dp_e2[sn_i], GAP_E2), SIMDSubi32(dp_h[sn_i], GAP_OE2)), SIMD_INF_MIN);
         dp_e2[sn_i] = SIMDMaxi32(SIMDSubi32(dp_e2[sn_i], GAP_E2), SIMDSubi32(dp_h[sn_i], GAP_OE2));
+        // debug_simd_abpoa_print_cg_matrix_row("4.5", int32_t, index_i);
     }
+    // debug_simd_abpoa_print_cg_matrix_row("5", int32_t, index_i);
     return tot_dp_sn;
 }
 
-void abpoa_cg_backtrack(SIMDi *DP_H2E2F, int **pre_index, int *pre_n, int *dp_beg, int *dp_end, int dp_sn, int m, int *mat, int gap_ext1, int gap_ext2, int gap_oe1, int gap_oe2, int beg_index, int best_dp_i, int best_dp_j, int qlen, abpoa_graph_t *graph, abpoa_para_t *abpt, uint8_t *query, abpoa_res_t *res) {
+void abpoa_cg_backtrack(SIMDi *DP_H2E2F, int **pre_index, int *pre_n, int *dp_beg, int *dp_end, int64_t dp_sn, 
+                        int m, int *mat, int gap_ext1, int gap_ext2, int gap_oe1, int gap_oe2, 
+                        int beg_index, int best_dp_i, int best_dp_j, int qlen, 
+                        abpoa_graph_t *graph, abpoa_para_t *abpt, uint8_t *query, abpoa_res_t *res) {
     int dp_i, dp_j, k, pre_i, n_c = 0, m_c = 0, id, hit, cur_op = ABPOA_ALL_OP, _start_i, _start_j;
     SIMDi *dp_h;
     int32_t s, is_match, *_dp_h=NULL, *_dp_e1, *_dp_e2, *_pre_dp_h, *_pre_dp_e1, *_pre_dp_e2, *_dp_f1, *_dp_f2; abpoa_cigar_t *cigar = 0;
@@ -1540,10 +1594,8 @@ void abpoa_cg_backtrack(SIMDi *DP_H2E2F, int **pre_index, int *pre_n, int *dp_be
                 }
             }
         }
-        if (hit == 0) exit(1);
-#ifdef __DEBUG__
-        fprintf(stderr, "%d, %d, %d\n", dp_i, dp_j, cur_op);
-#endif
+        if (hit == 0) err_fatal_simple("Error in cg_backtrack.");
+        simd_output_pre_nodes(pre_index[dp_i], pre_n[dp_i], dp_i, dp_j, cur_op, abpt->verbose);
     }
     if (dp_j > 0) cigar = abpoa_push_cigar(&n_c, &m_c, cigar, ABPOA_CINS, dp_j, -1, dp_j-1);
     /* reverse cigar */
@@ -1554,15 +1606,23 @@ void abpoa_cg_backtrack(SIMDi *DP_H2E2F, int **pre_index, int *pre_n, int *dp_be
     /*abpoa_print_cigar(n_c, *graph_cigar, graph);*/
 }
 
-int abpoa_cg_global_align_sequence_to_graph_core(abpoa_t *ab, int beg_node_id, int beg_index, int end_node_id, int end_index, uint8_t *index_map, int qlen, uint8_t *query, abpoa_para_t *abpt, SIMD_para_t sp, abpoa_res_t *res) {
-    int tot_dp_sn = 0;
+int abpoa_cg_global_align_sequence_to_graph_core(abpoa_t *ab, 
+                                                 int beg_node_id, int beg_index, int end_node_id, int end_index, 
+                                                 uint8_t *index_map, int qlen, uint8_t *query, 
+                                                 abpoa_para_t *abpt, SIMD_para_t sp, abpoa_res_t *res) {
+    int i, j;
     abpoa_graph_t *graph = ab->abg; abpoa_simd_matrix_t *abm = ab->abm;
-    int matrix_row_n = end_index-beg_index+1, matrix_col_n = qlen + 1;
+    int64_t matrix_row_n = end_index-beg_index+1, matrix_col_n = qlen + 1;
     int **pre_index, *pre_n, _pre_index, _pre_n;
-    int i, j, *dp_beg, *dp_beg_sn, *dp_end, *dp_end_sn, node_id, index_i, dp_i;
-    int beg_sn, end_sn;
-    int pn, log_n, size, qp_sn, dp_sn; /* pn: value per SIMDi, qp_sn/dp_sn/d_sn: segmented length*/
+    int node_id, index_i, dp_i;
+    int pn, log_n, size; int64_t qp_sn, dp_sn; /* pn: value per SIMDi, qp_sn/dp_sn/d_sn: segmented length*/
     SIMDi *dp_h, *qp, *qi;
+    // dp_beg/end: beg/end position of dp band in query (column) for each node (row)
+    // dp_beg/end_sn: beg/end VECTOR position of dp band in query (segmented column) for each node (row)
+    // XXX to keep consistency between different VECTOR size (SSE41/AVX2/AVX/512):
+    // .   dp cells within dp_beg/end_sn VECTORs but not within dp_beg/end need to be set to -inf
+    int *dp_beg, *dp_beg_sn, *dp_end, *dp_end_sn; 
+
     int32_t best_score = sp.inf_min, inf_min = sp.inf_min;
     int *mat = abpt->mat, best_i = 0, best_j = 0; int32_t gap_ext1 = abpt->gap_ext1;
     int w = abpt->wb < 0 ? qlen : abpt->wb + (int)(abpt->wf * qlen); /* when w < 0, do whole global */
@@ -1573,12 +1633,15 @@ int abpoa_cg_global_align_sequence_to_graph_core(abpoa_t *ab, int beg_node_id, i
     int32_t gap_open1 = abpt->gap_open1, gap_oe1 = gap_open1 + gap_ext1;
     int32_t gap_open2 = abpt->gap_open2, gap_ext2 = abpt->gap_ext2, gap_oe2 = gap_open2 + gap_ext2;
     SIMDi *DP_H2E2F, *dp_e1, *dp_e2, *dp_f2, *dp_f1;
-    SIMDi GAP_O1 = SIMDSetOnei32(gap_open1), GAP_O2 = SIMDSetOnei32(gap_open2), GAP_E1 = SIMDSetOnei32(gap_ext1), GAP_E2 = SIMDSetOnei32(gap_ext2), GAP_OE1 = SIMDSetOnei32(gap_oe1), GAP_OE2 = SIMDSetOnei32(gap_oe2);
+    SIMDi GAP_O1 = SIMDSetOnei32(gap_open1), GAP_O2 = SIMDSetOnei32(gap_open2);
+    SIMDi GAP_E1 = SIMDSetOnei32(gap_ext1), GAP_E2 = SIMDSetOnei32(gap_ext2);
+    SIMDi GAP_OE1 = SIMDSetOnei32(gap_oe1), GAP_OE2 = SIMDSetOnei32(gap_oe2);
     DP_H2E2F = qp + qp_sn * abpt->m; qi = DP_H2E2F + dp_sn * matrix_row_n * 5;
 
     // for SET_F mask[pn], suf_min[pn], pre_min[logN]
     SIMDi *PRE_MASK, *SUF_MIN, *PRE_MIN, *GAP_E1S, *GAP_E2S;
-    PRE_MASK = (SIMDi*)SIMDMalloc((pn+1) * size, size), SUF_MIN = (SIMDi*)SIMDMalloc((pn+1) * size, size), PRE_MIN = (SIMDi*)SIMDMalloc(pn * size, size), GAP_E1S =  (SIMDi*)SIMDMalloc(log_n * size, size), GAP_E2S =  (SIMDi*)SIMDMalloc(log_n * size, size);
+    PRE_MASK = (SIMDi*)SIMDMalloc((pn+1) * size, size), SUF_MIN = (SIMDi*)SIMDMalloc((pn+1) * size, size); 
+    PRE_MIN = (SIMDi*)SIMDMalloc(pn * size, size), GAP_E1S = (SIMDi*)SIMDMalloc(log_n * size, size), GAP_E2S =  (SIMDi*)SIMDMalloc(log_n * size, size);
     for (i = 0; i < pn; ++i) {
         int32_t *pre_mask = (int32_t*)(PRE_MASK + i);
         for (j = 0; j <= i; ++j) pre_mask[j] = -1;
@@ -1612,26 +1675,34 @@ int abpoa_cg_global_align_sequence_to_graph_core(abpoa_t *ab, int beg_node_id, i
         }
         pre_n[dp_i] = _pre_n;
     }
-    abpoa_cg_first_dp(abpt, graph, index_map, beg_node_id, end_node_id, dp_beg, dp_end, dp_beg_sn, dp_end_sn, pn, qlen, w, dp_sn, DP_H2E2F, SIMD_INF_MIN, inf_min, gap_open1, gap_ext1, gap_open2, gap_ext2, gap_oe1, gap_oe2);
+    abpoa_cg_first_dp(abpt, graph, index_map, beg_node_id, end_node_id, 
+                      dp_beg, dp_end, dp_beg_sn, dp_end_sn, pn, qlen, w, dp_sn, 
+                      DP_H2E2F, SIMD_INF_MIN, inf_min, gap_open1, gap_ext1, gap_open2, gap_ext2, gap_oe1, gap_oe2);
 
     for (index_i=beg_index+1, dp_i=1; index_i<end_index; ++index_i, ++dp_i) {
         if (index_map[index_i] == 0) continue;
         node_id = abpoa_graph_index_to_node_id(graph, index_i);
         SIMDi *q = qp + graph->node[node_id].base * qp_sn;
-        dp_h = DP_H2E2F + (dp_i*5) * dp_sn; dp_e1 = dp_h + dp_sn; dp_e2 = dp_e1 + dp_sn; dp_f1 = dp_e2 + dp_sn; dp_f2 = dp_f1 + dp_sn;
-        tot_dp_sn += abpoa_cg_dp(q, dp_h, dp_e1, dp_e2, dp_f1, dp_f2, pre_index, pre_n, index_i, dp_i, graph, abpt, dp_sn, pn, qlen, w, DP_H2E2F, SIMD_INF_MIN, GAP_O1, GAP_O2, GAP_E1, GAP_E2, GAP_OE1, GAP_OE2, GAP_E1S, GAP_E2S, PRE_MIN, PRE_MASK, SUF_MIN, log_n, dp_beg, dp_end, dp_beg_sn, dp_end_sn, end_node_id);
+        dp_h = DP_H2E2F + (dp_i*5) * dp_sn; 
+        dp_e1 = dp_h + dp_sn; dp_e2 = dp_e1 + dp_sn; 
+        dp_f1 = dp_e2 + dp_sn; dp_f2 = dp_f1 + dp_sn;
+        abpoa_cg_dp(q, dp_h, dp_e1, dp_e2, dp_f1, dp_f2, inf_min,
+                    pre_index, pre_n, beg_index, index_i, dp_i, 
+                    graph, abpt, dp_sn, pn, qlen, w, 
+                    DP_H2E2F, SIMD_INF_MIN, GAP_O1, GAP_O2, GAP_E1, GAP_E2, GAP_OE1, GAP_OE2, GAP_E1S, GAP_E2S, 
+                    PRE_MIN, PRE_MASK, SUF_MIN, log_n, 
+                    dp_beg, dp_end, dp_beg_sn, dp_end_sn, end_node_id);
         if (abpt->wb >= 0) {
-            beg_sn = dp_beg_sn[dp_i], end_sn = dp_end_sn[dp_i];
-            int max_i = abpoa_max(SIMD_INF_MIN, zero, inf_min, dp_h, qi, qlen, pn, beg_sn, end_sn);
-            abpoa_ada_max_i(max_i, graph, node_id);
+            int left_max_i, right_max_i;
+            abpoa_max(SIMD_INF_MIN, zero, inf_min, dp_h, qi, qlen, pn, dp_beg[dp_i], dp_end[dp_i], &left_max_i, &right_max_i);
+            abpoa_ada_max_i(left_max_i, right_max_i, graph, node_id);
         }
     }
-    // printf("dp_sn: %d\n", tot_dp_sn);
-    // printf("dp_sn: %d, node_n: %d, seq_n: %d\n", tot_dp_sn, graph->node_n, qlen);
     abpoa_global_get_max(graph, beg_index, end_node_id, index_map, DP_H2E2F, 5*dp_sn, qlen, dp_end, &best_score, &best_i, &best_j);
-#ifdef __DEBUG__
-    simd_abpoa_print_cg_matrix(int32_t, beg_index, end_index); fprintf(stderr, "best_score: (%d, %d) -> %d\n", best_i, best_j, best_score);
-#endif
+    if (abpt->verbose >= ABPOA_DEBUG_VERBOSE) {
+        if (abpt->verbose >= ABPOA_LONG_DEBUG_VERBOSE) simd_abpoa_print_cg_matrix(int32_t, beg_index, end_index); 
+        fprintf(stderr, "best_score: (%d, %d) -> %d\n", best_i, best_j, best_score);
+    }
     res->best_score = best_score;
     abpoa_cg_backtrack(DP_H2E2F, pre_index, pre_n, dp_beg, dp_end, dp_sn, abpt->m, mat, gap_ext1, gap_ext2, gap_oe1, gap_oe2, beg_index, best_i, best_j, qlen, graph, abpt, query, res);
     for (i = 0; i < matrix_row_n; ++i) free(pre_index[i]);
@@ -1644,8 +1715,10 @@ int abpoa_cg_global_align_sequence_to_graph_core(abpoa_t *ab, int beg_node_id, i
 // align query to subgraph between beg_node_id and end_node_id (both are excluded)
 // generally: beg/end are the SRC/SINK_node
 int simd_abpoa_align_sequence_to_subgraph(abpoa_t *ab, abpoa_para_t *abpt, int beg_node_id, int end_node_id, uint8_t *query, int qlen, abpoa_res_t *res) {
-    // if (abpt->simd_flag == 0) err_fatal_simple("No SIMD instruction available.");
-
+    int old_verbose = abpt->verbose;
+    if (abpt->verbose >= ABPOA_DEBUG_VERBOSE) fprintf(stderr, "beg_id: %d, end_id: %d, qlen: %d\n", beg_node_id, end_node_id, qlen);
+    // if (qlen == 8957) // for debug
+        // abpt->verbose = ABPOA_LONG_DEBUG_VERBOSE;
     int i, j, beg_index = ab->abg->node_id_to_index[beg_node_id], end_index = ab->abg->node_id_to_index[end_node_id];
     int gn = end_index - beg_index + 1;
     uint8_t *index_map = (uint8_t*)_err_calloc(ab->abg->node_n, sizeof(uint8_t));
@@ -1660,19 +1733,16 @@ int simd_abpoa_align_sequence_to_subgraph(abpoa_t *ab, abpoa_para_t *abpt, int b
         }
     }
 
-#ifdef __DEBUG__
+#ifdef __SIMD_DEBUG__
     _simd_p32.inf_min = MAX_OF_TWO(abpt->gap_ext1, abpt->gap_ext2) * 31 +MAX_OF_THREE(INT32_MIN + abpt->min_mis, INT32_MIN + abpt->gap_open1 + abpt->gap_ext1, INT32_MIN + abpt->gap_open2 + abpt->gap_ext2);
     if (simd_abpoa_realloc(ab, gn, qlen, abpt, _simd_p32)) return 0;
-    if (abpt->gap_mode == ABPOA_CONVEX_GAP) abpoa_cg_global_align_sequence_to_graph_core(ab, beg_node_id, beg_index, end_node_id, end_index, index_map, qlen, query, abpt, _simd_p32, res);
+    if (abpt->gap_mode == ABPOA_CONVEX_GAP)
+        abpoa_cg_global_align_sequence_to_graph_core(ab, beg_node_id, beg_index, end_node_id, end_index, index_map, qlen, query, abpt, _simd_p32, res);
 #else
     int32_t max_score, bits, mem_ret=0, gap_ext1 = abpt->gap_ext1, gap_ext2 = abpt->gap_ext2;
     int32_t gap_oe1 = abpt->gap_open1+gap_ext1, gap_oe2 = abpt->gap_open2+gap_ext2;
-    // if (abpt->simd_flag & SIMD_AVX512F && !(abpt->simd_flag & SIMD_AVX512BW))
-    //    max_score = INT16_MAX + 1; // AVX512F has no 8/16 bits operations
-    // else {
     int len = qlen > gn ? qlen : gn;
     max_score = MAX_OF_TWO(qlen * abpt->max_mat, len * abpt->gap_ext1 + abpt->gap_open1);
-    // }
     if (max_score <= INT16_MAX - abpt->min_mis - gap_oe1 - gap_oe2) {
         _simd_p16.inf_min = MAX_OF_THREE(INT16_MIN + abpt->min_mis, INT16_MIN + gap_oe1, INT16_MIN + gap_oe2) + 31 * MAX_OF_TWO(gap_ext1, gap_ext2);
         mem_ret = simd_abpoa_realloc(ab, gn, qlen, abpt, _simd_p16);
@@ -1709,6 +1779,7 @@ int simd_abpoa_align_sequence_to_subgraph(abpoa_t *ab, abpoa_para_t *abpt, int b
     }
 #endif
     free(index_map);
+    abpt->verbose = old_verbose;
     return 0;
 }
 
diff --git a/test_data/heter.fq b/test_data/heter.fq
new file mode 100644
index 0000000..bfbb23f
--- /dev/null
+++ b/test_data/heter.fq
@@ -0,0 +1,60 @@
+@m64062_200517_230654/46596725/ccs
+CCATTCCCACCATCCTTACCATCAACATCACCATCCCCACCATCCCCAACACCATTCCCACCATCCCTACCATCACCATCACCATCCCCACCAACATCCCCACCACCATCCTCACTACCATCCCCACCACCATTTCCACCATTCCCACCACAGTCACCATCACCCCCACCATCCCCATCATCATCCGCACCATCCCCACCATCCCCACCACCATCTCCATTACCATCCCCACCACCATCTCCATTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATTTCCACCATTCCCACCATCATCCCCACCACCATCCTCGTTACCATCCCCACCACCTTTTCCACCATTCCCACCATCTCCAACACCTCCCCCACCATCATCCCCACCATCCCCACCACCTTCTCCACCATCATTCTCACCATCCCCACCACCATCTCCACCACCATTCTCACCATCTCCACCAACATCCCCACCATCCCCACCCCCATGCCCACCAACATCCCCACCATCCCCACCCCCATGCCCACCAACATCCCCACCATCCCCACCCCCATGCCCACCATCATCCCCACCATCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@m64062_200517_230654/122620624/ccs
+CCATTCCCACCATCCTTACCATCAACATCACCATCCCCACCATCCCCAACACCATTCCCACCATCCCTACCATCACCATCACCATCCCCACCAACATCCCCACCACCATCCTCACTACCATCCCCACCACCATTTCCACCATTCCCACCACAGTCACCATCACCCCCACCATCCCCATCATCATCCGCACCATCCCCACCATCCCCACCACCATCTCCATTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATTTCCACCATTCCCACCATCATCCCCACCACCATCCTCGTTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATTTCCACCATTCCCCACCATCATCCCCACCACCATCCCCATTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATTTCCACCATTCCCACCATCATCCCCACCACCATCCTCGTTACCATCCCCACCACCTTTTCCACCATTCCCACCATCATCCCCACCGCCATCCTCGTTACCATCCCCACCACCTTTTCCACCATTCCCACCATCTCCAACACCTCCCCCACCATCATCCCCACCATCCCCACCACCTTCTCCACCATCATTCTCACCATCCCCACCACCATCTCCACCACCATTCTCACCATCTCCACCAACATCCCCACCATCCCCACCCCCATGCCCACCAACATCCCCACCATCCCCACCCCCATGCCCACCATCATCCCCACCATCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@m64062_200604_115437/178193244/ccs
+CCATTCCCACCATCCTTACCATCAACATCACCATCCCCACCATCCCCAACACCATTCCCACCATCCCTACCATCACCATCACCATCCCCCACCAACATCCCCACCACCATCCTCACTACCATCCCCACCACCATTTCCACCATTCCCACCACAGTCACCATCACCCCCACCATCCCCATCATCATCCGCACCATCCCCACCATCCCCACCACCATCTCCATTACCATCCCCACCACCATCTCCATTACCATCCCCACCACCATCCCCATTACCATCCCCCACCACCATCCCCATTACCATCCCCACCACCATTTCCACCATTCCCACCATCATCCCCACCACCATCCTCGTTACCATCCCCACCACCTTTTCCACCATTCCCACCATCTCCAACACCTCCCCCACCATCATCCCCACCATCCCCACCACCTTCTCCACCATCATTCTCACCATCCCCACCACCATCTCCACCACCATTCTCACCATCTCCACCAACATCCCCACCATCCCACCCCCATGCCCACCAACATCCCCACCATCCCCACCCCATGCCCACCAACATCCCCACCATCCCCACCCCCATGCCCACCATCATCCCCACCATCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@m64062_200517_230654/73204047/ccs
+CCATTCCCACCATCCTTACCATCAACATCACCATCCCCACCATCCCCAACACCATTCCCACCATCCCTACCATCACCATCACCATCCCCACCAACATCCCCACCACCATCCTCACTACCATCCCCACCACCATTTCCACCATTCCCACCACAGTCACCATCACCCCCACCATCCCCATCATCATCCGCACCATCCCCACCATCCCCACCACCATCTCCATTACCATCCCCACCACCATCTCCATTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATTTCCACCATTCCCACCATCATCCCCACCACCATCCTCGTTACCATCCCCACCACCTTTTCCACCATTCCCACCATCTCCAACACCTCCCCCACCATCATCCCCACCATCCCCACCACCTTCTCCACCATCATTCTCACCATCCCCACCACCATCTCCACCACCATTCTCACCATCTCCACCAACATCCCCACCATCCCCACCCCCATGCCCACCAACATCCCCACCATCCCCACCCCCATGCCCACCAACATCCCCACCATCCCCACCCCCATGCCCACCATCATCCCCACCATCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@m64062_200604_115437/157813105/ccs
+CCATTCCCACCATCCTTACCATCAACATCACCATCCCCACCATCCCCAACACCATTCCCACCATCCCTACCATCACCATCACCATCCCCACCAACATCCCCACCACCATCCTCACTACCATCCCCACCACCATTTCCACCATTCCCACCACAGTCACCATCACCCCCACCATCCCCATCATCATCCGCACCATCCCCACCATCCCCACCACCATCTCCATTACCATCCCCACCACCATCTCCATTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATTTCCACCATTCCCACCATCATCCCCACCACCATCCTCGTTACCATCCCCACCACCTTTTCCACCATTCCCACCATCTCCAACACCTCCCCCACCATCATCCCCACCATCCCCACCACCTTCTCCACCATCATTCTCACCATCCCCACCACCATCTCCACCACCATTCTCACCATCTCCACCAACATCCCCACCATCCCCACCCCCATGCCCACCAACATCCCCACCATCCCCACCCCCATGCCCACCAACATCCCCACCATCCCCACCCCCATGCCCACCATCATCCCCACCATCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@m64062_200604_115437/141952744/ccs
+CCATTCCCACCATCCTTACCATCAACATCACCATCCCCCACCATCCCCAACACCATTCCCACCATCCCTACCATCACCATCACCATCCCCACCAACATCCCCACCACCATCCTCACTACCATCCCCACCACCATTTCCACCATTCCCACCACAGTCACCATCACCCCCACCATCCCCATCATCATCCGCACCATCCCCACCATCCCCACCACCATCTCCATTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATTTCCACCATTCCCACCATCATCCCCACCACCATCCTCAGTTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATTTCCACCATTCCCACCATCATCCCCACCACCATCCCCATTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATTTCCACCATTCCCACCATCATCCCACCACCATCCTCGTTACCATCCCCACCACCTTTTCCACCATTCCCACCATCATCCCCACCGCCATCCTCGTTACCATCCCCACCACCTTTTCCACCATTCCCACCATCTCCAACACCTCCCCCACCATCATCCCCACCATCCCCACCACCTTCTCCACCATCATTCTCACCATCCCCACCACCATCTCCACCACCATTCTCACCATCCTCCACCAACATCCCCACCATCCCCACCCATGCCCACCAACATCCCCACCATCCCCACCCCCATGCCCACCATCATCCCCACCATCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@m64062_200604_115437/19531191/ccs
+CCATTCCCACCATCCTTACCATCAACATCACCATCCCCACCATCCCCAACACCATTCCCACCATCCCTACCATCACCATCACCATCCCCCACCAACATCCCCACCACCATCCTCACTACCATCCCCACCACCATTTCCACCATTCCCACCACAGTCACCATCACCCCCACCATCCCCATCATCATCCGCACCATCCCCACCATCCCCACCACCATCTCCATTACCATCCCACCACCATCCCCATTACCATCCCCACCACCATCCCCATTACCATCCCACCACCATTTCCACCATTCCCACCATCATCCCCACCACCATCCTCGTTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATTTCCACCATTCCCACCATCATCCCCACCACCATCCCCATTAACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATTTCCACCATTCCCACCATCATCCCCACCACCATCCCTCGTTACCATCCCCACCACCTTTTCCACCATTCCCACCATCATCCCCACCGCCATCCTCGTTACCATCCCCACCACCTTTTCCACCATTCCCACCATCTCCAACACCTCCCCCACCATCATCCCCACCATCCCCACCACCTTCTCCACCATCATTCTCACCATCCCCACCACCATCTCCACACCATTCTCACCATCTCCACCAACATCCCACCATCCCCACCCCCATGCCCACCAACATCCCCACCATCCCCACCCCCATGCCCACCATCAATCCCCACCATCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@m64062_200604_115437/120652681/ccs
+CCATTCCCACCATCCTTACCATCAACATCACCATCCCCACCATCCCCAACACCATTCCCACCATCCCTACCATCACCATCACCATCCCCACCACATCCCCACCACCATCCTCACTACCATCCCCACCACCATTTCCACCATTCCCACCACAGTCACCATCACCCCCACCATCCCCATCATCATCCGCACCATCCCCACCATCCCCACCACCATCTCCATTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATTTCCACCATTCCCACCATCATCCCCACCACCATCCTCGTTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATTTCCACCATTCCCACCATCATCCCCACCACCATCCCCATTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATTTCCACCATTCCCACCATCATCCCCACCACCATCCTCGTTACCATCCCCACCACCTTTTCCACCATTCCCACCATCATCCCCACCGCCATCCTCGTTACCATCCCCACCACCTTTTCCACCATTCCCACCATCTCCAACACCTCCCCCACCATCATCCCCACCATCCCCACCACCTTCTCCACCATCATTCTCACCATCCCCACCACCATCTCCACCACCATTCTCACCATCTCCACCAACATCCCCACCATCCCCACCCCCATGCCCACCAACATCCCCACCATCCCCACCCCCATGCCCACCATCATCCCCACCATCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@m64062_200604_115437/28773203/ccs
+CCATTCCCACCATCCTTACCATCAACATCACCATCCCCACCATCCCCAACACCATTCCCACCATCCCTACCATCACCATCACCATCCCCACCAACATCCCCACCACCATCCTCACTACCATCCCCACCACCATTTCCACCATTCCCACCACAGTCACCATCACCCCCACCATCCCCATCATCATCCGCACCATCCCCACCATCCCCACCACCATCTCCATTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATTTCCACCATTCCCACCATCATCCCCACCACCATCCTCGTTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATTTCCACCATTCCCACCATCATCCCCACCACCATCCCCATTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATTTCCACCATTCCCACCATCATCCCCACCACCATCCTCGTTACCATCCCCACCACCTTTTCCACCATTCCCACCATCATCCCCACCGCCATCCTCGTTACCATCCCCACCACCTTTTCCACCATTCCCACCATCTCCAACACCTCCCCCACCATCATCCCCACCATCCCCACCACCTTCTCCACCATCATTCTCACCATCCCCACCACCATCTCCACCACCATTCTCACCATCTCCACCAACATCCCCACCATCCCCACCCCCATGCCCACCAACATCCCCACCATCCCCACCCCCATGCCCACCATCATCCCCACCATCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@m64062_200604_115437/75628767/ccs
+CCATTCCCACCATCCTTACCATCAACATCACCATCCCCACCATCCCCAACACCATTCCCACCATCCCTACCATCACCATCACCATCCCCACCAACATCCCCCACCACCATCCTCACTACCATCCCCACCACCATTTCCACCATTCCCACCACAGTCACCATCACCCCCACCATCCCCATCATCATCCGCACCATCCCCACCATCCCCACCACCATCTCCATTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATTCCATCCATTCCACCATCATCCCCACCACCATCCTCGTTACCATCCCCACCACCATCCCCATTACCATCCCACCACCATTTCCACCATCCCACCATCATCCCCACCACCATCCCCAGTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATTTCCACCATTCCCACCATCATCCCCACCACCATCCTCGTTACCATCCCCACCACCTTTTCCACCATTCCCACCATCATCCCCACCGCCATCCTCGTTACCATCCCCACCACCTTTTCCACCATTCCCACCATCTCCAACACCTCCTCCCCCACCATCATCCCCACCATCCCCACCACCTTCTCCACCATCATTCTCACCATCCCCACCACCATCTCCACCACCATTCTCACCATCTCCACCAACATCCCCACCATCCCCACCCCCATGCCCACCAACATCCCCACCATCCCCACCCCCATGCCCACCATCATCCCCACCATCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@m64062_200517_230654/180226763/ccs
+CCATTCCCACCATCCTTACCATCAACATCACCATCCCCACCATCCCCAACACCATTCCCACCATCCCTACCATCACCATCACCATCCCCACCAACATCCCCACCACCATCCTCACTACCATCCCCACCACCATTTCCACCATTCCCACCACAGTCACCATCACCCCACCATCCCCATCATCATCCGCACCATCCCCACCATCCCCACCACCATCTCCATTACCATCCCCACCACCATCTCCATTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATTTCCACCATTCCCACCATCATCCCCACCACCATCCTCGTTACCATCCCCACCACCTTTTCCACCATTCCCACCATCTCCAACACCTCCCCCACCATCATCCCCACCATCCCCACCACCTTCTCCACCATCATTCTCACCATCCCCACCACCATCTCCACCACCATTCTCACCATCTCCACCAACATCCCCACCATCCCCACCCCCATGCCCACCAACATCCCCACCATCCCCACCCCCATGCCCACCAACATCCCCACCATCCCCACCCCCATGCCCACCATCATCCCCACCATCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@m64062_200517_230654/29755012/ccs
+CCATTCCACCATCCTTACCATCAACATCACCATCCCCACCATCCCCAACACCATTCCCACCATCCCTACCATCACCATCACCATCCCCACCAACATCCCCACCACCATCCTCACTACCATCCCCACCACCATTTCCACCATTCCCACCACAGTCACCATCACCCCCACCATCCCCATCATCATCCGCACCATCCCCACCATCCCCACCACCATCTCCATTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATTTCCACCATTCCCACCATCATCCCCACCACCATCCTCGTTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATTTCCACCATTCCCACCATCATCCCCACCACCATCCCCATTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATTTCCACCATTCCCACCATCATCCCCACCACCATCCTCGTTACCATCCCCACCACCTTTTCCACCATTCCCACCATCATCCCCACCGCCATCCTCGTTACCATCCCCACCACCTTTTCCACCATTCCCACCATCTCCAACACCTCCCCCACCATCATCCCCACCATCCCCACCACCTTCTCCACCATCATTCTCACCATCCCCACCACCATCTCCACCACCATTCTCACCATCTCCACCAACATCCCCACCATCCCCACCCCCATGCCCACCAACATCCCCACCATCCCCACCCCCATGCCCACCATCATCCCCACCATCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@m64062_200517_230654/154468686/ccs
+CCATTCCCACCATCCTTACCATCAACATCACCATCCCCACCATCCCCAACACCATGCCCACCATCCCTACCATCACCATCACCATCCCCACCAACATCCCACCACCATCCTCACTACCATCCCACCACCATTCCACCATTCCCCACCACAGTCACCATCACCCCCACCATCCCCATCATCATCCGCACCATCCCCACCATCCCACCACCATCTCCATTACCATCCCCACCACCATCCCATTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATTTCCACCATTCCCACCATCATCCCCACCACCATCCTCGTTACCATCCCCACCACCATCCCATTACCATCCCCACCACCATTTCCACCATTCCCACCATCCATCCCCACCACCATCCCCATTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATTTCCACCATTCCCACCATCATCCCCACCACCATCCTCGTTACCATCCCCACCACCTTTTCCACCATTCCCACCATCATCCCCACCGCCATCCTCGTTACCATCCCCACCACCTTTTCCACCATTCCCACCATCTCCAACACCTCCCCACCATCATCCCCACCATCCCCACCACCTTCTCCACCATCATTCTCACCATCCCCACCACCATCTCCACCACCATTCTCACCATCTCCACCAACATCCCCACCATCCCACCCCCCATGCCCACCAACATCCCCACCATCCCCACCCCCATGCCCACCATCATCCCCACCATC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@m64062_200604_115437/146211230/ccs
+CCATTCCCACCATCCTTACCATCAACATCACCATCCCCACCATCCCCAACACCATTCCCACCATCCCTACCATCACCATCACCATCCCCACCAACATCCCCACCACCATCCTCACTACCATCCCCACCACCATTTCCACCATTCCCACCACAGTCACCATCACCCCCACCATCCCCATCATCATCCGCACCATCCCCACCATCCCCACCACCATCTCCATTACCATCCCCACCACCATCTCCATTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATTTCCACCATTCCCACCATCATCCCCACCACCATCCTCGTTACCATCCCCACCACCTTTTCCACCATTCCCACCATCTCCAACACCTCCCCCACCATCATCCCCACCATCCCCACCACCTTCTCCACCATCATTCTCACCATCCCCACCACCATCTCCACCACCATTCTCACCATCTCCACCAACATCCCCACCATCCCCACCCCCATGCCCACCAACATCCCCACCATCCCCACCCCCATGCCCACCAACATCCCCACCATCCCCACCCCCATGCCCACCATCATCCCCACCATCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@m64062_200517_230654/85983296/ccs
+CCATTCCCACCATCCTTACCATCAACATCACCATCCCCACCATCCCCAACACCATTCCCACCATCCCTACCATCACCATCACCATCCCCACCAACATCCCCACCACCATCCTCACTACCATCCCCACCACCATTTCCACCATTCCCACCACAGTCACCATCACCCCCACCATCCCCATCATCATCCGCACCATCCCCACCATCCCCACCACCATCTCCATTACCATCCCCACCACCATCTCCATTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATTTCCACCATTCCCACCATCATCCCCACCACCATCCTCGTTACCATCCCCACCACCTTTTCCACCATTCCCACCATCTCCAACACCTCCCCCACCATCATCCCCACCATCCCCACCACCTTCTCCACCATCATTCTCACCATCCCCACCACCATCTCCACCACCATTCTCACCATCTCCACCAACATCCCCACCATCCCCACCCCCATGCCCACCAACATCCCCACCATCCCCACCCCCATGCCCACCAACATCCCCACCATCCCCACCCCCATGCCCACCATCATCCCCACCATCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII