From 1794e1a3c69f869bfb6c4a6bd4fc09e87d52f2d2 Mon Sep 17 00:00:00 2001
From: Yan Gao <gaoy1@chop.edu>
Date: Mon, 15 Jan 2024 20:57:43 -0500
Subject: [PATCH 01/38] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 258025f..99564c2 100644
--- a/README.md
+++ b/README.md
@@ -10,7 +10,7 @@
 <!-- [![PyPI](https://img.shields.io/pypi/v/pyabpoa.svg?style=flat)](https://pypi.python.org/pypi/pyabpoa) -->
 ## Updates (v1.5.1)
 
-- Fix a memory allocation bug caused by int overflow when memery is large (>4G)
+- Fix a memory allocation bug caused by int overflow when memory is large (>4G)
 
 ## Getting started
 Download the [latest release](https://github.com/yangao07/abPOA/releases):

From 6c29fc2a095183e3c076dc621590780111eb15fc Mon Sep 17 00:00:00 2001
From: Chris Wright <chris.wright@nanoporetech.com>
Date: Tue, 16 Jan 2024 16:59:25 +0000
Subject: [PATCH 02/38] Stub out github action for python wheels

---
 .github/workflows/wheels.yml | 76 ++++++++++++++++++++++++++++++++++++
 1 file changed, 76 insertions(+)
 create mode 100644 .github/workflows/wheels.yml

diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml
new file mode 100644
index 0000000..2971a7a
--- /dev/null
+++ b/.github/workflows/wheels.yml
@@ -0,0 +1,76 @@
+name: Build and upload to PyPI
+
+# Build on every branch push, tag push, and pull request change:
+on: [push, pull_request]
+# Alternatively, to publish when a (published) GitHub Release is created, use the following:
+# on:
+#   push:
+#   pull_request:
+#   release:
+#     types:
+#       - published
+env:
+  CIBW_BUILD_VERBOSITY: 3
+  #CIBW_TEST_COMMAND: python -m unittest discover {project}/tests
+  # Disable building PyPy wheels on all platforms
+  CIBW_SKIP: pp*
+
+jobs:
+  build_wheels:
+    name: Build wheels for ${{ matrix.python }}-${{ matrix.buildplat[1] }}
+    runs-on: ${{ matrix.buildplat[0] }}
+    strategy:
+      # Ensure that a wheel builder finishes even if another fails
+      fail-fast: false
+      matrix:
+        buildplat:
+        - [ubuntu-latest, manylinux_x86_64, auto]
+        - [macos-latest, macosx_x86_64, x86_64]
+        - [macos-latest, macosx_arm64, arm64]
+        python: ["cp38", "cp39", "cp310", "cp311", "cp312"]
+
+    steps:
+      - uses: actions/checkout@v3
+
+      - name: Build wheels
+        uses: pypa/cibuildwheel@v2.15.0
+        env:
+          CIBW_ARCHS: ${{ matrix.buildplat[2] }}
+          CIBW_BUILD: ${{ matrix.python }}-${{ matrix.buildplat[1] }}
+
+      - uses: actions/upload-artifact@v3
+        with:
+          path: ./wheelhouse/*.whl
+
+  build_sdist:
+    name: Build source distribution
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+
+      - name: Build sdist
+        run: pipx run build --sdist
+
+      - uses: actions/upload-artifact@v3
+        with:
+          path: dist/*.tar.gz
+
+  upload_pypi:
+    needs: [build_wheels, build_sdist]
+    runs-on: ubuntu-latest
+    # upload to PyPI on every tag starting with 'v'
+    if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v')
+    # alternatively, to publish when a GitHub Release is created, use the following rule:
+    # if: github.event_name == 'release' && github.event.action == 'published'
+    steps:
+      - uses: actions/download-artifact@v3
+        with:
+          # unpacks default artifact into dist/
+          # if `name: artifact` is omitted, the action will create extra parent dir
+          name: artifact
+          path: dist
+
+      - uses: pypa/gh-action-pypi-publish@v1.5.0
+        with:
+          user: __token__
+          password: ${{ secrets.pypi_password }}

From 5e80eb3f9b72ee42bb829e88dda1335e90352944 Mon Sep 17 00:00:00 2001
From: Chris Wright <chris.wright@nanoporetech.com>
Date: Tue, 16 Jan 2024 17:09:24 +0000
Subject: [PATCH 03/38] checkout submodules in actions

---
 .github/workflows/wheels.yml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml
index 2971a7a..a8bf564 100644
--- a/.github/workflows/wheels.yml
+++ b/.github/workflows/wheels.yml
@@ -31,6 +31,8 @@ jobs:
 
     steps:
       - uses: actions/checkout@v3
+        with:
+            submodules: 'true'
 
       - name: Build wheels
         uses: pypa/cibuildwheel@v2.15.0
@@ -47,6 +49,8 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v3
+        with:
+            submodules: 'true'
 
       - name: Build sdist
         run: pipx run build --sdist

From 2cac60cc8e7507e904b37ac5a35be8c09e8231ed Mon Sep 17 00:00:00 2001
From: Chris Wright <chris.wright@nanoporetech.com>
Date: Tue, 16 Jan 2024 17:21:51 +0000
Subject: [PATCH 04/38] Pin cython cos reasons

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 52c0e7e..a54f54d 100644
--- a/setup.py
+++ b/setup.py
@@ -66,7 +66,7 @@
     author_email = "gaoy1@chop.edu",
     license = "MIT",
     keywords = "multiple-sequence-alignment  partial-order-graph-alignment",
-    setup_requires=["cython"],
+    setup_requires=["cython<3"], # see https://github.com/cython/cython/issues/5568
     # Build instructions
     ext_modules = [
         Extension(

From 23dc7b1cffd6bc47ce9e2af437819a5aa40f29ab Mon Sep 17 00:00:00 2001
From: Chris Wright <chris.wright@nanoporetech.com>
Date: Tue, 16 Jan 2024 17:34:22 +0000
Subject: [PATCH 05/38] Skip one mac build

---
 .github/workflows/wheels.yml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml
index a8bf564..9dd88a8 100644
--- a/.github/workflows/wheels.yml
+++ b/.github/workflows/wheels.yml
@@ -28,6 +28,10 @@ jobs:
         - [macos-latest, macosx_x86_64, x86_64]
         - [macos-latest, macosx_arm64, arm64]
         python: ["cp38", "cp39", "cp310", "cp311", "cp312"]
+        exclude:
+            # This one fails, I (cjw85) don't care for why
+            - buildplat: macosx_arm64
+            - python: "cp38"
 
     steps:
       - uses: actions/checkout@v3

From b99dd31f1c9f9631f827164b6dcb93e5a5658694 Mon Sep 17 00:00:00 2001
From: Chris Wright <chris.wright@nanoporetech.com>
Date: Tue, 16 Jan 2024 17:44:13 +0000
Subject: [PATCH 06/38] Dont skip mac build

---
 .github/workflows/wheels.yml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml
index 9dd88a8..362faf3 100644
--- a/.github/workflows/wheels.yml
+++ b/.github/workflows/wheels.yml
@@ -28,10 +28,10 @@ jobs:
         - [macos-latest, macosx_x86_64, x86_64]
         - [macos-latest, macosx_arm64, arm64]
         python: ["cp38", "cp39", "cp310", "cp311", "cp312"]
-        exclude:
-            # This one fails, I (cjw85) don't care for why
-            - buildplat: macosx_arm64
-            - python: "cp38"
+        #exclude:
+        #    # This one fails, I (cjw85) don't care for why
+        #    - buildplat: macosx_arm64
+        #    - python: "cp38"
 
     steps:
       - uses: actions/checkout@v3

From 6ad7362645f631c8b2391f786d541c67c566c297 Mon Sep 17 00:00:00 2001
From: Chris Wright <chris.wright@nanoporetech.com>
Date: Tue, 16 Jan 2024 17:57:06 +0000
Subject: [PATCH 07/38] Dont do mac arm for now

---
 .github/workflows/wheels.yml | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml
index 362faf3..5b68e3a 100644
--- a/.github/workflows/wheels.yml
+++ b/.github/workflows/wheels.yml
@@ -26,12 +26,9 @@ jobs:
         buildplat:
         - [ubuntu-latest, manylinux_x86_64, auto]
         - [macos-latest, macosx_x86_64, x86_64]
-        - [macos-latest, macosx_arm64, arm64]
+        # skip these for now, need more work
+        #- [macos-latest, macosx_arm64, arm64]
         python: ["cp38", "cp39", "cp310", "cp311", "cp312"]
-        #exclude:
-        #    # This one fails, I (cjw85) don't care for why
-        #    - buildplat: macosx_arm64
-        #    - python: "cp38"
 
     steps:
       - uses: actions/checkout@v3

From 4a1a3362f05cbc373fd7e120a1d2a1202f9042c9 Mon Sep 17 00:00:00 2001
From: Yan Gao <gaoy1@chop.edu>
Date: Tue, 16 Jan 2024 18:51:30 -0500
Subject: [PATCH 08/38] Create c-cpp.yml

---
 .github/workflows/c-cpp.yml | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)
 create mode 100644 .github/workflows/c-cpp.yml

diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml
new file mode 100644
index 0000000..dfe8487
--- /dev/null
+++ b/.github/workflows/c-cpp.yml
@@ -0,0 +1,17 @@
+name: C/C++ CI
+
+on:
+  push:
+    branches: [ "main" ]
+  pull_request:
+    branches: [ "main" ]
+
+jobs:
+  build:
+
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v3
+    - name: make
+      run: make

From 3c983aade783e265b7d5eb5b28df9be88d7ee533 Mon Sep 17 00:00:00 2001
From: Yan Gao <gaoy1@chop.edu>
Date: Tue, 16 Jan 2024 18:58:18 -0500
Subject: [PATCH 09/38] Update c-cpp.yml

---
 .github/workflows/c-cpp.yml | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml
index dfe8487..338854c 100644
--- a/.github/workflows/c-cpp.yml
+++ b/.github/workflows/c-cpp.yml
@@ -8,10 +8,13 @@ on:
 
 jobs:
   build:
-
     runs-on: ubuntu-latest
-
     steps:
-    - uses: actions/checkout@v3
+    - name: Preparing Repositories
+      uses: actions/checkout@v3
+      with:
+        token: '${{ secrets.ACCESS_TOKEN }}'
+        submodules: recursive
+
     - name: make
       run: make

From 2822424a782bf220382414c18784b920ebe26468 Mon Sep 17 00:00:00 2001
From: Yan Gao <gaoy1@chop.edu>
Date: Tue, 16 Jan 2024 19:05:36 -0500
Subject: [PATCH 10/38] Update c-cpp.yml

---
 .github/workflows/c-cpp.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml
index 338854c..b1ba78d 100644
--- a/.github/workflows/c-cpp.yml
+++ b/.github/workflows/c-cpp.yml
@@ -13,7 +13,6 @@ jobs:
     - name: Preparing Repositories
       uses: actions/checkout@v3
       with:
-        token: '${{ secrets.ACCESS_TOKEN }}'
         submodules: recursive
 
     - name: make

From e160755e4e68ab614ed43e2dd5744b83f4abf70a Mon Sep 17 00:00:00 2001
From: Yan Gao <gaoy1@chop.edu>
Date: Tue, 16 Jan 2024 19:09:13 -0500
Subject: [PATCH 11/38] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 99564c2..a3b5e15 100644
--- a/README.md
+++ b/README.md
@@ -5,7 +5,7 @@
 [![PyPI](https://img.shields.io/pypi/dm/pyabpoa.svg?label=pip%20install)](https://pypi.python.org/pypi/pyabpoa)
 [![Published in Bioinformatics](https://img.shields.io/badge/Published%20in-Bioinformatics-blue.svg)](https://dx.doi.org/10.1093/bioinformatics/btaa963)
 [![GitHub Issues](https://img.shields.io/github/issues/yangao07/abPOA.svg?label=Issues)](https://github.com/yangao07/abPOA/issues)
-[![Build Status](https://img.shields.io/travis/yangao07/abPOA/main.svg?label=Main)](https://travis-ci.org/yangao07/abPOA)
+[![C/C++ CI](https://github.com/yangao07/abPOA/actions/workflows/c-cpp.yml/badge.svg)](https://github.com/yangao07/abPOA/actions/workflows/c-cpp.yml)
 [![License](https://img.shields.io/badge/License-MIT-black.svg)](https://github.com/yangao07/abPOA/blob/main/LICENSE)
 <!-- [![PyPI](https://img.shields.io/pypi/v/pyabpoa.svg?style=flat)](https://pypi.python.org/pypi/pyabpoa) -->
 ## Updates (v1.5.1)

From 0a924da169af95bdbc1a55be1bc7aafae22c4f8e Mon Sep 17 00:00:00 2001
From: Yan Gao <gaoy1@chop.edu>
Date: Wed, 17 Jan 2024 00:00:13 -0500
Subject: [PATCH 12/38] Update c-cpp.yml

---
 .github/workflows/c-cpp.yml | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml
index b1ba78d..c293ee7 100644
--- a/.github/workflows/c-cpp.yml
+++ b/.github/workflows/c-cpp.yml
@@ -7,7 +7,7 @@ on:
     branches: [ "main" ]
 
 jobs:
-  build:
+  build-linux:
     runs-on: ubuntu-latest
     steps:
     - name: Preparing Repositories
@@ -17,3 +17,14 @@ jobs:
 
     - name: make
       run: make
+
+  build-macos:
+    runs-on: macos-latest
+    steps:
+    - name: Preparing Repositories
+      uses: actions/checkout@v3
+      with:
+        submodules: recursive
+
+    - name: make
+      run: make

From f1eb926e59f3289f02a6697279b37aebd640cffa Mon Sep 17 00:00:00 2001
From: Yan Gao <yangao07@hit.edu.cn>
Date: Wed, 17 Jan 2024 00:06:05 -0500
Subject: [PATCH 13/38] update CI.yml

---
 .github/workflows/linux-CI.yml                | 19 +++++++++++++++++++
 .github/workflows/{c-cpp.yml => macos-CI.yml} | 13 +------------
 README.md                                     |  3 ++-
 3 files changed, 22 insertions(+), 13 deletions(-)
 create mode 100644 .github/workflows/linux-CI.yml
 rename .github/workflows/{c-cpp.yml => macos-CI.yml} (56%)

diff --git a/.github/workflows/linux-CI.yml b/.github/workflows/linux-CI.yml
new file mode 100644
index 0000000..773529b
--- /dev/null
+++ b/.github/workflows/linux-CI.yml
@@ -0,0 +1,19 @@
+name: linux
+
+on:
+  push:
+    branches: [ "main" ]
+  pull_request:
+    branches: [ "main" ]
+
+jobs:
+  build-linux:
+    runs-on: ubuntu-latest
+    steps:
+    - name: Preparing Repositories
+      uses: actions/checkout@v3
+      with:
+        submodules: recursive
+
+    - name: make
+      run: make
\ No newline at end of file
diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/macos-CI.yml
similarity index 56%
rename from .github/workflows/c-cpp.yml
rename to .github/workflows/macos-CI.yml
index c293ee7..6ca2c17 100644
--- a/.github/workflows/c-cpp.yml
+++ b/.github/workflows/macos-CI.yml
@@ -1,4 +1,4 @@
-name: C/C++ CI
+name: macos
 
 on:
   push:
@@ -7,17 +7,6 @@ on:
     branches: [ "main" ]
 
 jobs:
-  build-linux:
-    runs-on: ubuntu-latest
-    steps:
-    - name: Preparing Repositories
-      uses: actions/checkout@v3
-      with:
-        submodules: recursive
-
-    - name: make
-      run: make
-
   build-macos:
     runs-on: macos-latest
     steps:
diff --git a/README.md b/README.md
index a3b5e15..f7dfc2c 100644
--- a/README.md
+++ b/README.md
@@ -5,7 +5,8 @@
 [![PyPI](https://img.shields.io/pypi/dm/pyabpoa.svg?label=pip%20install)](https://pypi.python.org/pypi/pyabpoa)
 [![Published in Bioinformatics](https://img.shields.io/badge/Published%20in-Bioinformatics-blue.svg)](https://dx.doi.org/10.1093/bioinformatics/btaa963)
 [![GitHub Issues](https://img.shields.io/github/issues/yangao07/abPOA.svg?label=Issues)](https://github.com/yangao07/abPOA/issues)
-[![C/C++ CI](https://github.com/yangao07/abPOA/actions/workflows/c-cpp.yml/badge.svg)](https://github.com/yangao07/abPOA/actions/workflows/c-cpp.yml)
+[![C/C++ CI](https://github.com/yangao07/abPOA/actions/workflows/linux-CI.yml/badge.svg)](https://github.com/yangao07/abPOA/actions/workflows/linux-CI.yml)
+[![C/C++ CI](https://github.com/yangao07/abPOA/actions/workflows/macos-CI.yml/badge.svg)](https://github.com/yangao07/abPOA/actions/workflows/macos-CI.yml)
 [![License](https://img.shields.io/badge/License-MIT-black.svg)](https://github.com/yangao07/abPOA/blob/main/LICENSE)
 <!-- [![PyPI](https://img.shields.io/pypi/v/pyabpoa.svg?style=flat)](https://pypi.python.org/pypi/pyabpoa) -->
 ## Updates (v1.5.1)

From 52fd78e1a5c3a16eea4ec92d1df58745ebd42abf Mon Sep 17 00:00:00 2001
From: Yan Gao <yangao07@hit.edu.cn>
Date: Wed, 17 Jan 2024 20:06:16 -0500
Subject: [PATCH 14/38] update setup.py

---
 .github/workflows/wheels.yml |  2 +-
 setup.py                     | 15 +++++++++------
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml
index 5b68e3a..1ceec11 100644
--- a/.github/workflows/wheels.yml
+++ b/.github/workflows/wheels.yml
@@ -27,7 +27,7 @@ jobs:
         - [ubuntu-latest, manylinux_x86_64, auto]
         - [macos-latest, macosx_x86_64, x86_64]
         # skip these for now, need more work
-        #- [macos-latest, macosx_arm64, arm64]
+        - [macos-latest, macosx_arm64, arm64]
         python: ["cp38", "cp39", "cp310", "cp311", "cp312"]
 
     steps:
diff --git a/setup.py b/setup.py
index a54f54d..21ee45b 100644
--- a/setup.py
+++ b/setup.py
@@ -1,4 +1,4 @@
-import os, platform, sys
+import os
 
 try:
     from setuptools import setup, Extension
@@ -9,19 +9,22 @@
 
 simde = ['-DUSE_SIMDE', '-DSIMDE_ENABLE_NATIVE_ALIASES']
 
-if platform.system() == "Darwin":
+machine_system = os.popen("uname").readlines()[0].rsplit()[0]
+machine_arch = os.popen("uname -m").readlines()[0].rsplit()[0]
+
+if machine_system == "Darwin":
     # note: see https://github.com/pypa/wheel/issues/406
     simd_flag = ['-march=native', '-D__AVX2__', '-mmacosx-version-min=10.9']
-    if platform.machine() in ["aarch64", "arm64"]:
+    if machine_arch in ["aarch64", "arm64"]:
         os.environ['_PYTHON_HOST_PLATFORM'] = "macosx-10.9-arm64"
         os.environ['ARCHFLAGS'] = "-arch arm64"
-    else:
+    else: # x86_64
         os.environ['_PYTHON_HOST_PLATFORM'] = "macosx-10.9-x86_64"
         os.environ['ARCHFLAGS'] = "-arch x86_64"
 else:
-    if platform.machine() in ["aarch64", "arm64"]:
+    if machine_arch in ["aarch64", "arm64"]:
         simd_flag = ['-march=armv8-a+simd', '-D__AVX2__']
-    elif platform.machine() in ["aarch32"]:
+    elif machine_arch in ["aarch32"]:
         simd_flag = ['-march=armv8-a+simd', '-mfpu=auto -D__AVX2__']
     else:
         simd_flag=['-march=native']

From bc0c1ce6451a8578f17cac87e17d64184d619ffb Mon Sep 17 00:00:00 2001
From: Yan Gao <yangao07@hit.edu.cn>
Date: Wed, 17 Jan 2024 20:38:08 -0500
Subject: [PATCH 15/38] update setup.py

---
 setup.py | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/setup.py b/setup.py
index 21ee45b..8960eb1 100644
--- a/setup.py
+++ b/setup.py
@@ -1,4 +1,5 @@
-import os
+#!/usr/bin/env python
+import os, platform, sys
 
 try:
     from setuptools import setup, Extension
@@ -9,24 +10,21 @@
 
 simde = ['-DUSE_SIMDE', '-DSIMDE_ENABLE_NATIVE_ALIASES']
 
-machine_system = os.popen("uname").readlines()[0].rsplit()[0]
-machine_arch = os.popen("uname -m").readlines()[0].rsplit()[0]
-
-if machine_system == "Darwin":
+if platform.system() == "Darwin":
     # note: see https://github.com/pypa/wheel/issues/406
     simd_flag = ['-march=native', '-D__AVX2__', '-mmacosx-version-min=10.9']
-    if machine_arch in ["aarch64", "arm64"]:
+    if platform.machine() in ["aarch64", "arm64"]:
         os.environ['_PYTHON_HOST_PLATFORM'] = "macosx-10.9-arm64"
         os.environ['ARCHFLAGS'] = "-arch arm64"
     else: # x86_64
         os.environ['_PYTHON_HOST_PLATFORM'] = "macosx-10.9-x86_64"
         os.environ['ARCHFLAGS'] = "-arch x86_64"
-else:
-    if machine_arch in ["aarch64", "arm64"]:
+else: # Linux
+    if platform.machine() in ["aarch64", "arm64"]:
         simd_flag = ['-march=armv8-a+simd', '-D__AVX2__']
-    elif machine_arch in ["aarch32"]:
+    elif platform.machine() in ["aarch32"]:
         simd_flag = ['-march=armv8-a+simd', '-mfpu=auto -D__AVX2__']
-    else:
+    else: # x86_64
         simd_flag=['-march=native']
         if os.getenv('SSE4', False):
             simd_flag=['-msse4.1']

From 017a3ac3a7084b5a574dc6f100bf7f4fb4ea44e1 Mon Sep 17 00:00:00 2001
From: Yan Gao <yangao07@hit.edu.cn>
Date: Wed, 17 Jan 2024 20:52:10 -0500
Subject: [PATCH 16/38] update setup.py

---
 setup.py | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/setup.py b/setup.py
index 8960eb1..f8c54f8 100644
--- a/setup.py
+++ b/setup.py
@@ -1,5 +1,4 @@
-#!/usr/bin/env python
-import os, platform, sys
+import os
 
 try:
     from setuptools import setup, Extension
@@ -10,19 +9,22 @@
 
 simde = ['-DUSE_SIMDE', '-DSIMDE_ENABLE_NATIVE_ALIASES']
 
-if platform.system() == "Darwin":
+machine_system = os.popen("uname").readlines()[0].rsplit()[0]
+machine_arch = os.popen("uname -m").readlines()[0].rsplit()[0]
+
+if machine_system == "Darwin":
     # note: see https://github.com/pypa/wheel/issues/406
     simd_flag = ['-march=native', '-D__AVX2__', '-mmacosx-version-min=10.9']
-    if platform.machine() in ["aarch64", "arm64"]:
+    if machine_arch in ["aarch64", "arm64"]:
         os.environ['_PYTHON_HOST_PLATFORM'] = "macosx-10.9-arm64"
         os.environ['ARCHFLAGS'] = "-arch arm64"
     else: # x86_64
         os.environ['_PYTHON_HOST_PLATFORM'] = "macosx-10.9-x86_64"
         os.environ['ARCHFLAGS'] = "-arch x86_64"
-else: # Linux
-    if platform.machine() in ["aarch64", "arm64"]:
+else:
+    if machine_arch in ["aarch64", "arm64"]:
         simd_flag = ['-march=armv8-a+simd', '-D__AVX2__']
-    elif platform.machine() in ["aarch32"]:
+    elif machine_arch in ["aarch32"]:
         simd_flag = ['-march=armv8-a+simd', '-mfpu=auto -D__AVX2__']
     else: # x86_64
         simd_flag=['-march=native']

From 8a2dc2fc49b10bb1b57ef8738b7f8c6a48cbad79 Mon Sep 17 00:00:00 2001
From: Yan Gao <yangao07@hit.edu.cn>
Date: Sun, 17 Mar 2024 12:41:18 -0400
Subject: [PATCH 17/38] update README

---
 README.md | 71 +++++++++++++++++++++++++++++++------------------------
 1 file changed, 40 insertions(+), 31 deletions(-)

diff --git a/README.md b/README.md
index f7dfc2c..baa0f13 100644
--- a/README.md
+++ b/README.md
@@ -44,12 +44,13 @@ abpoa ./test_data/seq.fa > cons.fa
     - [Building abPOA from source files](#building-abpoa-from-source-files)
     - [Pre-built binary executable file for Linux/Unix or MacOS](#pre-built-binary-executable-file-for-linuxunix-or-macos)
   - [General usage](#general-usage)
-    - [To generate consensus sequence](#to-generate-consensus-sequence)
-    - [To generate multiple consensus sequences](#to-generate-multiple-consensus-sequences)
-    - [To generate row-column multiple sequence alignment in FASTA format](#to-generate-row-column-multiple-sequence-alignment-in-fasta-format)
-    - [To generate graph information in GFA format](#to-generate-graph-information-in-gfa-format)
-    - [To align sequence to an existing graph in GFA/MSA format](#to-align-sequence-to-an-existing-graph-in-gfamsa-format)
-    - [To generate a plot of the alignment graph](#to-generate-a-plot-of-the-alignment-graph)
+    - [Generate consensus sequence](#generate-consensus-sequence)
+    - [Generate multiple consensus sequences](#generate-multiple-consensus-sequences)
+    - [Generate row-column multiple sequence alignment in FASTA format](#generate-row-column-multiple-sequence-alignment-in-fasta-format)
+    - [Generate graph information in GFA format](#generate-graph-information-in-gfa-format)
+    - [Align sequence to an existing graph in GFA/MSA format](#align-sequence-to-an-existing-graph-in-gfamsa-format)
+    - [Generate consensus sequence for amino acid sequences](#generate-consensus-sequence-for-amino-acid-sequences)
+    - [Generate a plot of the alignment graph](#generate-a-plot-of-the-alignment-graph)
   - [Input](#input)
   - [Output](#output)
     - [Consensus sequence](#consensus-sequence)
@@ -80,15 +81,15 @@ It right now supports SSE2/SSE4.1/AVX2 vectorization.
 
 For more information, please refer to our [paper](https://dx.doi.org/10.1093/bioinformatics/btaa963) published in Bioinformatics.
 
-## <a name="install"></a>Installation
+## Installation
 
-### <a name="conda"></a>Installing abPOA via conda
+### Installing abPOA via conda
 On Linux/Unix and Mac OS, abPOA can be installed via
 ```
 conda install -c bioconda abpoa   # install abPOA program
 ```
 
-### <a name="build"></a>Building abPOA from source files
+### Building abPOA from source files
 You can also build abPOA from source files. 
 Make sure you have gcc (>=6.4.0) and zlib installed before compiling.
 It is recommended to download the [latest release](https://github.com/yangao07/abPOA/releases).
@@ -104,7 +105,7 @@ git clone --recursive https://github.com/yangao07/abPOA.git
 cd abPOA; make
 ```
 
-### <a name="binary"></a>Pre-built binary executable file for Linux/Unix or MacOS
+### Pre-built binary executable file for Linux/Unix or MacOS
 If you meet any compiling issue, please try the pre-built binary file for linux:
 ```
 wget https://github.com/yangao07/abPOA/releases/download/v1.5.1/abPOA-v1.5.1_x64-linux.tar.gz
@@ -116,27 +117,27 @@ wget https://github.com/yangao07/abPOA/releases/download/v1.5.1/abPOA-v1.5.1_arm
 tar -zxvf abPOA-v1.5.1_arm64-macos.tar.gz
 ```
 
-## <a name="usage"></a>General usage
-### <a name="gen_1cons"></a>To generate consensus sequence
+## General usage
+### Generate consensus sequence
 
 ```
 abpoa seq.fa > cons.fa
 ```
 
-### <a name="gen_mcons"></a>To generate multiple consensus sequences
+### Generate multiple consensus sequences
 
 ```
 abpoa heter.fa -d2 > 2cons.fa
 ```
 
-### <a name="gen_msa"></a>To generate row-column multiple sequence alignment in FASTA format
+### Generate row-column multiple sequence alignment in FASTA format
 
 ```
 abpoa seq.fa -r1 > out.msa
 abpoa seq.fa -r2 > out_cons.msa
 ```
 
-### <a name="gen_gfa"></a>To generate graph information in [GFA](https://github.com/GFA-spec/GFA-spec/blob/master/GFA1.md) format
+### Generate graph information in [GFA](https://github.com/GFA-spec/GFA-spec/blob/master/GFA1.md) format
 
 ```
 abpoa seq.fa -r3 > out.gfa
@@ -146,7 +147,7 @@ To include the generated consensus sequence as a path in the GFA file:
 abpoa seq.fa -r4 > out.gfa
 ```
 
-### <a name="aln_to_gfa"></a>To align sequence to an existing graph in GFA/MSA format
+### Align sequence to an existing graph in GFA/MSA format
 ```
 abpoa -i in.gfa seq.fa -r3 > out.gfa
 abpoa -i in.msa seq.fa -r1 > out.msa
@@ -158,14 +159,22 @@ abpoa seq1.fa -r1 > seq1.msa
 abpoa -i seq1.msa seq2.fa > cons.fa
 ```
 
-### <a name="gen_plot"></a>To generate a plot of the alignment graph
+### Generate consensus sequence for amino acid sequences
+```
+abpoa -c -t BLOSUM62.mtx input_aa.fa > output_aa_cons.fa
+```
+abPOA provides two score matrix files for amino acid sequences: `BLOSUM62.mtx`, `HOXD70.mtx`.
+
+You can also use any score matrix, as long as it has the same format as the above two.
+
+### Generate a plot of the alignment graph
 
 ```
 abpoa seq.fa -g poa.png > cons.fa
 ```
 See [Plot of alignment graph](#plot) for more details about the plot file.
 
-## <a name="input"></a>Input
+## Input
 abPOA works with FASTA, FASTQ, gzip'd FASTA(.fa.gz) and gzip'd FASTQ(.fq.gz) formats. The input file is 
 expected to contains multiple sequences which will be processed sequentially to perform the iterative 
 sequence-to-graph (partial order) alignment.
@@ -174,8 +183,8 @@ abPOA can also take a list of filenames as input with option `-l`, where each li
 file containing multiple sequences. Each sequence file is then individually aligned by abPOA to generate a
 consensus sequence.
 
-## <a name="output"></a>Output
-### <a name="cons"></a>Consensus sequence 
+## Output
+### Consensus sequence 
 By default, abPOA only outputs the consensus sequence generated from the final alignment graph.
 It is in FASTA format with the name field set as "Consensus_sequence".
 For example:
@@ -195,7 +204,7 @@ CCATTCCCACCATCCTTACCATCAACATCACCATCCCCACCATCCCCAACACCATTCCCACCATCCCTACCATCACCATC
 >Consensus_sequence_2
 CCATTCCCACCATCCTTACCATCAACATCACCATCCCCACCATCCCCAACACCATTCCCACCATCCCTACCATCACCATCACCATCCCCACCAACATCCCCACCACCATCCTCACTACCATCCCCACCACCATTTCCACCATTCCCACCACAGTCACCATCACCCCCACCATCCCCATCATCATCCGCACCATCCCCACCATCCCCACCACCATCTCCATTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATTTCCACCATTCCCACCATCATCCCCACCACCATCCTCGTTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATTTCCACCATTCCCACCATCATCCCCACCACCATCCCCATTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATTTCCACCATTCCCACCATCATCCCCACCACCATCCTCGTTACCATCCCCACCACCTTTTCCACCATTCCCACCATCATCCCCACCGCCATCCTCGTTACCATCCCCACCACCTTTTCCACCATTCCCACCATCTCCAACACCTCCCCCACCATCATCCCCACCATCCCCACCACCTTCTCCACCATCATTCTCACCATCCCCACCACCATCTCCACCACCATTCTCACCATCTCCACCAACATCCCCACCATCCCCACCCCCATGCCCACCAACATCCCCACCATCCCCACCCCCATGCCCACCATCATCCCCACCATCC
 ```
-### <a name="msa"></a>Row-column multiple sequence alignment
+### Row-column multiple sequence alignment
 abPOA can also output the row-column multiple sequence alignment (RC-MSA) of all the aligned sequences in FASTA format.
 For example:
 ```
@@ -210,7 +219,7 @@ ACGTGTACA--TTGAC
 ```
 The `-` in the sequence stands for alignment gap. 
 
-### <a name="gfa"></a>Full graph information
+### Full graph information
 abPOA can output the final alignment graph in GFA format.
 Each segment line (`S` line) represents one node and each link line (`L` line) represents one edge between two nodes.
 The original input sequences and the generated consensus sequence are described as paths in `P` lines.
@@ -220,7 +229,7 @@ abPOA outputs two graph-related numbers in the header line (`H` line):
 
 Please refer to the [GFA specification](https://github.com/GFA-spec/GFA-spec/blob/master/GFA1.md) for more details of the GFA format.
 
-### <a name="plot"></a>Plot of alignment graph
+### Plot of alignment graph
 
 abPOA can generate a plot of the final partial order alignment graph with the help of `graphviz dot`. 
 For example:
@@ -233,11 +242,11 @@ The numbers inside the nodes are the node IDs. The numbers on the edges are the
 Make sure you have `dot` installed before using abPOA to generate the plot.
 For Linux/Unix systems: `sudo apt-get install graphviz`.
 
-## <a name="description"></a>Algorithm description
-### <a name="banding"></a>Adaptive banding
+## Algorithm description
+### Adaptive banding
 To understand how the adaptive banding working, please refer to our [Bioinformatics paper](https://dx.doi.org/10.1093/bioinformatics/btaa963).
 
-### <a name="seeding"></a>Minimizer-based seeding mode
+### Minimizer-based seeding mode
 As abPOA always allocates quadratic size of memory, for very long input sequences (>10 kb), memory usage will be a challenge.
 
 To solve this issue, we develop a minimizer-based seeding and partition method to split the sequence and graph with a small window.
@@ -250,7 +259,7 @@ A second round of chaining is then performed on all the local minimizer chains t
 With this global chain, abPOA selects a series of minimizer hits as partition anchors which has at least a distance of 500 bp (by default, -n/--min-poa-win).
 Within each partitioned window, abPOA performs banded partial order alignment separately and combines all the alignment results at the end.
 
-### <a name="tree"></a>Minimizer-based progressive tree
+### Minimizer-based progressive tree
 Instead of aligning all the sequences in the original order, abPOA can alternatively build a progressive tree to guide the alignment order.
 The generation of the progressive tree is also based on minimizers.
 For each pair of sequences, abPOA calculates their similarity score which is the Jaccard similarity of the minimizers, i.e. the number of minimizer hits divided by the total number of all minimizers from the two sequences.
@@ -262,22 +271,22 @@ With all the similarity scores (minimizer-based Jaccard similarity), abPOA build
 
 Then, abPOA performs partial order alignment following the order of sequences in this progressive tree set.
 
-### <a name="mcons"></a>Multiple consensus sequences
+### Multiple consensus sequences
 abPOA supports generating multiple consensus sequences from the final alignment graph (set -d/--max-num-cons as >1).
 
 The general underlying idea is to group input sequences into multiple clusters based on the heterozygous bases in the graph,
 Then, one consensus sequence is separately generated for each cluster of input sequences.
 The minimum allele frequency for each heterozygous base is 0.25 (by default, -q/--min-freq). 
 
-## <a name="dev"></a>For development
+## For development
 abPOA is not only a stand-alone tool for MSA and consensus calling, it can also work as a programming library. [example.c](example.c) shows how to use the C APIs of abPOA to take a set of sequences as input and perform MSA and consensus calling. Basically, the library file `libabpoa.a` and two header files [abpoa.h](include/abpoa.h) and [simd_instruction.h](include/simd_instruction.h) are needed to make the abPOA library work in your program.
 
 abPOA also provides Python bindings to all the primary C APIs. Refer to [python/README.md](python/README.md) for more details.
 
-## <a name="eval"></a>Evaluation datasets
+## Evaluation datasets
 The evaluation datasets and scripts used in [abPOA paper](https://dx.doi.org/10.1093/bioinformatics/btaa963) can be found in [abPOA-v1.0.5](https://github.com/yangao07/abPOA/releases/tag/v1.0.5).
 
-## <a name="contact"></a>Contact
+## Contact
 Yan Gao gaoy1@chop.edu
 
 Yi Xing xingyi@chop.edu

From e234dcef3061691293faa65c2dcb07a38622dc7c Mon Sep 17 00:00:00 2001
From: Yan Gao <yangao07@hit.edu.cn>
Date: Fri, 5 Apr 2024 16:20:11 -0400
Subject: [PATCH 18/38] fix instruction-dependent result difference and seg
 fault due to large memory usage (#66)

---
 Makefile               |  18 +-
 TODO.md                |   9 +
 include/abpoa.h        |   5 +
 src/abpoa.c            |   2 +
 src/abpoa.h            |   5 +
 src/abpoa_align.c      |  19 +-
 src/abpoa_output.c     |  49 ++---
 src/abpoa_seed.c       |  60 +++---
 src/simd_abpoa_align.c | 451 +++++++++++++++++++++++------------------
 9 files changed, 350 insertions(+), 268 deletions(-)
 create mode 100644 TODO.md

diff --git a/Makefile b/Makefile
index 0b6df3f..2c5f8f0 100644
--- a/Makefile
+++ b/Makefile
@@ -4,20 +4,28 @@ default: abpoa
 #CC          = gcc
 OS          := $(shell uname)
 ARCH        := $(shell arch)
-EXTRA_FLAGS = -Wno-unused-function -Wno-misleading-indentation -DUSE_SIMDE -DSIMDE_ENABLE_NATIVE_ALIASES
-CFLAGS      = -Wall -O3 $(EXTRA_FLAGS)
+# add -fno-tree-vectorize to avoid certain vectorization errors in O3 optimization
+# right now, we are using -O3 for the best performance, and no vectorization errors were found
+EXTRA_FLAGS = -Wall -Wno-unused-function -Wno-misleading-indentation -DUSE_SIMDE -DSIMDE_ENABLE_NATIVE_ALIASES # -fno-tree-vectorize
 
+DFLAGS	  =
 # for debug
 ifneq ($(debug),)
-	DFLAGS   =   -D __DEBUG__
+	DFLAGS  +=   -D __DEBUG__
 endif
+ifneq ($(sdebug),)
+	DFLAGS  +=   -D __SIMD_DEBUG__
+endif
+
 # for gdb
 ifneq ($(gdb),)
-	CFLAGS   = -Wall -g ${DFLAGS} $(EXTRA_FLAGS)
+	OPT_FLAGS = -g
 else
-	CFLAGS   = -Wall -O3 ${DFLAGS} $(EXTRA_FLAGS)
+	OPT_FLAGS = -O3
 endif
 
+CFLAGS        = OPT_FLAGS ${DFLAGS} $(EXTRA_FLAGS)
+
 # for gprof
 ifneq ($(pg),)
 	PG_FLAG  =   -pg
diff --git a/TODO.md b/TODO.md
new file mode 100644
index 0000000..3c7e96d
--- /dev/null
+++ b/TODO.md
@@ -0,0 +1,9 @@
+# To test after commit
+1. run through
+   1. SSE/AVX2/AVX512
+   2. 16/32bits
+   3. global/local
+   4. lg/ag/cg
+2. same result
+   1. SSE vs AVX2 vs AVX512
+   2. 16bits vs 32 bits
\ No newline at end of file
diff --git a/include/abpoa.h b/include/abpoa.h
index 87bbdcf..ae9a6b7 100644
--- a/include/abpoa.h
+++ b/include/abpoa.h
@@ -38,6 +38,11 @@
 #define ABPOA_HB 0
 #define ABPOA_HC 1
 
+#define ABPOA_NONE_VERBOSE 0
+#define ABPOA_INFO_VERBOSE 1
+#define ABPOA_DEBUG_VERBOSE 2
+#define ABPOA_LONG_DEBUG_VERBOSE 3
+
 // NOTE: upper boundary of in_edge_n is pow(2,30)
 // for MATCH/MISMATCH: node_id << 34  | query_id << 4 | op
 // for INSERTION:      query_id << 34 | op_len << 4   | op
diff --git a/src/abpoa.c b/src/abpoa.c
index 64105c1..031ee4b 100644
--- a/src/abpoa.c
+++ b/src/abpoa.c
@@ -54,6 +54,7 @@ const struct option abpoa_long_opt [] = {
 
     { "help", 0, NULL, 'h' },
     { "version", 0, NULL, 'v' },
+    { "verbose", 1, NULL, 'V'},
 
     { 0, 0, 0, 0}
 };
@@ -122,6 +123,7 @@ int abpoa_usage(void)
 
     err_printf("    -h --help               print this help usage information\n");
     err_printf("    -v --version            show version number\n");
+    err_printf("    -V --verbose     INT    verbose level (0-2). 0: none, 1: information, 2: debug [0]\n");
 
 
     err_printf("\n");
diff --git a/src/abpoa.h b/src/abpoa.h
index 87bbdcf..ae9a6b7 100644
--- a/src/abpoa.h
+++ b/src/abpoa.h
@@ -38,6 +38,11 @@
 #define ABPOA_HB 0
 #define ABPOA_HC 1
 
+#define ABPOA_NONE_VERBOSE 0
+#define ABPOA_INFO_VERBOSE 1
+#define ABPOA_DEBUG_VERBOSE 2
+#define ABPOA_LONG_DEBUG_VERBOSE 3
+
 // NOTE: upper boundary of in_edge_n is pow(2,30)
 // for MATCH/MISMATCH: node_id << 34  | query_id << 4 | op
 // for INSERTION:      query_id << 34 | op_len << 4   | op
diff --git a/src/abpoa_align.c b/src/abpoa_align.c
index 7eca502..3829392 100644
--- a/src/abpoa_align.c
+++ b/src/abpoa_align.c
@@ -133,7 +133,7 @@ abpoa_para_t *abpoa_init_para(void) {
     abpt->min_w = ABPOA_MIN_POA_WIN;
     abpt->progressive_poa = 0; // progressive partial order alignment
 
-    abpt->verbose = 0;
+    abpt->verbose = ABPOA_NONE_VERBOSE;
 
     // abpt->simd_flag = simd_check();
 
@@ -199,9 +199,7 @@ int abpoa_anchor_poa(abpoa_t *ab, abpoa_para_t *abpt, uint8_t **seqs, int **weig
     // uint8_t *seq1;
     for (_i = 0; _i < n_seq; ++_i) {
         i = read_id_map[_i]; read_id = exist_n_seq + i; qlen = seq_lens[i]; whole_res.n_cigar = 0, whole_res.m_cigar = 0, whole_res.graph_cigar = 0;
-#ifdef __DEBUG__
-        fprintf(stderr, "seq: # %d\n", i);
-#endif
+        if (abpt->verbose >= ABPOA_DEBUG_VERBOSE) fprintf(stderr, "seq: # %d\n", i);
         // seq-to-graph alignment and add alignment within each split window
         if (_i == 0) ai = 0; else ai = par_c[_i-1];
 
@@ -252,10 +250,7 @@ int abpoa_anchor_poa(abpoa_t *ab, abpoa_para_t *abpt, uint8_t **seqs, int **weig
         for (; ai < par_c[_i]; ++ai) {
             end_tpos = ((par_anchors.a[ai] >> 32) & 0x7fffffff) - k + 1; end_id = tpos_to_node_id[end_tpos];
             end_qpos = (uint32_t)par_anchors.a[ai] - k + 1;
-
-#ifdef __DEBUG__
-            fprintf(stderr, "\tanchor: t: %d (id: %d), q: %d\n", end_tpos, end_id, end_qpos);
-#endif
+            if (abpt->verbose >= ABPOA_DEBUG_VERBOSE) fprintf(stderr, "\tanchor: t: %d (id: %d), q: %d\n", end_tpos, end_id, end_qpos);
 
             res.graph_cigar = 0; res.n_cigar = 0;
             abpoa_align_sequence_to_subgraph(ab, abpt, beg_id, end_id, qseq+beg_qpos, end_qpos-beg_qpos, &res);
@@ -277,9 +272,7 @@ int abpoa_anchor_poa(abpoa_t *ab, abpoa_para_t *abpt, uint8_t **seqs, int **weig
         }
         end_id = ABPOA_SINK_NODE_ID; end_qpos = seq_lens[i];
 
-#ifdef __DEBUG__
-            fprintf(stderr, "\tanchor: t: %d (id: %d), q: %d\n", end_tpos, end_id, end_qpos);
-#endif
+        if (abpt->verbose >= ABPOA_DEBUG_VERBOSE) fprintf(stderr, "\tanchor: t: %d (id: %d), q: %d\n", end_tpos, end_id, end_qpos);
         res.graph_cigar = 0; res.n_cigar = 0;
         abpoa_align_sequence_to_subgraph(ab, abpt, beg_id, end_id, qseq+beg_qpos, end_qpos-beg_qpos, &res);
         abpoa_push_whole_cigar(&whole_res.n_cigar, &whole_res.m_cigar, &whole_res.graph_cigar, res.n_cigar, res.graph_cigar);
@@ -307,9 +300,7 @@ int abpoa_poa(abpoa_t *ab, abpoa_para_t *abpt, uint8_t **seqs, int **weights, in
     // uint8_t *seq1;
     for (i = 0; i < n_seq; ++i) {
         qlen = seq_lens[i]; qseq = seqs[i]; weight = weights[i]; read_id = exist_n_seq + i;
-#ifdef __DEBUG__
-        fprintf(stderr, "seq: # %d\n", i);
-#endif
+        if (abpt->verbose >= ABPOA_DEBUG_VERBOSE) fprintf(stderr, "seq: # %d\n", i);
         res.graph_cigar = 0; res.n_cigar = 0;
         if (abpoa_align_sequence_to_graph(ab, abpt, qseq, qlen, &res) >= 0) {
             if (abpt->amb_strand && (res.best_score < MIN_OF_TWO(qlen, ab->abg->node_n-2) * abpt->max_mat * .3333)) { // TODO .3333
diff --git a/src/abpoa_output.c b/src/abpoa_output.c
index 697962d..72c1192 100644
--- a/src/abpoa_output.c
+++ b/src/abpoa_output.c
@@ -728,7 +728,7 @@ int reassign_hap(int **clu_haps, int *clu_size, uint64_t **clu_read_ids, int rea
 
 // read_weight is NOT used here, no matter use_qv is set or not.
 // collect minimized set of het bases
-int abpoa_set_het_row_column_ids_weight(abpoa_graph_t *abg, uint64_t ***read_ids, int *het_poss, int **rc_weight, int msa_l, int n_seq, int m, int min_w, int read_ids_n) {
+int abpoa_set_het_row_column_ids_weight(abpoa_graph_t *abg, uint64_t ***read_ids, int *het_poss, int **rc_weight, int msa_l, int n_seq, int m, int min_w, int read_ids_n, int verbose) {
     int i, j, k, n, rank;
     uint64_t b, one = 1, *whole_read_ids = (uint64_t*)_err_calloc(read_ids_n, sizeof(uint64_t));
     for (i = 0; i < n_seq; ++i) {
@@ -782,12 +782,12 @@ int abpoa_set_het_row_column_ids_weight(abpoa_graph_t *abg, uint64_t ***read_ids
             if (iden == 1) continue;
 
             het_poss[n_het_pos++] = rank;
-#ifdef __DEBUG__
-            fprintf(stderr, "%d\t", rank);
-            for (j = 0; j < m; ++j) {
-                fprintf(stderr, "%c: %d\t", "ACGT-"[j], rc_weight[rank][j]);
-            } fprintf(stderr, "\n");
-#endif
+            if (verbose >= ABPOA_LONG_DEBUG_VERBOSE) {
+                fprintf(stderr, "%d\t", rank);
+                for (j = 0; j < m; ++j) {
+                    fprintf(stderr, "%c: %d\t", "ACGT-"[j], rc_weight[rank][j]);
+                } fprintf(stderr, "\n");
+            }
         }
     }
     free(n_branch); free(node_map);
@@ -796,7 +796,8 @@ int abpoa_set_het_row_column_ids_weight(abpoa_graph_t *abg, uint64_t ***read_ids
 
 // group read into clusters based on all het bases
 // initial cluster size could be > max_n_cons
-int abpoa_collect_clu_hap_read_ids(int *het_poss, int n_het_pos, uint64_t ***read_ids, int read_ids_n, int n_seq, int m, int min_w, int max_n_cons, uint64_t ***clu_read_ids, int *_m_clu) {
+int abpoa_collect_clu_hap_read_ids(int *het_poss, int n_het_pos, uint64_t ***read_ids, int read_ids_n, 
+                                   int n_seq, int m, int min_w, int max_n_cons, uint64_t ***clu_read_ids, int *_m_clu, int verbose) {
     if (n_het_pos == 0) return 1;
     int i, j, k, n_clu = 0, m_clu = 2;
     int **clu_haps = (int**)_err_malloc(2 * sizeof(int*));
@@ -832,25 +833,25 @@ int abpoa_collect_clu_hap_read_ids(int *het_poss, int n_het_pos, uint64_t ***rea
         }
     }
     if (n_clu < 2) err_fatal(__func__, "# haplotypes: %d\n", n_clu);
-#ifdef __DEBUG__
-    fprintf(stderr, "n_clu: %d\n", n_clu);
-    for (i = 0; i < n_clu; ++i) {
-        for (j = 0; j < n_het_pos; ++j) {
-            fprintf(stderr, "%d\t", clu_haps[i][j]);
+    if (verbose >= ABPOA_LONG_DEBUG_VERBOSE) {
+        fprintf(stderr, "n_clu: %d\n", n_clu);
+        for (i = 0; i < n_clu; ++i) {
+            for (j = 0; j < n_het_pos; ++j) {
+                fprintf(stderr, "%d\t", clu_haps[i][j]);
+            }
+            fprintf(stderr, "\tsize: %d\n", clu_size[i]);
         }
-        fprintf(stderr, "\tsize: %d\n", clu_size[i]);
     }
-#endif
 
     // assign haplotype with reads < min_w to haplotype with reads >= min_w
     // keep at most _max_n_cons_ haps and read ids, weight need to >= min_w
     n_clu = reassign_hap(clu_haps, clu_size, *clu_read_ids, read_ids_n, n_clu, min_w, max_n_cons, n_het_pos);
-#ifdef __DEBUG__
-    fprintf(stderr, "After re-assign: n_clu: %d\n", n_clu);
-    for (i = 0; i < n_clu; ++i) {
-        fprintf(stderr, "%d:\tsize: %d\n", i, clu_size[i]);
+    if (verbose >= ABPOA_LONG_DEBUG_VERBOSE) {
+        fprintf(stderr, "After re-assign: n_clu: %d\n", n_clu);
+        for (i = 0; i < n_clu; ++i) {
+            fprintf(stderr, "%d:\tsize: %d\n", i, clu_size[i]);
+        }
     }
-#endif
     for (i = 0; i < m_clu; ++i) free(clu_haps[i]); free(clu_haps); free(clu_size);
     *_m_clu = m_clu;
     return n_clu;
@@ -858,7 +859,7 @@ int abpoa_collect_clu_hap_read_ids(int *het_poss, int n_het_pos, uint64_t ***rea
 
 // read_weight is NOT used here
 // cluster reads into _n_clu_ groups based on heterogeneous bases
-int abpoa_multip_read_clu(abpoa_graph_t *abg, int src_id, int sink_id, int n_seq, int m, int max_n_cons, double min_freq, uint64_t ***clu_read_ids, int *_m_clu) {
+int abpoa_multip_read_clu(abpoa_graph_t *abg, int src_id, int sink_id, int n_seq, int m, int max_n_cons, double min_freq, uint64_t ***clu_read_ids, int *_m_clu, int verbose) {
     abpoa_set_msa_rank(abg, src_id, sink_id);
     int i, j, n_clu, m_clu, read_ids_n = (n_seq-1)/64+1;
     int msa_l = abg->node_id_to_msa_rank[sink_id]-1, min_w = MAX_OF_TWO(1, n_seq * min_freq); // TODO fastq-qual weight
@@ -878,11 +879,11 @@ int abpoa_multip_read_clu(abpoa_graph_t *abg, int src_id, int sink_id, int n_seq
     } 
     // find min set of het nodes
     int *het_poss = (int*)_err_calloc(msa_l, sizeof(int));
-    int n_het_pos = abpoa_set_het_row_column_ids_weight(abg, read_ids, het_poss, rc_weight, msa_l, n_seq, m, min_w, read_ids_n);
+    int n_het_pos = abpoa_set_het_row_column_ids_weight(abg, read_ids, het_poss, rc_weight, msa_l, n_seq, m, min_w, read_ids_n, verbose);
     
     if (n_het_pos < 1) n_clu = 1;
     // collect at most _max_n_cons_ haplotypes and corresponding read ids
-    else n_clu = abpoa_collect_clu_hap_read_ids(het_poss, n_het_pos, read_ids, read_ids_n, n_seq, m, min_w, max_n_cons, clu_read_ids, &m_clu);
+    else n_clu = abpoa_collect_clu_hap_read_ids(het_poss, n_het_pos, read_ids, read_ids_n, n_seq, m, min_w, max_n_cons, clu_read_ids, &m_clu, verbose);
 
     for (i = 0; i < msa_l; ++i) {
         for (j = 0; j < m; ++j) free(read_ids[i][j]);
@@ -906,7 +907,7 @@ void abpoa_generate_consensus(abpoa_t *ab, abpoa_para_t *abpt) {
     int n_clu, m_clu, n_seq = ab->abs->n_seq; uint64_t **clu_read_ids;
     int read_ids_n = (n_seq-1)/64+1;
 
-    if (abpt->max_n_cons > 1) n_clu = abpoa_multip_read_clu(abg, ABPOA_SRC_NODE_ID, ABPOA_SINK_NODE_ID, n_seq, abpt->m, abpt->max_n_cons, abpt->min_freq, &clu_read_ids, &m_clu);
+    if (abpt->max_n_cons > 1) n_clu = abpoa_multip_read_clu(abg, ABPOA_SRC_NODE_ID, ABPOA_SINK_NODE_ID, n_seq, abpt->m, abpt->max_n_cons, abpt->min_freq, &clu_read_ids, &m_clu, abpt->verbose);
     else n_clu = 1;
 
     abpoa_cons_t *abc = ab->abc;
diff --git a/src/abpoa_seed.c b/src/abpoa_seed.c
index 13cbb05..277413e 100644
--- a/src/abpoa_seed.c
+++ b/src/abpoa_seed.c
@@ -231,7 +231,7 @@ void mm_aa_sketch(void *km, const uint8_t *str, int len, int w, int k, uint32_t
 int abpoa_build_guide_tree(abpoa_para_t *abpt, int n_seq, ab_u128_v *mm, int *tree_id_map) {
     if (mm->n == 0) return 0;
 
-    if (abpt->verbose > 0) fprintf(stderr, "[%s] Building progressive guide tree ... ", __func__);
+    if (abpt->verbose >= ABPOA_INFO_VERBOSE) fprintf(stderr, "[%s] Building progressive guide tree ... ", __func__);
     size_t i, _i, j; int rid1, rid2;                                          // mm_hit_n: mimizer hits between each two sequences
                                                                               // 0: 0
                                                                               // 1: 0 1
@@ -319,7 +319,7 @@ int abpoa_build_guide_tree(abpoa_para_t *abpt, int n_seq, ab_u128_v *mm, int *tr
     }
 
     free(mm_hit_n); free(jac_sim);
-    if (abpt->verbose > 0) fprintf(stderr, "done!\n");
+    if (abpt->verbose >= ABPOA_INFO_VERBOSE) fprintf(stderr, "done!\n");
     return 0;
 }
 
@@ -382,7 +382,9 @@ int get_local_chain_score(int j_end_tpos, int j_end_qpos, int i_end_anchor_i, ab
 // local chains:
 //   x: strand | end_tpos | end_qpos
 //   y: end_anchor_i | start_anchor_i
-int abpoa_dp_chaining_of_local_chains(void *km, ab_u128_t *local_chains, int n_local_chains, ab_u64_v *anchors, int *score, int *pre_id, ab_u64_v *par_anchors, int min_w, int tlen, int qlen) {
+int abpoa_dp_chaining_of_local_chains(void *km, ab_u128_t *local_chains, int n_local_chains, 
+                                      ab_u64_v *anchors, int *score, int *pre_id, ab_u64_v *par_anchors, 
+                                      int min_w, int tlen, int qlen, int verbose) {
     int i, j, st, score1, global_max_score=INT32_MIN, global_max_i=-1;
     int *chain_score = (int*)kmalloc(km, n_local_chains * 4), *pre_chain_id = (int*)kmalloc(km, n_local_chains * 4);
     size_t _n = par_anchors->n;
@@ -452,13 +454,13 @@ int abpoa_dp_chaining_of_local_chains(void *km, ab_u128_t *local_chains, int n_l
         par_anchors->a[par_anchors->n-i-1] = tmp;
     }
 
-#ifdef __DEBUG__
-    for (i = _n; i < par_anchors->n; ++i) {
-        uint64_t ia = par_anchors->a[i];
-        // strand, rpos, qpos
-        fprintf(stderr, "%c\t%ld\t%d\n", "+-"[ia >> 63], (ia>>32) & 0x7fffffff, ((uint32_t)ia));
+    if (verbose >= ABPOA_LONG_DEBUG_VERBOSE) {
+        for (i = _n; i < par_anchors->n; ++i) {
+            uint64_t ia = par_anchors->a[i];
+            // strand, rpos, qpos
+            fprintf(stderr, "%c\t%ld\t%d\n", "+-"[ia >> 63], (ia>>32) & 0x7fffffff, ((uint32_t)ia));
+        }
     }
-#endif
     kfree(km, chain_score), kfree(km, pre_chain_id);
     return 0;
 }
@@ -482,7 +484,7 @@ static int get_chain_score(int max_bw, int *score, int i_qpos, int i_tpos, int j
 // Dynamic Programming-based Chaining for global alignment mode
 // anchors:
 //          strand<<63 | tpos<<32 | qpos
-int abpoa_dp_chaining(void *km, ab_u64_v *anchors, ab_u64_v *par_anchors, abpoa_para_t *abpt, int tlen, int qlen) {
+int abpoa_dp_chaining(void *km, ab_u64_v *anchors, ab_u64_v *par_anchors, abpoa_para_t *abpt, int tlen, int qlen, int verbose) {
     int i, j, st, n_a = anchors->n;
     int *score = (int32_t*)kmalloc(km, n_a * 4), *pre_id = (int32_t*)kmalloc(km, n_a * 4), *end_pos = (int32_t*)kmalloc(km, n_a * 4);
     memset(end_pos, 0, n_a * 4);
@@ -516,9 +518,7 @@ int abpoa_dp_chaining(void *km, ab_u64_v *anchors, ab_u64_v *par_anchors, abpoa_
 
             if (pre_id[j] >= 0) end_pos[pre_id[j]] = i;
         }
-#ifdef __DEBUG__
-        fprintf(stderr, "%d pre_id: %d, score: %d, tpos: %d, qpos: %d\n", i, max_j, max_score, i_tpos, i_qpos);
-#endif
+        if (verbose >= ABPOA_LONG_DEBUG_VERBOSE) fprintf(stderr, "%d pre_id: %d, score: %d, tpos: %d, qpos: %d\n", i, max_j, max_score, i_tpos, i_qpos);
         score[i] = max_score, pre_id[i] = max_j;
     }
 
@@ -570,7 +570,8 @@ int abpoa_dp_chaining(void *km, ab_u64_v *anchors, ab_u64_v *par_anchors, abpoa_
     }
 
     radix_sort_ab_128x(local_chains+tot_chain_i+1, local_chains + n_local_chains);
-    abpoa_dp_chaining_of_local_chains(km, local_chains+tot_chain_i+1, n_local_chains-1-tot_chain_i, anchors, score, pre_id, par_anchors, min_w, tlen, qlen);
+    abpoa_dp_chaining_of_local_chains(km, local_chains+tot_chain_i+1, n_local_chains-1-tot_chain_i, 
+                                      anchors, score, pre_id, par_anchors, min_w, tlen, qlen, abpt->verbose);
 
     kfree(km, score); kfree(km, pre_id); kfree(km, end_pos); kfree(km, local_chains);
     return 0;
@@ -630,7 +631,7 @@ int LIS(void *km, int tot_n, uint64_t *rank, int n) {
 // output:
 //   anchor list size: n
 //   list of anchors: anchors
-int LIS_chaining(void *km, ab_u64_v *anchors, ab_u64_v *par_anchors, int min_w) {
+int LIS_chaining(void *km, ab_u64_v *anchors, ab_u64_v *par_anchors, int min_w, int verbose) {
     size_t i, j, n_a = anchors->n, n_for, n_rev;
     uint64_t *for_rank = (uint64_t*)kmalloc(km, sizeof(uint64_t) * n_a);
     uint64_t *rev_rank = (uint64_t*)kmalloc(km, sizeof(uint64_t) * n_a);
@@ -663,10 +664,8 @@ int LIS_chaining(void *km, ab_u64_v *anchors, ab_u64_v *par_anchors, int min_w)
         n = n_rev; rank = rev_rank; kfree(km, for_rank);
     }
     // filter anchors
-    int last_tpos = -1, last_qpos = -1, cur_tpos, cur_qpos;
-#ifdef __DEBUG__
-    size_t _n = par_anchors->n;
-#endif
+    int last_tpos = -1, last_qpos = -1, cur_tpos, cur_qpos; size_t _n = 0;
+    if (verbose >= ABPOA_LONG_DEBUG_VERBOSE) _n = par_anchors->n;
     for (i = 0; i < n; ++i) {
         j = (int)rank[i]-1;
         cur_tpos = (anchors->a[j] >> 32) & 0x7fffffff;
@@ -677,18 +676,19 @@ int LIS_chaining(void *km, ab_u64_v *anchors, ab_u64_v *par_anchors, int min_w)
         kv_push(uint64_t, 0, *par_anchors, anchors->a[j]); // store LIS anchors into par_anchors
         last_tpos = cur_tpos; last_qpos = cur_qpos;
     }
-#ifdef __DEBUG__
-    for (i = _n; i < par_anchors->n; ++i) {
-        uint64_t ia = par_anchors->a[i];
-        // strand, rpos, qpos
-        fprintf(stderr, "%c\t%ld\t%d\n", "+-"[ia >> 63], (ia>>32) & 0x7fffffff, ((uint32_t)ia));
+
+    if (verbose >= ABPOA_LONG_DEBUG_VERBOSE) {
+        for (i = _n; i < par_anchors->n; ++i) {
+            uint64_t ia = par_anchors->a[i];
+            // strand, rpos, qpos
+            fprintf(stderr, "%c\t%ld\t%d\n", "+-"[ia >> 63], (ia>>32) & 0x7fffffff, ((uint32_t)ia));
+        }
     }
-#endif
     return 0;
 }
 
 int abpoa_collect_mm(void *km, uint8_t **seqs, int *seq_lens, int n_seq, abpoa_para_t *abpt, ab_u128_v *mm, int *mm_c) {
-    if (abpt->verbose > 0) fprintf(stderr, "[%s] Collecting minimizers ... ", __func__);
+    if (abpt->verbose >= ABPOA_INFO_VERBOSE) fprintf(stderr, "[%s] Collecting minimizers ... ", __func__);
     int i;
     mm_c[0] = 0;
     for (i = 0; i < n_seq; ++i) { // collect minimizers
@@ -696,7 +696,7 @@ int abpoa_collect_mm(void *km, uint8_t **seqs, int *seq_lens, int n_seq, abpoa_p
         else mm_sketch(km, seqs[i], seq_lens[i], abpt->w, abpt->k, i, 0, abpt->amb_strand, mm);
         mm_c[i+1] = mm->n;
     }
-    if (abpt->verbose > 0) fprintf(stderr, "done!\n");
+    if (abpt->verbose >= ABPOA_INFO_VERBOSE) fprintf(stderr, "done!\n");
     return mm->n;
 }
 
@@ -731,11 +731,9 @@ int abpoa_build_guide_tree_partition(uint8_t **seqs, int *seq_lens, int n_seq, a
         // collect minimizer hit anchors between t and q
         collect_anchors1(km, &anchors, mm1, mm_c, tid, qid, seq_lens[qid], abpt->k);
         // filtering and only keep LIS anchors
-#ifdef __DEBUG__
-        fprintf(stderr, "%d vs %d (tot_n: %ld)\n", tid, qid, anchors.n);
-#endif
+        if (abpt->verbose >= ABPOA_LONG_DEBUG_VERBOSE) fprintf(stderr, "%d vs %d (tot_n: %ld)\n", tid, qid, anchors.n);
         // alignment mode: different chaining result for global/local/extend
-        abpoa_dp_chaining(km, &anchors, par_anchors, abpt, seq_lens[tid], seq_lens[qid]);
+        abpoa_dp_chaining(km, &anchors, par_anchors, abpt, seq_lens[tid], seq_lens[qid], abpt->verbose);
         par_c[i] = par_anchors->n;
         kfree(km, anchors.a);
     }
diff --git a/src/simd_abpoa_align.c b/src/simd_abpoa_align.c
index 9c2363c..6362ec1 100644
--- a/src/simd_abpoa_align.c
+++ b/src/simd_abpoa_align.c
@@ -35,6 +35,16 @@ SIMD_para_t _simd_p64 = {128, 64, 1,  2, 16, -1};
 #define SIMDTotalBytes 16
 #endif
 
+void simd_output_pre_nodes(int *pre_index, int pre_n, int dp_i, int dp_j, int cur_op, int verbose) {
+    if (verbose >= ABPOA_LONG_DEBUG_VERBOSE) {
+        int k;
+        fprintf(stderr, "%d, %d, %d\t", dp_i, dp_j, cur_op);
+        for (k = 0; k < pre_n; ++k) { /* print all pre node for node i*/
+            fprintf(stderr, "%d, ", pre_index[k]);
+        } fprintf(stderr, "\n");
+    }
+}
+
 #define print_simd(s, str, score_t) {                                \
     int _i; score_t *_a = (score_t*)(s);                             \
     fprintf(stderr, "%s\t", str);                                    \
@@ -43,7 +53,6 @@ SIMD_para_t _simd_p64 = {128, 64, 1,  2, 16, -1};
     } fprintf(stderr, "\n");                                         \
 }
 
-
 #define simd_abpoa_print_lg_matrix(score_t, beg_index, end_index) { \
     for (j = 0; j < end_index-beg_index; ++j) {                     \
         fprintf(stderr, "index: %d\t", j);                          \
@@ -56,7 +65,7 @@ SIMD_para_t _simd_p64 = {128, 64, 1,  2, 16, -1};
 }
 
 #define simd_abpoa_print_ag_matrix(score_t, beg_index, end_index) {  \
-    for (j = beg_index; j < end_index; ++j) {                        \
+    for (j = 0; j < end_index-beg_index; ++j) {                      \
         fprintf(stderr, "index: %d\t", j);                           \
         dp_h = DP_HEF + j * 3 * dp_sn; dp_e1 = dp_h + dp_sn;         \
         _dp_h = (score_t*)dp_h, _dp_e1 = (score_t*)dp_e1;            \
@@ -70,7 +79,7 @@ SIMD_para_t _simd_p64 = {128, 64, 1,  2, 16, -1};
     score_t *_dp_h = (score_t*)dp_h, *_dp_e1 = (score_t*)dp_e1;                                          \
     score_t *_dp_e2 = (score_t*)dp_e2, *_dp_f1 = (score_t*)dp_f1, *_dp_f2 = (score_t*)dp_f2;             \
     fprintf(stderr, "%s\tindex: %d\t", str, index_i);                                                    \
-    for (i = dp_beg[index_i]; i <= (dp_end[index_i]/16+1)*16-1; ++i) {                                   \
+    for (i = dp_beg[index_i]; i <= dp_end[index_i]; ++i) {                                               \
         fprintf(stderr, "%d:(%d,%d,%d,%d,%d)\t", i, _dp_h[i], _dp_e1[i],_dp_e2[i], _dp_f1[i],_dp_f2[i]); \
     } fprintf(stderr, "\n");                                                                             \
 }
@@ -169,7 +178,7 @@ SIMD_para_t _simd_p64 = {128, 64, 1,  2, 16, -1};
             }                                                                                               \
         }                                                                                                   \
         if (hit == 0) err_fatal_simple("Error in lg_backtrack.");                                           \
-     /* fprintf(stderr, "%d, %d, (%d)\n", i, j, indel_first); */                                            \
+        simd_output_pre_nodes(pre_index[i], pre_n[i], i, j, 0, abpt->verbose);                              \
     }                                                                                                       \
     if (j > 0) cigar = abpoa_push_cigar(&n_c, &m_c, cigar, ABPOA_CINS, j, -1, j-1);                         \
     /* reverse cigar */                                                                                     \
@@ -273,7 +282,7 @@ SIMD_para_t _simd_p64 = {128, 64, 1,  2, 16, -1};
             }                                                                                               \
         }                                                                                                   \
         if (hit == 0) err_fatal_simple("Error in ag_backtrack.");                                           \
-     /* fprintf(stderr, "%d, %d, %d (indel_first: %d)\n", i, j, cur_op, indel_first); */                    \
+        simd_output_pre_nodes(pre_index[i], pre_n[i], i, j, cur_op, abpt->verbose);                         \
     }                                                                                                       \
     if (j > 0) cigar = abpoa_push_cigar(&n_c, &m_c, cigar, ABPOA_CINS, j, -1, j-1);                         \
     /* reverse cigar */                                                                                     \
@@ -290,7 +299,7 @@ SIMD_para_t _simd_p64 = {128, 64, 1,  2, 16, -1};
     abpoa_cigar_t *cigar = 0;                                                                               \
     i = best_i, j = best_j, _start_i = best_i, _start_j = best_j;                                           \
     id = abpoa_graph_index_to_node_id(graph, i+beg_index);                                                  \
-    if (best_j < qlen) cigar = abpoa_push_cigar(&n_c, &m_c, cigar, ABPOA_CINS, qlen-j, -1, qlen-1);         \
+    if (best_j < qlen) cigar = abpoa_push_cigar(&n_c, &m_c, cigar, ABPOA_CINS, qlen-best_j, -1, qlen-1);    \
     SIMDi *dp_h = DP_H2E2F + dp_sn * i * 5; _dp_h = (score_t*)dp_h;                                         \
     int indel_first = 1; /* prefer to keep gaps at the end */                                               \
     while (i > 0 && j > 0) {                                                                                \
@@ -305,24 +314,25 @@ SIMD_para_t _simd_p64 = {128, 64, 1,  2, 16, -1};
                 if (j-1 < dp_beg[pre_i] || j-1 > dp_end[pre_i]) continue;                                   \
                 _pre_dp_h = (score_t*)(DP_H2E2F + dp_sn * pre_i * 5);                                       \
                 if (_pre_dp_h[j-1] + s == _dp_h[j]) {                                                       \
-                    cur_op = ABPOA_ALL_OP; hit = 1;                                                         \
                     cigar = abpoa_push_cigar(&n_c, &m_c, cigar, ABPOA_CMATCH, 1, id, j-1);                  \
                     i = pre_i; --j; id = abpoa_graph_index_to_node_id(graph, i+beg_index); hit = 1;         \
                     dp_h = DP_H2E2F + dp_sn * i * 5; _dp_h = (score_t*)dp_h;                                \
+                    cur_op = ABPOA_ALL_OP;                                                                  \
                     ++res->n_aln_bases; res->n_matched_bases += is_match ? 1 : 0;                           \
                     break;                                                                                  \
                 }                                                                                           \
             }                                                                                               \
         }                                                                                                   \
         if (hit == 0 && cur_op & ABPOA_E_OP) { /* deletion */                                               \
+            _dp_e1 = (score_t*)(dp_h + dp_sn); _dp_e2 = (score_t*)(dp_h + dp_sn * 2);                       \
             for (k = 0; k < pre_n[i]; ++k) {                                                                \
                 pre_i = pre_index_i[k];                                                                     \
                 if (j < dp_beg[pre_i] || j > dp_end[pre_i]) continue;                                       \
+                _pre_dp_h = (score_t*)(DP_H2E2F + dp_sn * pre_i * 5);                                       \
                 if (cur_op & ABPOA_E1_OP) {                                                                 \
                     _pre_dp_e1 = (score_t*)(DP_H2E2F + dp_sn * (pre_i * 5 + 1));                            \
                     if (cur_op & ABPOA_M_OP) {                                                              \
                         if (_dp_h[j] == _pre_dp_e1[j]) {                                                    \
-                            _pre_dp_h = (score_t*)(DP_H2E2F + dp_sn * pre_i * 5);                           \
                             if (_pre_dp_h[j] - gap_oe1 == _pre_dp_e1[j]) cur_op = ABPOA_M_OP | ABPOA_F_OP;  \
                             else cur_op = ABPOA_E1_OP;                                                      \
                             hit = 1; cigar = abpoa_push_cigar(&n_c, &m_c, cigar, ABPOA_CDEL, 1, id, j-1);   \
@@ -331,9 +341,7 @@ SIMD_para_t _simd_p64 = {128, 64, 1,  2, 16, -1};
                             break;                                                                          \
                         }                                                                                   \
                     } else {                                                                                \
-                        _dp_e1 = (score_t*)(dp_h + dp_sn);                                                  \
                         if (_dp_e1[j] == _pre_dp_e1[j] - gap_ext1) {                                        \
-                            _pre_dp_h = (score_t*)(DP_H2E2F + dp_sn * pre_i * 5);                           \
                             if (_pre_dp_h[j] - gap_oe1 == _pre_dp_e1[j]) cur_op = ABPOA_M_OP | ABPOA_F_OP;  \
                             else cur_op = ABPOA_E1_OP;                                                      \
                             hit = 1; cigar = abpoa_push_cigar(&n_c, &m_c, cigar, ABPOA_CDEL, 1, id, j-1);   \
@@ -347,7 +355,6 @@ SIMD_para_t _simd_p64 = {128, 64, 1,  2, 16, -1};
                     _pre_dp_e2 = (score_t*)(DP_H2E2F + dp_sn * (pre_i * 5 + 2));                            \
                     if (cur_op & ABPOA_M_OP) {                                                              \
                         if (_dp_h[j] == _pre_dp_e2[j]) {                                                    \
-                            _pre_dp_h = (score_t*)(DP_H2E2F + dp_sn * pre_i * 5);                           \
                             if (_pre_dp_h[j] - gap_oe2 == _pre_dp_e2[j]) cur_op = ABPOA_M_OP | ABPOA_F_OP;  \
                             else cur_op = ABPOA_E2_OP;                                                      \
                             hit = 1; cigar = abpoa_push_cigar(&n_c, &m_c, cigar, ABPOA_CDEL, 1, id, j-1);   \
@@ -356,9 +363,7 @@ SIMD_para_t _simd_p64 = {128, 64, 1,  2, 16, -1};
                             break;                                                                          \
                         }                                                                                   \
                     } else {                                                                                \
-                        _dp_e2 = (score_t*)(dp_h + dp_sn * 2);                                              \
                         if (_dp_e2[j] == _pre_dp_e2[j] - gap_ext2) {                                        \
-                            _pre_dp_h = (score_t*)(DP_H2E2F + dp_sn * pre_i * 5);                           \
                             if (_pre_dp_h[j] - gap_oe2 == _pre_dp_e2[j]) cur_op = ABPOA_M_OP | ABPOA_F_OP;  \
                             else cur_op = ABPOA_E2_OP;                                                      \
                             hit = 1; cigar = abpoa_push_cigar(&n_c, &m_c, cigar, ABPOA_CDEL, 1, id, j-1);   \
@@ -400,16 +405,16 @@ SIMD_para_t _simd_p64 = {128, 64, 1,  2, 16, -1};
                 ++res->n_aln_bases;                                                                         \
             }                                                                                               \
         }                                                                                                   \
-        if (hit == 0 && cur_op & ABPOA_M_OP && indel_first == 1) { /* match/mismatch */                     \
+        if (hit == 0 && (cur_op & ABPOA_M_OP) && (indel_first == 1)) { /* match/mismatch */                 \
             for (k = 0; k < pre_n[i]; ++k) {                                                                \
                 pre_i = pre_index_i[k];                                                                     \
                 if (j-1 < dp_beg[pre_i] || j-1 > dp_end[pre_i]) continue;                                   \
                 _pre_dp_h = (score_t*)(DP_H2E2F + dp_sn * pre_i * 5);                                       \
                 if (_pre_dp_h[j-1] + s == _dp_h[j]) {                                                       \
-                    cur_op = ABPOA_ALL_OP; hit = 1;                                                         \
                     cigar = abpoa_push_cigar(&n_c, &m_c, cigar, ABPOA_CMATCH, 1, id, j-1);                  \
                     i = pre_i; --j; id = abpoa_graph_index_to_node_id(graph, i+beg_index); hit = 1;         \
                     dp_h = DP_H2E2F + dp_sn * i * 5; _dp_h = (score_t*)dp_h;                                \
+                    cur_op = ABPOA_ALL_OP;                                                                  \
                     ++res->n_aln_bases; res->n_matched_bases += is_match ? 1 : 0;                           \
                     indel_first = 0;                                                                        \
                     break;                                                                                  \
@@ -417,7 +422,7 @@ SIMD_para_t _simd_p64 = {128, 64, 1,  2, 16, -1};
             }                                                                                               \
         }                                                                                                   \
         if (hit == 0) err_fatal_simple("Error in cg_backtrack.");                                           \
-     /* fprintf(stderr, "%d, %d, %d\n", i, j, cur_op); */                                                   \
+        simd_output_pre_nodes(pre_index[i], pre_n[i], i, j, cur_op, abpt->verbose);                         \
     }                                                                                                       \
     if (j > 0) cigar = abpoa_push_cigar(&n_c, &m_c, cigar, ABPOA_CINS, j, -1, j-1);                         \
     /* reverse cigar */                                                                                     \
@@ -434,16 +439,16 @@ SIMD_para_t _simd_p64 = {128, 64, 1,  2, 16, -1};
 #define simd_abpoa_var(score_t, sp, SIMDSetOne, SIMDShiftOneN)                                      \
     /* int tot_dp_sn = 0; */                                                                        \
     abpoa_graph_t *graph = ab->abg; abpoa_simd_matrix_t *abm = ab->abm;                             \
-    int matrix_row_n = end_index-beg_index+1, matrix_col_n = qlen + 1;                              \
+    int64_t matrix_row_n = end_index-beg_index+1, matrix_col_n = qlen + 1;                          \
     int **pre_index, *pre_n, _pre_index, _pre_n, pre_i;                                             \
     int i, j, k, *dp_beg, *dp_beg_sn, *dp_end, *dp_end_sn, node_id, index_i, dp_i;                  \
     int beg, end, beg_sn, end_sn, _beg_sn, _end_sn, pre_beg_sn, pre_end, sn_i;                      \
-    int pn, log_n, size, qp_sn, dp_sn; /* pn: value per SIMDi, qp_sn/dp_sn/d_sn: segmented length*/ \
+    int pn, log_n, size; int64_t qp_sn, dp_sn; /* pn: # value per SIMDi, qp_sn/dp_sn: segmented length*/ \
     SIMDi *dp_h, *pre_dp_h, *qp, *qi=NULL;                                                          \
     score_t *_dp_h=NULL, *_qi, best_score = sp.inf_min, inf_min = sp.inf_min;                       \
     int *mat = abpt->mat, m = abpt->m; score_t gap_ext1 = abpt->gap_ext1;                           \
     int w = abpt->wb < 0 ? qlen : abpt->wb+(int)(abpt->wf*qlen); /* when w < 0, do whole global */  \
-    int best_i = 0, best_j = 0, best_id = 0, max, max_i=-1;                                         \
+    int best_i = 0, best_j = 0, best_id = 0, max, left_max_i=-1, right_max_i=-1;                    \
     SIMDi zero = SIMDSetZeroi(), SIMD_INF_MIN = SIMDSetOne(inf_min);                                \
     pn = sp.num_of_value; qp_sn = dp_sn = (matrix_col_n + pn - 1) / pn;                             \
     log_n = sp.log_num, size = sp.size; qp = abm->s_mem;                                            \
@@ -564,7 +569,6 @@ SIMD_para_t _simd_p64 = {128, 64, 1,  2, 16, -1};
         dp_beg[0] = 0, dp_end[0] = qlen;                                                                \
     }                                                                                                   \
     dp_beg_sn[0] = (dp_beg[0])/pn; dp_end_sn[0] = (dp_end[0])/pn;                                       \
-    dp_beg[0] = dp_beg_sn[0] * pn; dp_end[0] = (dp_end_sn[0]+1)*pn-1;                                   \
     dp_h = DP_H; _end_sn = MIN_OF_TWO(dp_end_sn[0]+1, dp_sn-1);                                         \
 }
 
@@ -582,7 +586,6 @@ SIMD_para_t _simd_p64 = {128, 64, 1,  2, 16, -1};
         dp_beg[0] = 0, dp_end[0] = qlen;                                                                \
     }                                                                                                   \
     dp_beg_sn[0] = (dp_beg[0])/pn; dp_end_sn[0] = (dp_end[0])/pn;                                       \
-    dp_beg[0] = dp_beg_sn[0] * pn; dp_end[0] = (dp_end_sn[0]+1)*pn-1;                                   \
     dp_h = DP_HEF; dp_e1 = dp_h + dp_sn; dp_f1 = dp_e1 + dp_sn;                                         \
     _end_sn = MIN_OF_TWO(dp_end_sn[0]+1, dp_sn-1);                                                      \
 }
@@ -601,7 +604,6 @@ SIMD_para_t _simd_p64 = {128, 64, 1,  2, 16, -1};
         dp_beg[0] = 0, dp_end[0] = qlen;                                                                \
     }                                                                                                   \
     dp_beg_sn[0] = (dp_beg[0])/pn; dp_end_sn[0] = (dp_end[0])/pn;                                       \
-    dp_beg[0] = dp_beg_sn[0] * pn; dp_end[0] = (dp_end_sn[0]+1)*pn-1;                                   \
     dp_h = DP_H2E2F; dp_e1 = dp_h+dp_sn; dp_e2 = dp_e1+dp_sn; dp_f1 = dp_e2+dp_sn; dp_f2 = dp_f1+dp_sn; \
     _end_sn = MIN_OF_TWO(dp_end_sn[0]+1, dp_sn-1);                                                      \
 }
@@ -609,7 +611,7 @@ SIMD_para_t _simd_p64 = {128, 64, 1,  2, 16, -1};
 #define simd_abpoa_lg_first_dp(score_t) {                                   \
     simd_abpoa_lg_first_row;                                                \
     if (abpt->align_mode == ABPOA_LOCAL_MODE) {                             \
-        for (i = 0; i <= _end_sn; ++i)                                       \
+        for (i = 0; i <= _end_sn; ++i)                                      \
             dp_h[i] = zero;                                                 \
     } else {                                                                \
         for (i = 0; i <= _end_sn; ++i) {                                    \
@@ -625,7 +627,7 @@ SIMD_para_t _simd_p64 = {128, 64, 1,  2, 16, -1};
 #define simd_abpoa_ag_first_dp(score_t) {                                   \
     simd_abpoa_ag_first_row;                                                \
     if (abpt->align_mode == ABPOA_LOCAL_MODE) {                             \
-        for (i = 0; i <= _end_sn; ++i)                                       \
+        for (i = 0; i <= _end_sn; ++i)                                      \
             dp_h[i] = dp_e1[i] = dp_f1[i] = zero;                           \
     } else {                                                                \
         for (i = 0; i <= _end_sn; ++i) {                                    \
@@ -643,7 +645,7 @@ SIMD_para_t _simd_p64 = {128, 64, 1,  2, 16, -1};
 #define simd_abpoa_cg_first_dp(score_t) {                                             \
     simd_abpoa_cg_first_row;                                                          \
     if (abpt->align_mode == ABPOA_LOCAL_MODE) {                                       \
-        for (i = 0; i <= _end_sn; ++i)                                                 \
+        for (i = 0; i <= _end_sn; ++i)                                                \
             dp_h[i] = dp_e1[i] = dp_e2[i] = dp_f1[i] = dp_f2[i] = zero;               \
     } else {                                                                          \
         for (i = 0; i <= _end_sn; ++i) {                                              \
@@ -698,47 +700,48 @@ SIMD_para_t _simd_p64 = {128, 64, 1,  2, 16, -1};
     }                                                                                                                                                              \
 }
 
-#define simd_abpoa_lg_dp(score_t, SIMDShiftOneN, SIMDMax, SIMDAdd, SIMDSub) {                       \
-    node_id = abpoa_graph_index_to_node_id(graph, index_i);                                         \
-    SIMDi *q = qp + graph->node[node_id].base * qp_sn, first, remain;                               \
-    dp_h = &DP_H[dp_i * dp_sn]; _dp_h = (score_t*)dp_h;                                             \
-    int min_pre_beg_sn, max_pre_end_sn;                                                             \
-    if (abpt->wb < 0) {                                                                             \
-        beg = dp_beg[dp_i] = 0, end = dp_end[dp_i] = qlen;                                          \
-        beg_sn = dp_beg_sn[dp_i] = (dp_beg[dp_i])/pn; end_sn = dp_end_sn[dp_i] = (dp_end[dp_i])/pn; \
-        min_pre_beg_sn = 0, max_pre_end_sn = end_sn;                                                \
-    } else {                                                                                        \
-        beg = GET_AD_DP_BEGIN(graph, w, node_id, end_node_id, qlen), end = GET_AD_DP_END(graph, w, node_id, end_node_id, qlen); \
-        beg_sn = beg / pn; min_pre_beg_sn = INT32_MAX, max_pre_end_sn = -1;                         \
-        for (i = 0; i < pre_n[dp_i]; ++i) {                                                         \
-            pre_i = pre_index[dp_i][i];                                                             \
-            if (min_pre_beg_sn > dp_beg_sn[pre_i]) min_pre_beg_sn = dp_beg_sn[pre_i];               \
-            if (max_pre_end_sn < dp_end_sn[pre_i]) max_pre_end_sn = dp_end_sn[pre_i];               \
-        } if (beg_sn < min_pre_beg_sn) beg_sn = min_pre_beg_sn;                                     \
-        dp_beg_sn[dp_i] = beg_sn; beg = dp_beg[dp_i] = dp_beg_sn[dp_i] * pn;                        \
-        end_sn = dp_end_sn[dp_i] = end/pn; end = dp_end[dp_i] = (dp_end_sn[dp_i]+1)*pn-1;           \
-    }                                                                                               \
-    /* loop query */                                                                                                         \
-    /* first pre_node */                                                                                                     \
-    pre_i = pre_index[dp_i][0];                                                                                              \
-    pre_dp_h = DP_H + pre_i * dp_sn;                                                                                         \
-    pre_end = dp_end[pre_i];                                                                                                 \
-    pre_beg_sn = dp_beg_sn[pre_i];                                                                                           \
-    /* set M from (pre_i, q_i-1), E from (pre_i, q_i) */                                                                     \
-    if (abpt->align_mode == ABPOA_LOCAL_MODE) {                                                                              \
-        _beg_sn = 0, _end_sn = end_sn; first = SIMDShiftRight(zero, SIMDTotalBytes-SIMDShiftOneN);                           \
-    } else {                                                                                                                 \
-        if (pre_beg_sn < beg_sn) _beg_sn = beg_sn, first = SIMDShiftRight(pre_dp_h[beg_sn-1], SIMDTotalBytes-SIMDShiftOneN); \
-        else _beg_sn = pre_beg_sn, first = SIMDShiftRight(SIMD_INF_MIN, SIMDTotalBytes-SIMDShiftOneN);                       \
-        _end_sn = MIN_OF_THREE((pre_end+1)/pn, end_sn, dp_sn-1);                                                             \
-        for (i = beg_sn; i < _beg_sn; ++i) dp_h[i] = SIMD_INF_MIN;                                                           \
-        for (i = _end_sn+1; i <= MIN_OF_TWO(end_sn+1, dp_sn-1); ++i) dp_h[i] = SIMD_INF_MIN;                                 \
-    }                                                                                                                        \
-    for (sn_i = _beg_sn; sn_i <= _end_sn; ++sn_i) { /* SIMD parallelization */                                               \
-        remain = SIMDShiftLeft(pre_dp_h[sn_i], SIMDShiftOneN);                                                               \
-        dp_h[sn_i] = SIMDMax(SIMDAdd(SIMDOri(first, remain), q[sn_i]), SIMDSub(pre_dp_h[sn_i], GAP_E1));                     \
-        first = SIMDShiftRight(pre_dp_h[sn_i], SIMDTotalBytes-SIMDShiftOneN);                                                \
-    }                                                                                                                        \
+#define simd_abpoa_lg_dp(score_t, SIMDShiftOneN, SIMDMax, SIMDAdd, SIMDSub) {                                                     \
+    node_id = abpoa_graph_index_to_node_id(graph, index_i);                                                                       \
+    SIMDi *q = qp + graph->node[node_id].base * qp_sn, first, remain;                                                             \
+    dp_h = &DP_H[dp_i * dp_sn]; _dp_h = (score_t*)dp_h;                                                                           \
+    int min_pre_beg, min_pre_beg_sn, max_pre_end_sn;                                                                              \
+    if (abpt->wb < 0) {                                                                                                           \
+        beg = dp_beg[dp_i] = 0, end = dp_end[dp_i] = qlen;                                                                        \
+        beg_sn = dp_beg_sn[dp_i] = (dp_beg[dp_i])/pn; end_sn = dp_end_sn[dp_i] = (dp_end[dp_i])/pn;                               \
+        min_pre_beg_sn = 0, max_pre_end_sn = end_sn;                                                                              \
+    } else {                                                                                                                      \
+        beg = GET_AD_DP_BEGIN(graph, w, node_id, end_node_id, qlen), end = GET_AD_DP_END(graph, w, node_id, end_node_id, qlen);   \
+        beg_sn = beg / pn; min_pre_beg = min_pre_beg_sn = INT32_MAX, max_pre_end_sn = -1;                                         \
+        for (i = 0; i < pre_n[dp_i]; ++i) {                                                                                       \
+            pre_i = pre_index[dp_i][i];                                                                                           \
+            if (min_pre_beg > dp_beg[pre_i]) min_pre_beg = dp_beg[pre_i], min_pre_beg_sn = dp_beg_sn[pre_i];                      \
+            if (max_pre_end_sn < dp_end_sn[pre_i]) max_pre_end_sn = dp_end_sn[pre_i];                                             \
+        }                                                                                                                         \
+        if (beg_sn < min_pre_beg_sn) {                                                                                            \
+            beg = min_pre_beg; beg_sn = min_pre_beg_sn;                                                                           \
+        }                                                                                                                         \
+        dp_beg_sn[dp_i] = beg_sn; dp_beg[dp_i] = beg; end_sn = dp_end_sn[dp_i] = end/pn; dp_end[dp_i] = end;                      \
+    }                                                                                                                             \
+    /* loop query */                                                                                                              \
+    /* first pre_node */                                                                                                          \
+    pre_i = pre_index[dp_i][0];                                                                                                   \
+    pre_dp_h = DP_H + pre_i * dp_sn;                                                                                              \
+    pre_end = dp_end[pre_i]; pre_beg_sn = dp_beg_sn[pre_i];                                                                       \
+    /* set M from (pre_i, q_i-1), E from (pre_i, q_i) */                                                                          \
+    if (abpt->align_mode == ABPOA_LOCAL_MODE) {                                                                                   \
+        _beg_sn = 0, _end_sn = end_sn; first = SIMDShiftRight(zero, SIMDTotalBytes-SIMDShiftOneN);                                \
+    } else {                                                                                                                      \
+        if (pre_beg_sn < beg_sn) _beg_sn = beg_sn, first = SIMDShiftRight(pre_dp_h[beg_sn-1], SIMDTotalBytes-SIMDShiftOneN);      \
+        else _beg_sn = pre_beg_sn, first = SIMDShiftRight(SIMD_INF_MIN, SIMDTotalBytes-SIMDShiftOneN);                            \
+        _end_sn = MIN_OF_THREE((pre_end+1)/pn, end_sn, dp_sn-1);                                                                  \
+        for (i = beg_sn; i < _beg_sn; ++i) dp_h[i] = SIMD_INF_MIN;                                                                \
+        for (i = _end_sn+1; i <= MIN_OF_TWO(end_sn+1, dp_sn-1); ++i) dp_h[i] = SIMD_INF_MIN;                                      \
+    }                                                                                                                             \
+    for (sn_i = _beg_sn; sn_i <= _end_sn; ++sn_i) { /* SIMD parallelization */                                                    \
+        remain = SIMDShiftLeft(pre_dp_h[sn_i], SIMDShiftOneN);                                                                    \
+        dp_h[sn_i] = SIMDMax(SIMDAdd(SIMDOri(first, remain), q[sn_i]), SIMDSub(pre_dp_h[sn_i], GAP_E1));                          \
+        first = SIMDShiftRight(pre_dp_h[sn_i], SIMDTotalBytes-SIMDShiftOneN);                                                     \
+    }                                                                                                                             \
     /* get max m and e */                                                                                                         \
     for (i = 1; i < pre_n[dp_i]; ++i) {                                                                                           \
         pre_i = pre_index[dp_i][i];                                                                                               \
@@ -759,23 +762,25 @@ SIMD_para_t _simd_p64 = {128, 64, 1,  2, 16, -1};
             first = SIMDShiftRight(pre_dp_h[sn_i], SIMDTotalBytes-SIMDShiftOneN);                                                 \
         } /* now we have max(h,e) stored at dp_h */                                                                               \
     }                                                                                                                             \
-    /* new F start */                                                                                                                  \
-    first = SIMDOri(SIMDAndi(dp_h[beg_sn], PRE_MASK[0]), SUF_MIN[0]);                                                                  \
-    for (sn_i = beg_sn; sn_i <= end_sn; ++sn_i) {                                                                                      \
-        if (abpt->align_mode == ABPOA_LOCAL_MODE) {                                                                                    \
-            set_num = pn;                                                                                                              \
-        } else {                                                                                                                       \
-            if (sn_i < min_pre_beg_sn) {                                                                                               \
-                _err_fatal_simple(__func__, "sn_i < min_pre_beg_sn\n");                                                                \
-            } else if (sn_i > max_pre_end_sn) {                                                                                        \
-                set_num = sn_i == max_pre_end_sn+1 ? 1 : 0;                                                                            \
-            } else set_num = pn;                                                                                                       \
-        }                                                                                                                              \
-        dp_h[sn_i] = SIMDMax(dp_h[sn_i], first);                                                                                       \
-        SIMD_SET_F(dp_h[sn_i], log_n, set_num, PRE_MIN, PRE_MASK, SUF_MIN, GAP_E1S, SIMDMax, SIMDAdd, SIMDSub, SIMDShiftOneN);         \
+    /* new F start */                                                                                                             \
+    for (i = beg_sn * pn; i < beg; ++i) _dp_h[i] = inf_min;                                                                       \
+    for (i = end+1; i < (end_sn+1)*pn; ++i) _dp_h[i] = inf_min;                                                                   \
+    first = SIMDOri(SIMDAndi(dp_h[beg_sn], PRE_MASK[0]), SUF_MIN[0]);                                                             \
+    for (sn_i = beg_sn; sn_i <= end_sn; ++sn_i) {                                                                                 \
+        if (abpt->align_mode == ABPOA_LOCAL_MODE) {                                                                               \
+            set_num = pn;                                                                                                         \
+        } else {                                                                                                                  \
+            if (sn_i < min_pre_beg_sn) {                                                                                          \
+                _err_fatal_simple(__func__, "sn_i < min_pre_beg_sn\n");                                                           \
+            } else if (sn_i > max_pre_end_sn) {                                                                                   \
+                set_num = sn_i == max_pre_end_sn+1 ? 1 : 0;                                                                       \
+            } else set_num = pn;                                                                                                  \
+        }                                                                                                                         \
+        dp_h[sn_i] = SIMDMax(dp_h[sn_i], first);                                                                                  \
+        SIMD_SET_F(dp_h[sn_i], log_n, set_num, PRE_MIN, PRE_MASK, SUF_MIN, GAP_E1S, SIMDMax, SIMDAdd, SIMDSub, SIMDShiftOneN);    \
         first = SIMDOri(SIMDAndi(SIMDShiftRight(SIMDSub(dp_h[sn_i], GAP_E1), SIMDTotalBytes-SIMDShiftOneN), PRE_MASK[0]), SUF_MIN[0]); \
-    }                                                                                                                                  \
-    if (abpt->align_mode == ABPOA_LOCAL_MODE) for (sn_i = 0; sn_i <= end_sn; ++sn_i) dp_h[sn_i] = SIMDMax(zero, dp_h[sn_i]);           \
+    }                                                                                                                             \
+    if (abpt->align_mode == ABPOA_LOCAL_MODE) for (sn_i = 0; sn_i <= end_sn; ++sn_i) dp_h[sn_i] = SIMDMax(zero, dp_h[sn_i]);      \
 }
 
 #define simd_abpoa_ag_dp(score_t, SIMDShiftOneN, SIMDMax, SIMDAdd, SIMDSub, SIMDGetIfGreater, SIMDSetIfGreater, SIMDSetIfEqual) { \
@@ -783,21 +788,23 @@ SIMD_para_t _simd_p64 = {128, 64, 1,  2, 16, -1};
     SIMDi *q = qp + graph->node[node_id].base * qp_sn, first, remain;                                                             \
     dp_h = DP_HEF + dp_i * 3 * dp_sn; dp_e1 = dp_h + dp_sn; dp_f1 = dp_e1 + dp_sn;                                                \
     _dp_h = (score_t*)dp_h, _dp_e1 = (score_t*)dp_e1, _dp_f1 = (score_t*)dp_f1;                                                   \
-    int min_pre_beg_sn, max_pre_end_sn;                                                                                           \
+    int min_pre_beg, min_pre_beg_sn, max_pre_end_sn;                                                                              \
     if (abpt->wb < 0) {                                                                                                           \
         beg = dp_beg[dp_i] = 0, end = dp_end[dp_i] = qlen;                                                                        \
         beg_sn = dp_beg_sn[dp_i] = (dp_beg[dp_i])/pn; end_sn = dp_end_sn[dp_i] = (dp_end[dp_i])/pn;                               \
         min_pre_beg_sn = 0, max_pre_end_sn = end_sn;                                                                              \
     } else {                                                                                                                      \
         beg = GET_AD_DP_BEGIN(graph, w, node_id, end_node_id, qlen), end = GET_AD_DP_END(graph, w, node_id, end_node_id, qlen);   \
-        beg_sn = beg / pn; min_pre_beg_sn = INT32_MAX, max_pre_end_sn = -1;                                                       \
+        beg_sn = beg / pn; min_pre_beg = min_pre_beg_sn = INT32_MAX, max_pre_end_sn = -1;                                         \
         for (i = 0; i < pre_n[dp_i]; ++i) {                                                                                       \
             pre_i = pre_index[dp_i][i];                                                                                           \
-            if (min_pre_beg_sn > dp_beg_sn[pre_i]) min_pre_beg_sn = dp_beg_sn[pre_i];                                             \
+            if (min_pre_beg > dp_beg[pre_i]) min_pre_beg = dp_beg[pre_i], min_pre_beg_sn = dp_beg_sn[pre_i];                      \
             if (max_pre_end_sn < dp_end_sn[pre_i]) max_pre_end_sn = dp_end_sn[pre_i];                                             \
-        } if (beg_sn < min_pre_beg_sn) beg_sn = min_pre_beg_sn;                                                                   \
-        dp_beg_sn[dp_i] = beg_sn; beg = dp_beg[dp_i] = dp_beg_sn[dp_i] * pn;                                                      \
-        end_sn = dp_end_sn[dp_i] = end/pn; end = dp_end[dp_i] = (dp_end_sn[dp_i]+1)*pn-1;                                         \
+        }                                                                                                                         \
+        if (beg_sn < min_pre_beg_sn) {                                                                                            \
+            beg_sn = min_pre_beg_sn; beg = min_pre_beg;                                                                           \
+        }                                                                                                                         \
+        dp_beg_sn[dp_i] = beg_sn; dp_beg[dp_i] = beg; end_sn = dp_end_sn[dp_i] = end/pn; dp_end[dp_i] = end;                      \
     }                                                                                                                             \
     /* loop query */                                                                                                              \
     /* first pre_node */                                                                                                          \
@@ -854,6 +861,8 @@ SIMD_para_t _simd_p64 = {128, 64, 1,  2, 16, -1};
     for (sn_i = beg_sn; sn_i <= end_sn; ++sn_i) { /* SIMD parallelization */                                                      \
         dp_h[sn_i] = SIMDAdd(dp_h[sn_i], q[sn_i]);                                                                                \
     }                                                                                                                             \
+    for (i = beg_sn * pn; i < beg; ++i) _dp_h[i] = _dp_e1[i] = inf_min;                                                           \
+    for (i = end+1; i < (end_sn+1)*pn; ++i) _dp_h[i] = _dp_e1[i] = inf_min;                                                       \
     /* new F start */                                                                                                             \
     first = SIMDShiftRight(SIMDShiftLeft(dp_h[beg_sn], SIMDTotalBytes-SIMDShiftOneN), SIMDTotalBytes-SIMDShiftOneN);              \
     for (sn_i = beg_sn; sn_i <= end_sn; ++sn_i) {                                                                                 \
@@ -889,25 +898,31 @@ SIMD_para_t _simd_p64 = {128, 64, 1,  2, 16, -1};
     SIMDi *q = qp + graph->node[node_id].base * qp_sn, first, remain;                                                             \
     dp_h = DP_H2E2F+dp_i*5*dp_sn; dp_e1 = dp_h+dp_sn; dp_e2 = dp_e1+dp_sn; dp_f1 = dp_e2+dp_sn; dp_f2 = dp_f1+dp_sn;              \
     _dp_h=(score_t*)dp_h, _dp_e1=(score_t*)dp_e1, _dp_e2=(score_t*)dp_e2, _dp_f1=(score_t*)dp_f1, _dp_f2=(score_t*)dp_f2;         \
-    int min_pre_beg_sn, max_pre_end_sn;                                                                                           \
+    int min_pre_beg, min_pre_beg_sn, max_pre_end_sn;                                                                              \
     if (abpt->wb < 0) {                                                                                                           \
         beg = dp_beg[dp_i] = 0, end = dp_end[dp_i] = qlen;                                                                        \
         beg_sn = dp_beg_sn[dp_i] = beg/pn; end_sn = dp_end_sn[dp_i] = end/pn;                                                     \
         min_pre_beg_sn = 0, max_pre_end_sn = end_sn;                                                                              \
     } else {                                                                                                                      \
         beg = GET_AD_DP_BEGIN(graph, w, node_id, end_node_id, qlen), end = GET_AD_DP_END(graph, w, node_id, end_node_id, qlen);   \
-        beg_sn = beg / pn; min_pre_beg_sn = INT32_MAX, max_pre_end_sn = -1;                                                       \
+        if (abpt->verbose >= ABPOA_LONG_DEBUG_VERBOSE)                                                                            \
+            fprintf(stderr, "index: %d (node: %d): raw beg: %d, end: %d\n", index_i-beg_index, node_id, beg, end);                \
+        beg_sn = beg / pn; min_pre_beg = min_pre_beg_sn = INT32_MAX, max_pre_end_sn = -1;                                         \
         for (i = 0; i < pre_n[dp_i]; ++i) {                                                                                       \
             pre_i = pre_index[dp_i][i];                                                                                           \
-            if (min_pre_beg_sn > dp_beg_sn[pre_i]) min_pre_beg_sn = dp_beg_sn[pre_i];                                             \
+            if (abpt->verbose >= ABPOA_LONG_DEBUG_VERBOSE)                                                                        \
+                fprintf(stderr, "\tpre_i: %d, pre_dp_beg: %d, pre_dp_beg_sn: %d\n", pre_i, dp_beg[pre_i], dp_beg_sn[pre_i]);      \
+            if (min_pre_beg > dp_beg[pre_i]) min_pre_beg = dp_beg[pre_i], min_pre_beg_sn = dp_beg_sn[pre_i];                      \
             if (max_pre_end_sn < dp_end_sn[pre_i]) max_pre_end_sn = dp_end_sn[pre_i];                                             \
-        } if (beg_sn < min_pre_beg_sn) beg_sn = min_pre_beg_sn;                                                                   \
-        dp_beg_sn[dp_i] = beg_sn; beg = dp_beg[dp_i] = dp_beg_sn[dp_i] * pn;                                                      \
-        end_sn = dp_end_sn[dp_i] = end/pn; end = dp_end[dp_i] = (dp_end_sn[dp_i]+1)*pn-1;                                         \
-     /* fprintf(stderr, "index: %d, beg: %d, end: %d, beg_sn: %d, end_sn: %d\n", index_i, beg, end, beg_sn, end_sn); */           \
+        }                                                                                                                         \
+        if (beg_sn < min_pre_beg_sn) {                                                                                            \
+            assert(beg < min_pre_beg); beg = min_pre_beg; beg_sn = min_pre_beg_sn;                                                \
+        }                                                                                                                         \
+        dp_beg_sn[dp_i] = beg_sn; dp_beg[dp_i] = beg; end_sn = dp_end_sn[dp_i] = end/pn; dp_end[dp_i] = end;                      \
+        if (abpt->verbose >= ABPOA_LONG_DEBUG_VERBOSE) {                                                                          \
+            fprintf(stderr, "index: %d (node: %d): beg: %d, end: %d, beg_sn: %d, end_sn: %d, max_left: %d, max_right: %d\n", index_i-beg_index, node_id, beg, end, beg_sn, end_sn, abpoa_graph_node_id_to_max_pos_left(graph, node_id), abpoa_graph_node_id_to_max_pos_right(graph, node_id)); \
+        }                                                                                                                         \
     }                                                                                                                             \
- /* fprintf(stderr, "%d: beg, end: %d, %d\n", index_i, beg, end); */                                                              \
-    /* tot_dp_sn += (end_sn - beg_sn + 1); */                                                                                     \
     /* loop query */                                                                                                              \
     /* first pre_node */                                                                                                          \
     pre_i = pre_index[dp_i][0];                                                                                                   \
@@ -968,10 +983,12 @@ SIMD_para_t _simd_p64 = {128, 64, 1,  2, 16, -1};
         }                                                                                                                         \
     }                                                                                                                             \
     /* compare M, E, and F */                                                                                                     \
- /* fprintf(stderr, "5 index_i: %d, beg_sn: %d, end_sn: %d\n", index_i, _beg_sn, _end_sn); */                                     \
+ /* fprintf(stderr, "5 index_i: %d, beg_sn: %d, end_sn: %d\n", index_i, beg_sn, end_sn); */                                       \
     for (sn_i = beg_sn; sn_i <= end_sn; ++sn_i) { /* SIMD parallelization */                                                      \
         dp_h[sn_i] = SIMDAdd(dp_h[sn_i], q[sn_i]);                                                                                \
     }                                                                                                                             \
+    for (i = beg_sn * pn; i < beg; ++i) _dp_h[i] = _dp_e1[i] = _dp_e2[i] = inf_min;                                               \
+    for (i = end+1; i < (end_sn+1)*pn; ++i) _dp_h[i] = _dp_e1[i] = _dp_e2[i] = inf_min;                                           \
     /* new F start */                                                                                                             \
     first = SIMDShiftRight(SIMDShiftLeft(dp_h[beg_sn], SIMDTotalBytes-SIMDShiftOneN), SIMDTotalBytes-SIMDShiftOneN);              \
     SIMDi first2 = first;                                                                                                         \
@@ -1042,28 +1059,27 @@ SIMD_para_t _simd_p64 = {128, 64, 1,  2, 16, -1};
 
 #define simd_abpoa_max_in_row(score_t, SIMDSetIfGreater, SIMDGetIfGreater) { \
     /* select max dp_h */                                                    \
-    max = inf_min, max_i = -1;                                               \
-    SIMDi a = dp_h[end_sn], b = qi[end_sn];                                  \
-    if (end_sn == qlen / pn) SIMDSetIfGreater(a, zero, b, SIMD_INF_MIN, a);  \
-    for (i = beg_sn; i < end_sn; ++i) {                                      \
-        SIMDGetIfGreater(b, a, dp_h[i], a, qi[i], b);                        \
-    }                                                                        \
-    _dp_h = (score_t*)&a, _qi = (score_t*)&b;                                \
-    for (i = 0; i < pn; ++i) {                                               \
+    max = inf_min, left_max_i = right_max_i = -1;                            \
+    _dp_h = (score_t*)dp_h, _qi = (score_t*)qi;                              \
+    for (i = beg; i <= end; ++i) {                                           \
         if (_dp_h[i] > max) {                                                \
-            max = _dp_h[i]; max_i = _qi[i];                                  \
+            max = _dp_h[i];                                                  \
+            left_max_i = right_max_i = _qi[i];                               \
+        } else if (_dp_h[i] == max) {                                        \
+            right_max_i = _qi[i];                                            \
         }                                                                    \
     }                                                                        \
 }
 
-#define simd_abpoa_ada_max_i   {                                                                                        \
-    /* set max_pos_left/right for next nodes */                                                                         \
-    int out_i = max_i + 1;                                                                                              \
-    for (i = 0; i < graph->node[node_id].out_edge_n; ++i) {                                                             \
-        int out_node_id = graph->node[node_id].out_id[i];                                                               \
-        if (out_i > graph->node_id_to_max_pos_right[out_node_id]) graph->node_id_to_max_pos_right[out_node_id] = out_i; \
-        if (out_i < graph->node_id_to_max_pos_left[out_node_id]) graph->node_id_to_max_pos_left[out_node_id] = out_i;   \
-    }                                                                                                                   \
+#define simd_abpoa_ada_max_i   {                                             \
+    /* set max_pos_left/right for next nodes */                              \
+    for (i = 0; i < graph->node[node_id].out_edge_n; ++i) {                  \
+        int out_node_id = graph->node[node_id].out_id[i];                    \
+        if (right_max_i+1 > graph->node_id_to_max_pos_right[out_node_id])    \
+            graph->node_id_to_max_pos_right[out_node_id] = right_max_i+1;    \
+        if (left_max_i+1 < graph->node_id_to_max_pos_left[out_node_id])      \
+            graph->node_id_to_max_pos_left[out_node_id] = left_max_i+1;      \
+    }                                                                        \
 }
 
 // TODO end_bonus for extension
@@ -1077,11 +1093,11 @@ SIMD_para_t _simd_p64 = {128, 64, 1,  2, 16, -1};
         simd_abpoa_lg_dp(score_t, SIMDShiftOneN, SIMDMax, SIMDAdd, SIMDSub);                    \
         if (abpt->align_mode == ABPOA_LOCAL_MODE) {                                             \
             simd_abpoa_max_in_row(score_t, SIMDSetIfGreater, SIMDGetIfGreater);                 \
-            set_global_max_score(max, dp_i, max_i);                                             \
+            set_global_max_score(max, dp_i, left_max_i);                                        \
         }                                                                                       \
         if (abpt->align_mode == ABPOA_EXTEND_MODE) {                                            \
             simd_abpoa_max_in_row(score_t, SIMDSetIfGreater, SIMDGetIfGreater);                 \
-            set_extend_max_score(max, dp_i, max_i);                                             \
+            set_extend_max_score(max, dp_i, right_max_i);                                       \
         }                                                                                       \
         if (abpt->wb >= 0) {                                                                    \
             if (abpt->align_mode == ABPOA_GLOBAL_MODE) {                                        \
@@ -1092,7 +1108,11 @@ SIMD_para_t _simd_p64 = {128, 64, 1,  2, 16, -1};
     }                                                                                           \
     if (abpt->align_mode == ABPOA_GLOBAL_MODE) simd_abpoa_global_get_max(score_t, DP_H, dp_sn); \
     res->best_score = best_score;                                                               \
- /* simd_abpoa_print_lg_matrix(score_t, beg_index, end_index); printf("best_score: (%d, %d) -> %d\n", best_i, best_j, best_score); */                          \
+    if (abpt->verbose >= ABPOA_DEBUG_VERBOSE) {                                                 \
+        if (abpt->verbose >= ABPOA_LONG_DEBUG_VERBOSE)                                          \
+            simd_abpoa_print_lg_matrix(score_t, beg_index, end_index);                          \
+        fprintf(stderr, "best_score: (%d, %d) -> %d\n", best_i, best_j, best_score);            \
+    }                                                                                           \
     if (abpt->ret_cigar) simd_abpoa_lg_backtrack(score_t);                                      \
     simd_abpoa_free_var; SIMDFree(GAP_E1S);                                                     \
 }
@@ -1107,10 +1127,10 @@ SIMD_para_t _simd_p64 = {128, 64, 1,  2, 16, -1};
         simd_abpoa_ag_dp(score_t, SIMDShiftOneN, SIMDMax, SIMDAdd, SIMDSub, SIMDGetIfGreater, SIMDSetIfGreater, SIMDSetIfEqual); \
         if (abpt->align_mode == ABPOA_LOCAL_MODE) {                                                 \
             simd_abpoa_max_in_row(score_t, SIMDSetIfGreater, SIMDGetIfGreater);                     \
-            set_global_max_score(max, dp_i, max_i);                                                 \
+            set_global_max_score(max, dp_i, left_max_i);                                            \
         } else if (abpt->align_mode == ABPOA_EXTEND_MODE) {                                         \
             simd_abpoa_max_in_row(score_t, SIMDSetIfGreater, SIMDGetIfGreater);                     \
-            set_extend_max_score(max, dp_i, max_i);                                                 \
+            set_extend_max_score(max, dp_i, right_max_i);                                           \
         }                                                                                           \
         if (abpt->wb >= 0) {                                                                        \
             if (abpt->align_mode == ABPOA_GLOBAL_MODE) {                                            \
@@ -1121,7 +1141,11 @@ SIMD_para_t _simd_p64 = {128, 64, 1,  2, 16, -1};
     }                                                                                               \
     if (abpt->align_mode == ABPOA_GLOBAL_MODE) simd_abpoa_global_get_max(score_t, DP_HEF, 3*dp_sn); \
     res->best_score = best_score;                                                                   \
- /* simd_abpoa_print_ag_matrix(score_t, beg_index, end_index); fprintf(stderr, "best_score: (%d, %d) -> %d\n", best_i, best_j, best_score); */ \
+    if (abpt->verbose >= ABPOA_DEBUG_VERBOSE) {                                                     \
+        if (abpt->verbose >= ABPOA_LONG_DEBUG_VERBOSE)                                              \
+            simd_abpoa_print_ag_matrix(score_t, beg_index, end_index);                              \
+        fprintf(stderr, "best_score: (%d, %d) -> %d\n", best_i, best_j, best_score);                \
+    }                                                                                               \
     if (abpt->ret_cigar) simd_abpoa_ag_backtrack(score_t);                                          \
     simd_abpoa_free_var; SIMDFree(GAP_E1S);                                                         \
 }
@@ -1136,10 +1160,10 @@ SIMD_para_t _simd_p64 = {128, 64, 1,  2, 16, -1};
         simd_abpoa_cg_dp(score_t, SIMDShiftOneN, SIMDMax, SIMDAdd, SIMDSub, SIMDGetIfGreater, SIMDSetIfGreater, SIMDSetIfEqual); \
         if (abpt->align_mode == ABPOA_LOCAL_MODE) {                                                   \
             simd_abpoa_max_in_row(score_t, SIMDSetIfGreater, SIMDGetIfGreater);                       \
-            set_global_max_score(max, dp_i, max_i);                                                   \
+            set_global_max_score(max, dp_i, left_max_i);                                              \
         } else if (abpt->align_mode == ABPOA_EXTEND_MODE) {                                           \
             simd_abpoa_max_in_row(score_t, SIMDSetIfGreater, SIMDGetIfGreater);                       \
-            set_extend_max_score(max, dp_i, max_i);                                                   \
+            set_extend_max_score(max, dp_i, right_max_i);                                             \
         }                                                                                             \
         if (abpt->wb >= 0) {                                                                          \
             if (abpt->align_mode == ABPOA_GLOBAL_MODE) {                                              \
@@ -1148,11 +1172,13 @@ SIMD_para_t _simd_p64 = {128, 64, 1,  2, 16, -1};
             simd_abpoa_ada_max_i;                                                                     \
         }                                                                                             \
     }                                                                                                 \
- /* printf("dp_sn: %d\n", tot_dp_sn); */                                                              \
     if (abpt->align_mode == ABPOA_GLOBAL_MODE) simd_abpoa_global_get_max(score_t, DP_H2E2F, 5*dp_sn); \
     res->best_score = best_score;                                                                     \
- /* simd_abpoa_print_cg_matrix(score_t, beg_index, end_index); */                                     \
- /* fprintf(stderr,"best_score: (%d, %d) -> %d\n",best_i,best_j,best_score); */                       \
+    if (abpt->verbose >= ABPOA_DEBUG_VERBOSE) {                                                       \
+        if (abpt->verbose >= ABPOA_LONG_DEBUG_VERBOSE)                                                \
+            simd_abpoa_print_cg_matrix(score_t, beg_index, end_index);                                \
+        fprintf(stderr,"best_score: (%d, %d) -> %d\n",best_i, best_j, best_score);                    \
+    }                                                                                                 \
     if (abpt->ret_cigar) simd_abpoa_cg_backtrack(score_t);                                            \
     simd_abpoa_free_var; SIMDFree(GAP_E1S); SIMDFree(GAP_E2S);                                        \
 }
@@ -1191,7 +1217,8 @@ int simd_abpoa_realloc(abpoa_t *ab, int gn, int qlen, abpoa_para_t *abpt, SIMD_p
         // err_func_format_printf(__func__, "Warning: Graph is too large or query is too long.\n");
         // return 1;
     // }
-    // fprintf(stderr, "%lld, %lld, %lld\n", (long long)gn, (long long)ab->abm->s_msize, (long long)s_msize);
+    if (abpt->verbose >= ABPOA_DEBUG_VERBOSE)
+        fprintf(stderr, "realloc: graph_node %lld, qlen: %d, (%lld, %lld)\n", (long long)gn, qlen, (long long)ab->abm->s_msize, (long long)s_msize);
     if (s_msize > ab->abm->s_msize) {
         if (ab->abm->s_mem) SIMDFree(ab->abm->s_mem);
         kroundup64(s_msize); ab->abm->s_msize = s_msize;
@@ -1208,7 +1235,7 @@ int simd_abpoa_realloc(abpoa_t *ab, int gn, int qlen, abpoa_para_t *abpt, SIMD_p
     return 0;
 }
 
-void abpoa_init_var(abpoa_para_t *abpt, uint8_t *query, int qlen, SIMDi *qp, SIMDi *qi, int *mat, int qp_sn, int pn, SIMDi SIMD_INF_MIN) {
+void abpoa_init_var(abpoa_para_t *abpt, uint8_t *query, int qlen, SIMDi *qp, SIMDi *qi, int *mat, int64_t qp_sn, int pn, SIMDi SIMD_INF_MIN) {
     int i, j, k; int32_t *_qi;
     /* generate the query profile */
     for (i = 0; i < qp_sn * abpt->m; ++i) qp[i] = SIMD_INF_MIN;
@@ -1225,7 +1252,10 @@ void abpoa_init_var(abpoa_para_t *abpt, uint8_t *query, int qlen, SIMDi *qp, SIM
     }
 }
 
-void abpoa_cg_first_dp(abpoa_para_t *abpt, abpoa_graph_t *graph, uint8_t *index_map, int beg_node_id, int end_node_id, int *dp_beg, int *dp_end, int *dp_beg_sn, int *dp_end_sn, int pn, int qlen, int w, int dp_sn, SIMDi *DP_H2E2F, SIMDi SIMD_INF_MIN, int32_t inf_min, int gap_open1, int gap_ext1, int gap_open2, int gap_ext2, int gap_oe1, int gap_oe2) {
+void abpoa_cg_first_dp(abpoa_para_t *abpt, abpoa_graph_t *graph, uint8_t *index_map, int beg_node_id, int end_node_id, 
+                       int *dp_beg, int *dp_end, int *dp_beg_sn, int *dp_end_sn, int pn, int qlen, int w, int64_t dp_sn, 
+                       SIMDi *DP_H2E2F, SIMDi SIMD_INF_MIN, int32_t inf_min, 
+                       int gap_open1, int gap_ext1, int gap_open2, int gap_ext2, int gap_oe1, int gap_oe2) {
     int i, _end_sn;
     if (abpt->wb >= 0) {
         graph->node_id_to_max_pos_left[beg_node_id] = graph->node_id_to_max_pos_right[beg_node_id] = 0;
@@ -1234,12 +1264,12 @@ void abpoa_cg_first_dp(abpoa_para_t *abpt, abpoa_graph_t *graph, uint8_t *index_
             if (index_map[abpoa_graph_node_id_to_index(graph, out_id)])
                 graph->node_id_to_max_pos_left[out_id] = graph->node_id_to_max_pos_right[out_id] = 1;
         }
-        dp_beg[0] = GET_AD_DP_BEGIN(graph, w, beg_node_id, end_node_id, qlen), dp_end[0] = GET_AD_DP_END(graph, w, beg_node_id, end_node_id, qlen);
+        dp_beg[0] = 0; // GET_AD_DP_BEGIN(graph, w, beg_node_id, end_node_id, qlen)
+        dp_end[0] = GET_AD_DP_END(graph, w, beg_node_id, end_node_id, qlen);
     } else {
         dp_beg[0] = 0, dp_end[0] = qlen;
     }
     dp_beg_sn[0] = (dp_beg[0])/pn; dp_end_sn[0] = (dp_end[0])/pn;
-    dp_beg[0] = dp_beg_sn[0] * pn; dp_end[0] = (dp_end_sn[0]+1)*pn-1;
     SIMDi *dp_h = DP_H2E2F; SIMDi *dp_e1 = dp_h + dp_sn; SIMDi *dp_e2 = dp_e1 + dp_sn, *dp_f1 = dp_e2 + dp_sn, *dp_f2 = dp_f1 + dp_sn;
     _end_sn = MIN_OF_TWO(dp_end_sn[0]+1, dp_sn-1);
 
@@ -1249,40 +1279,37 @@ void abpoa_cg_first_dp(abpoa_para_t *abpt, abpoa_graph_t *graph, uint8_t *index_
     int32_t *_dp_h = (int32_t*)dp_h, *_dp_e1 = (int32_t*)dp_e1, *_dp_e2 = (int32_t*)dp_e2, *_dp_f1 = (int32_t*)dp_f1, *_dp_f2 = (int32_t*)dp_f2;
     _dp_h[0] = 0; _dp_e1[0] = -(gap_oe1); _dp_e2[0] = -(gap_oe2); _dp_f1[0] = _dp_f2[0] = inf_min;
     for (i = 1; i <= dp_end[0]; ++i) { /* no SIMD parallelization */
-        _dp_f1[i] = -(gap_open1 + gap_ext1 * i);
-        _dp_f2[i] = -(gap_open2 + gap_ext2 * i);
-        _dp_h[i] = MAX_OF_TWO(_dp_f1[i], _dp_f2[i]); // -MIN_OF_TWO(gap_open1+gap_ext1*i, gap_open2+gap_ext2*i);
+        _dp_f1[i] = -(gap_open1 + gap_ext1 * i); _dp_f2[i] = -(gap_open2 + gap_ext2 * i);
+        _dp_h[i] = MAX_OF_TWO(_dp_f1[i], _dp_f2[i]);
     }
 }
 
-int abpoa_max(SIMDi SIMD_INF_MIN, SIMDi zero, int inf_min, SIMDi *dp_h, SIMDi *qi, int qlen, int pn, int beg_sn, int end_sn) {
+void abpoa_max(SIMDi SIMD_INF_MIN, SIMDi zero, int inf_min, SIMDi *dp_h, SIMDi *qi, int qlen, int pn, int beg, int end, int *left_max_i, int *right_max_i) {
     /* select max dp_h */
-    int max = inf_min, max_i = -1, i;
-    SIMDi a = dp_h[end_sn], b = qi[end_sn];
-    if (end_sn == qlen / pn) SIMDSetIfGreateri32(a, zero, b, SIMD_INF_MIN, a);
-    for (i = beg_sn; i < end_sn; ++i) {
-        SIMDGetIfGreateri32(b, a, dp_h[i], a, qi[i], b);
-    }
-    int32_t *_dp_h = (int32_t*)&a, *_qi = (int32_t*)&b;
-    for (i = 0; i < pn; ++i) {
+    int max = inf_min, i; *left_max_i = *right_max_i = -1;
+    int32_t *_dp_h = (int32_t*)dp_h, *_qi = (int32_t*)qi;
+    for (i = beg; i <= end; ++i) {
         if (_dp_h[i] > max) {
-            max = _dp_h[i]; max_i = _qi[i];
+            max = _dp_h[i]; *left_max_i = *right_max_i = _qi[i];
+        } else if (_dp_h[i] == max) {
+            *right_max_i = _qi[i];
         }
     }
-    return max_i;
 }
 
-void abpoa_ada_max_i(int max_i, abpoa_graph_t *graph, int node_id) {
+void abpoa_ada_max_i(int left_max_i, int right_max_i, abpoa_graph_t *graph, int node_id) {
     /* set max_pos_left/right for next nodes */
-    int out_i = max_i + 1; int i;
+    int i;
     for (i = 0; i < graph->node[node_id].out_edge_n; ++i) {
         int out_node_id = graph->node[node_id].out_id[i];
-        if (out_i > graph->node_id_to_max_pos_right[out_node_id]) graph->node_id_to_max_pos_right[out_node_id] = out_i;
-        if (out_i < graph->node_id_to_max_pos_left[out_node_id]) graph->node_id_to_max_pos_left[out_node_id] = out_i;
+        if (right_max_i+1 > graph->node_id_to_max_pos_right[out_node_id])
+            graph->node_id_to_max_pos_right[out_node_id] = right_max_i+1;
+        if (left_max_i+1 < graph->node_id_to_max_pos_left[out_node_id])
+            graph->node_id_to_max_pos_left[out_node_id] = left_max_i+1;
     }
 }
 
-void abpoa_global_get_max(abpoa_graph_t *graph, int beg_index, int end_node_id, uint8_t *index_map, SIMDi *DP_H_HE, int dp_sn, int qlen, int *dp_end, int32_t *best_score, int *best_i, int *best_j) {
+void abpoa_global_get_max(abpoa_graph_t *graph, int beg_index, int end_node_id, uint8_t *index_map, SIMDi *DP_H_HE, int64_t dp_sn, int qlen, int *dp_end, int32_t *best_score, int *best_i, int *best_j) {
     int in_id, in_index, dp_i, i;
     for (i = 0; i < graph->node[end_node_id].in_edge_n; ++i) {
         in_id = graph->node[end_node_id].in_id[i];
@@ -1300,26 +1327,37 @@ void abpoa_global_get_max(abpoa_graph_t *graph, int beg_index, int end_node_id,
     }
 }
 
-int abpoa_cg_dp(SIMDi *q, SIMDi *dp_h, SIMDi *dp_e1, SIMDi *dp_e2, SIMDi *dp_f1, SIMDi *dp_f2, int **pre_index, int *pre_n, int index_i, int dp_i, abpoa_graph_t *graph, abpoa_para_t *abpt, int dp_sn, int pn, int qlen, int w, SIMDi *DP_H2E2F, SIMDi SIMD_INF_MIN, SIMDi GAP_O1, SIMDi GAP_O2, SIMDi GAP_E1, SIMDi GAP_E2, SIMDi GAP_OE1, SIMDi GAP_OE2, SIMDi* GAP_E1S, SIMDi* GAP_E2S, SIMDi *PRE_MIN, SIMDi *PRE_MASK, SIMDi *SUF_MIN, int log_n, int *dp_beg, int *dp_end, int *dp_beg_sn, int *dp_end_sn, int end_node_id) {
+int abpoa_cg_dp(SIMDi *q, SIMDi *dp_h, SIMDi *dp_e1, SIMDi *dp_e2, SIMDi *dp_f1, SIMDi *dp_f2, int inf_min,
+                int **pre_index, int *pre_n, int beg_index, int index_i, int dp_i, abpoa_graph_t *graph, abpoa_para_t *abpt, 
+                int64_t dp_sn, int pn, int qlen, int w, SIMDi *DP_H2E2F, SIMDi SIMD_INF_MIN, 
+                SIMDi GAP_O1, SIMDi GAP_O2, SIMDi GAP_E1, SIMDi GAP_E2, SIMDi GAP_OE1, SIMDi GAP_OE2, SIMDi* GAP_E1S, SIMDi* GAP_E2S, 
+                SIMDi *PRE_MIN, SIMDi *PRE_MASK, SIMDi *SUF_MIN, int log_n, 
+                int *dp_beg, int *dp_end, int *dp_beg_sn, int *dp_end_sn, int end_node_id) {
     int tot_dp_sn = 0, i, pre_i, node_id = abpoa_graph_index_to_node_id(graph, index_i);
-    int min_pre_beg_sn, max_pre_end_sn, beg, end, beg_sn, end_sn, pre_end, pre_end_sn, pre_beg_sn, sn_i;
+    int min_pre_beg, min_pre_beg_sn, max_pre_end_sn, beg, end, beg_sn, end_sn, pre_end, pre_end_sn, pre_beg_sn, sn_i;
     if (abpt->wb < 0) {
         beg = dp_beg[dp_i] = 0, end = dp_end[dp_i] = qlen;
         beg_sn = dp_beg_sn[dp_i] = beg/pn; end_sn = dp_end_sn[dp_i] = end/pn;
         min_pre_beg_sn = 0, max_pre_end_sn = end_sn;
     } else {
         beg = GET_AD_DP_BEGIN(graph, w, node_id, end_node_id, qlen), end = GET_AD_DP_END(graph, w, node_id, end_node_id, qlen);
-        beg_sn = beg / pn; min_pre_beg_sn = INT32_MAX, max_pre_end_sn = -1;
+        if (abpt->verbose >= ABPOA_LONG_DEBUG_VERBOSE)
+            fprintf(stderr, "index: %d (node: %d): raw beg: %d, end: %d\n", index_i-beg_index, node_id, beg, end);
+        beg_sn = beg / pn; min_pre_beg = min_pre_beg_sn = INT32_MAX, max_pre_end_sn = -1;
         for (i = 0; i < pre_n[dp_i]; ++i) {
             pre_i = pre_index[dp_i][i];
-            if (min_pre_beg_sn > dp_beg_sn[pre_i]) min_pre_beg_sn = dp_beg_sn[pre_i];
+            if (abpt->verbose >= ABPOA_LONG_DEBUG_VERBOSE)
+                fprintf(stderr, "\tpre_i: %d, pre_dp_beg: %d, pre_dp_beg_sn: %d\n", pre_i, dp_beg[pre_i], dp_beg_sn[pre_i]);
+            if (min_pre_beg > dp_beg[pre_i]) min_pre_beg = dp_beg[pre_i], min_pre_beg_sn = dp_beg_sn[pre_i];
             if (max_pre_end_sn < dp_end_sn[pre_i]) max_pre_end_sn = dp_end_sn[pre_i];
-        } if (beg_sn < min_pre_beg_sn) beg_sn = min_pre_beg_sn;
-        dp_beg_sn[dp_i] = beg_sn; beg = dp_beg[dp_i] = dp_beg_sn[dp_i] * pn;
-        end_sn = dp_end_sn[dp_i] = end/pn; end = dp_end[dp_i] = (dp_end_sn[dp_i]+1)*pn-1;
-#ifdef __DEBUG__
-    fprintf(stderr, "index: %d (node: %d): beg: %d, end: %d, beg_sn: %d, end_sn: %d\n", index_i, node_id, beg, end, beg_sn, end_sn);
-#endif
+        } 
+        if (beg_sn < min_pre_beg_sn) {
+            assert(beg < min_pre_beg); beg_sn = min_pre_beg_sn; beg = min_pre_beg;
+        }
+        // NOT extend dp_beg/end based on dp_beg/end_sn
+        dp_beg_sn[dp_i] = beg_sn; dp_beg[dp_i] = beg; end_sn = dp_end_sn[dp_i] = end/pn; dp_end[dp_i] = end;
+        if (abpt->verbose >= ABPOA_LONG_DEBUG_VERBOSE)
+            fprintf(stderr, "index: %d (node: %d): beg: %d, end: %d, beg_sn: %d, end_sn: %d, max_left: %d, max_right: %d\n", index_i-beg_index, node_id, beg, end, beg_sn, end_sn, graph->node_id_to_max_pos_left[node_id], graph->node_id_to_max_pos_right[node_id]);
     }
     tot_dp_sn += (end_sn - beg_sn + 1);
     /* loop query */
@@ -1333,7 +1371,7 @@ int abpoa_cg_dp(SIMDi *q, SIMDi *dp_h, SIMDi *dp_e1, SIMDi *dp_e2, SIMDi *dp_f1,
     /* set M from (pre_i, q_i-1) */
     if (pre_beg_sn < beg_sn) _beg_sn = beg_sn, first = SIMDShiftRight(pre_dp_h[beg_sn-1], SIMDTotalBytes-SIMDShiftOneNi32);
     else _beg_sn = pre_beg_sn, first = SIMDShiftRight(SIMD_INF_MIN, SIMDTotalBytes-SIMDShiftOneNi32);
-    _end_sn = MIN_OF_THREE((pre_end+1)/pn, end_sn, dp_sn-1);
+    _end_sn = MIN_OF_THREE((pre_end+1)/pn, end_sn, dp_sn-1); // pre_end+1: match/mismatch operations
     for (i = beg_sn; i < _beg_sn; ++i) dp_h[i] = SIMD_INF_MIN;
     for (i = _end_sn+1; i <= MIN_OF_TWO(end_sn+1, dp_sn-1); ++i) dp_h[i] = SIMD_INF_MIN;
     for (sn_i = _beg_sn; sn_i <= _end_sn; ++sn_i) { /* SIMD parallelization */
@@ -1349,7 +1387,7 @@ int abpoa_cg_dp(SIMDi *q, SIMDi *dp_h, SIMDi *dp_e1, SIMDi *dp_e2, SIMDi *dp_f1,
         dp_e1[sn_i] = pre_dp_e1[sn_i];
         dp_e2[sn_i] = pre_dp_e2[sn_i];
     }
-    // if (index_i == 13095) debug_simd_abpoa_print_cg_matrix_row("1", int32_t, index_i);
+    // debug_simd_abpoa_print_cg_matrix_row("1", int32_t, index_i);
     // new init end
     /* get max m and e */
     for (i = 1; i < pre_n[dp_i]; ++i) {
@@ -1381,6 +1419,10 @@ int abpoa_cg_dp(SIMDi *q, SIMDi *dp_h, SIMDi *dp_e1, SIMDi *dp_e2, SIMDi *dp_f1,
     /* new F start */
     first = SIMDShiftRight(SIMDShiftLeft(dp_h[beg_sn], SIMDTotalBytes-SIMDShiftOneNi32), SIMDTotalBytes-SIMDShiftOneNi32);
     int set_num; SIMDi first2 = first;//, tmp;
+    int32_t *_dp_h = (int32_t*)dp_h, *_dp_e1 = (int32_t*)dp_e1, *_dp_e2 = (int32_t*)dp_e2;
+    // set h/e as inf_min for cells out of range: < dp_beg or > dp_end
+    for (i = beg_sn * pn; i < beg; ++i) _dp_h[i] = _dp_e1[i] = _dp_e2[i] = inf_min;
+    for (i = end+1; i < (end_sn+1)*pn; ++i) _dp_h[i] = _dp_e1[i] = _dp_e2[i] = inf_min;
     for (sn_i = beg_sn; sn_i <= end_sn; ++sn_i) {
         if (sn_i < min_pre_beg_sn) {
             _err_fatal_simple(__func__, "sn_i < min_pre_beg_sn\n");
@@ -1410,10 +1452,14 @@ int abpoa_cg_dp(SIMDi *q, SIMDi *dp_h, SIMDi *dp_e1, SIMDi *dp_e2, SIMDi *dp_f1,
         // SIMDSetIfEquali32(dp_e2[sn_i], dp_h[sn_i], tmp, SIMDMaxi32(SIMDSubi32(dp_e2[sn_i], GAP_E2), SIMDSubi32(dp_h[sn_i], GAP_OE2)), SIMD_INF_MIN);
         dp_e2[sn_i] = SIMDMaxi32(SIMDSubi32(dp_e2[sn_i], GAP_E2), SIMDSubi32(dp_h[sn_i], GAP_OE2));
     }
+    // debug_simd_abpoa_print_cg_matrix_row("0", int32_t, index_i);
     return tot_dp_sn;
 }
 
-void abpoa_cg_backtrack(SIMDi *DP_H2E2F, int **pre_index, int *pre_n, int *dp_beg, int *dp_end, int dp_sn, int m, int *mat, int gap_ext1, int gap_ext2, int gap_oe1, int gap_oe2, int beg_index, int best_dp_i, int best_dp_j, int qlen, abpoa_graph_t *graph, abpoa_para_t *abpt, uint8_t *query, abpoa_res_t *res) {
+void abpoa_cg_backtrack(SIMDi *DP_H2E2F, int **pre_index, int *pre_n, int *dp_beg, int *dp_end, int64_t dp_sn, 
+                        int m, int *mat, int gap_ext1, int gap_ext2, int gap_oe1, int gap_oe2, 
+                        int beg_index, int best_dp_i, int best_dp_j, int qlen, 
+                        abpoa_graph_t *graph, abpoa_para_t *abpt, uint8_t *query, abpoa_res_t *res) {
     int dp_i, dp_j, k, pre_i, n_c = 0, m_c = 0, id, hit, cur_op = ABPOA_ALL_OP, _start_i, _start_j;
     SIMDi *dp_h;
     int32_t s, is_match, *_dp_h=NULL, *_dp_e1, *_dp_e2, *_pre_dp_h, *_pre_dp_e1, *_pre_dp_e2, *_dp_f1, *_dp_f2; abpoa_cigar_t *cigar = 0;
@@ -1540,10 +1586,8 @@ void abpoa_cg_backtrack(SIMDi *DP_H2E2F, int **pre_index, int *pre_n, int *dp_be
                 }
             }
         }
-        if (hit == 0) exit(1);
-#ifdef __DEBUG__
-        fprintf(stderr, "%d, %d, %d\n", dp_i, dp_j, cur_op);
-#endif
+        if (hit == 0) err_fatal_simple("Error in cg_backtrack.");
+        simd_output_pre_nodes(pre_index[dp_i], pre_n[dp_i], dp_i, dp_j, cur_op, abpt->verbose);
     }
     if (dp_j > 0) cigar = abpoa_push_cigar(&n_c, &m_c, cigar, ABPOA_CINS, dp_j, -1, dp_j-1);
     /* reverse cigar */
@@ -1554,15 +1598,23 @@ void abpoa_cg_backtrack(SIMDi *DP_H2E2F, int **pre_index, int *pre_n, int *dp_be
     /*abpoa_print_cigar(n_c, *graph_cigar, graph);*/
 }
 
-int abpoa_cg_global_align_sequence_to_graph_core(abpoa_t *ab, int beg_node_id, int beg_index, int end_node_id, int end_index, uint8_t *index_map, int qlen, uint8_t *query, abpoa_para_t *abpt, SIMD_para_t sp, abpoa_res_t *res) {
-    int tot_dp_sn = 0;
+int abpoa_cg_global_align_sequence_to_graph_core(abpoa_t *ab, 
+                                                 int beg_node_id, int beg_index, int end_node_id, int end_index, 
+                                                 uint8_t *index_map, int qlen, uint8_t *query, 
+                                                 abpoa_para_t *abpt, SIMD_para_t sp, abpoa_res_t *res) {
+    int i, j;
     abpoa_graph_t *graph = ab->abg; abpoa_simd_matrix_t *abm = ab->abm;
-    int matrix_row_n = end_index-beg_index+1, matrix_col_n = qlen + 1;
+    int64_t matrix_row_n = end_index-beg_index+1, matrix_col_n = qlen + 1;
     int **pre_index, *pre_n, _pre_index, _pre_n;
-    int i, j, *dp_beg, *dp_beg_sn, *dp_end, *dp_end_sn, node_id, index_i, dp_i;
-    int beg_sn, end_sn;
-    int pn, log_n, size, qp_sn, dp_sn; /* pn: value per SIMDi, qp_sn/dp_sn/d_sn: segmented length*/
+    int node_id, index_i, dp_i;
+    int pn, log_n, size; int64_t qp_sn, dp_sn; /* pn: value per SIMDi, qp_sn/dp_sn/d_sn: segmented length*/
     SIMDi *dp_h, *qp, *qi;
+    // dp_beg/end: beg/end position of dp band in query (column) for each node (row)
+    // dp_beg/end_sn: beg/end VECTOR position of dp band in query (segmented column) for each node (row)
+    // XXX to keep consistency between different VECTOR size (SSE41/AVX2/AVX/512):
+    // .   dp cells within dp_beg/end_sn VECTORs but not within dp_beg/end need to be set to -inf
+    int *dp_beg, *dp_beg_sn, *dp_end, *dp_end_sn; 
+
     int32_t best_score = sp.inf_min, inf_min = sp.inf_min;
     int *mat = abpt->mat, best_i = 0, best_j = 0; int32_t gap_ext1 = abpt->gap_ext1;
     int w = abpt->wb < 0 ? qlen : abpt->wb + (int)(abpt->wf * qlen); /* when w < 0, do whole global */
@@ -1573,12 +1625,15 @@ int abpoa_cg_global_align_sequence_to_graph_core(abpoa_t *ab, int beg_node_id, i
     int32_t gap_open1 = abpt->gap_open1, gap_oe1 = gap_open1 + gap_ext1;
     int32_t gap_open2 = abpt->gap_open2, gap_ext2 = abpt->gap_ext2, gap_oe2 = gap_open2 + gap_ext2;
     SIMDi *DP_H2E2F, *dp_e1, *dp_e2, *dp_f2, *dp_f1;
-    SIMDi GAP_O1 = SIMDSetOnei32(gap_open1), GAP_O2 = SIMDSetOnei32(gap_open2), GAP_E1 = SIMDSetOnei32(gap_ext1), GAP_E2 = SIMDSetOnei32(gap_ext2), GAP_OE1 = SIMDSetOnei32(gap_oe1), GAP_OE2 = SIMDSetOnei32(gap_oe2);
+    SIMDi GAP_O1 = SIMDSetOnei32(gap_open1), GAP_O2 = SIMDSetOnei32(gap_open2);
+    SIMDi GAP_E1 = SIMDSetOnei32(gap_ext1), GAP_E2 = SIMDSetOnei32(gap_ext2);
+    SIMDi GAP_OE1 = SIMDSetOnei32(gap_oe1), GAP_OE2 = SIMDSetOnei32(gap_oe2);
     DP_H2E2F = qp + qp_sn * abpt->m; qi = DP_H2E2F + dp_sn * matrix_row_n * 5;
 
     // for SET_F mask[pn], suf_min[pn], pre_min[logN]
     SIMDi *PRE_MASK, *SUF_MIN, *PRE_MIN, *GAP_E1S, *GAP_E2S;
-    PRE_MASK = (SIMDi*)SIMDMalloc((pn+1) * size, size), SUF_MIN = (SIMDi*)SIMDMalloc((pn+1) * size, size), PRE_MIN = (SIMDi*)SIMDMalloc(pn * size, size), GAP_E1S =  (SIMDi*)SIMDMalloc(log_n * size, size), GAP_E2S =  (SIMDi*)SIMDMalloc(log_n * size, size);
+    PRE_MASK = (SIMDi*)SIMDMalloc((pn+1) * size, size), SUF_MIN = (SIMDi*)SIMDMalloc((pn+1) * size, size); 
+    PRE_MIN = (SIMDi*)SIMDMalloc(pn * size, size), GAP_E1S = (SIMDi*)SIMDMalloc(log_n * size, size), GAP_E2S =  (SIMDi*)SIMDMalloc(log_n * size, size);
     for (i = 0; i < pn; ++i) {
         int32_t *pre_mask = (int32_t*)(PRE_MASK + i);
         for (j = 0; j <= i; ++j) pre_mask[j] = -1;
@@ -1612,26 +1667,34 @@ int abpoa_cg_global_align_sequence_to_graph_core(abpoa_t *ab, int beg_node_id, i
         }
         pre_n[dp_i] = _pre_n;
     }
-    abpoa_cg_first_dp(abpt, graph, index_map, beg_node_id, end_node_id, dp_beg, dp_end, dp_beg_sn, dp_end_sn, pn, qlen, w, dp_sn, DP_H2E2F, SIMD_INF_MIN, inf_min, gap_open1, gap_ext1, gap_open2, gap_ext2, gap_oe1, gap_oe2);
+    abpoa_cg_first_dp(abpt, graph, index_map, beg_node_id, end_node_id, 
+                      dp_beg, dp_end, dp_beg_sn, dp_end_sn, pn, qlen, w, dp_sn, 
+                      DP_H2E2F, SIMD_INF_MIN, inf_min, gap_open1, gap_ext1, gap_open2, gap_ext2, gap_oe1, gap_oe2);
 
     for (index_i=beg_index+1, dp_i=1; index_i<end_index; ++index_i, ++dp_i) {
         if (index_map[index_i] == 0) continue;
         node_id = abpoa_graph_index_to_node_id(graph, index_i);
         SIMDi *q = qp + graph->node[node_id].base * qp_sn;
-        dp_h = DP_H2E2F + (dp_i*5) * dp_sn; dp_e1 = dp_h + dp_sn; dp_e2 = dp_e1 + dp_sn; dp_f1 = dp_e2 + dp_sn; dp_f2 = dp_f1 + dp_sn;
-        tot_dp_sn += abpoa_cg_dp(q, dp_h, dp_e1, dp_e2, dp_f1, dp_f2, pre_index, pre_n, index_i, dp_i, graph, abpt, dp_sn, pn, qlen, w, DP_H2E2F, SIMD_INF_MIN, GAP_O1, GAP_O2, GAP_E1, GAP_E2, GAP_OE1, GAP_OE2, GAP_E1S, GAP_E2S, PRE_MIN, PRE_MASK, SUF_MIN, log_n, dp_beg, dp_end, dp_beg_sn, dp_end_sn, end_node_id);
+        dp_h = DP_H2E2F + (dp_i*5) * dp_sn; 
+        dp_e1 = dp_h + dp_sn; dp_e2 = dp_e1 + dp_sn; 
+        dp_f1 = dp_e2 + dp_sn; dp_f2 = dp_f1 + dp_sn;
+        abpoa_cg_dp(q, dp_h, dp_e1, dp_e2, dp_f1, dp_f2, inf_min,
+                    pre_index, pre_n, beg_index, index_i, dp_i, 
+                    graph, abpt, dp_sn, pn, qlen, w, 
+                    DP_H2E2F, SIMD_INF_MIN, GAP_O1, GAP_O2, GAP_E1, GAP_E2, GAP_OE1, GAP_OE2, GAP_E1S, GAP_E2S, 
+                    PRE_MIN, PRE_MASK, SUF_MIN, log_n, 
+                    dp_beg, dp_end, dp_beg_sn, dp_end_sn, end_node_id);
         if (abpt->wb >= 0) {
-            beg_sn = dp_beg_sn[dp_i], end_sn = dp_end_sn[dp_i];
-            int max_i = abpoa_max(SIMD_INF_MIN, zero, inf_min, dp_h, qi, qlen, pn, beg_sn, end_sn);
-            abpoa_ada_max_i(max_i, graph, node_id);
+            int left_max_i, right_max_i;
+            abpoa_max(SIMD_INF_MIN, zero, inf_min, dp_h, qi, qlen, pn, dp_beg[dp_i], dp_end[dp_i], &left_max_i, &right_max_i);
+            abpoa_ada_max_i(left_max_i, right_max_i, graph, node_id);
         }
     }
-    // printf("dp_sn: %d\n", tot_dp_sn);
-    // printf("dp_sn: %d, node_n: %d, seq_n: %d\n", tot_dp_sn, graph->node_n, qlen);
     abpoa_global_get_max(graph, beg_index, end_node_id, index_map, DP_H2E2F, 5*dp_sn, qlen, dp_end, &best_score, &best_i, &best_j);
-#ifdef __DEBUG__
-    simd_abpoa_print_cg_matrix(int32_t, beg_index, end_index); fprintf(stderr, "best_score: (%d, %d) -> %d\n", best_i, best_j, best_score);
-#endif
+    if (abpt->verbose >= ABPOA_DEBUG_VERBOSE) {
+        if (abpt->verbose >= ABPOA_LONG_DEBUG_VERBOSE) simd_abpoa_print_cg_matrix(int32_t, beg_index, end_index); 
+        fprintf(stderr, "best_score: (%d, %d) -> %d\n", best_i, best_j, best_score);
+    }
     res->best_score = best_score;
     abpoa_cg_backtrack(DP_H2E2F, pre_index, pre_n, dp_beg, dp_end, dp_sn, abpt->m, mat, gap_ext1, gap_ext2, gap_oe1, gap_oe2, beg_index, best_i, best_j, qlen, graph, abpt, query, res);
     for (i = 0; i < matrix_row_n; ++i) free(pre_index[i]);
@@ -1644,8 +1707,10 @@ int abpoa_cg_global_align_sequence_to_graph_core(abpoa_t *ab, int beg_node_id, i
 // align query to subgraph between beg_node_id and end_node_id (both are excluded)
 // generally: beg/end are the SRC/SINK_node
 int simd_abpoa_align_sequence_to_subgraph(abpoa_t *ab, abpoa_para_t *abpt, int beg_node_id, int end_node_id, uint8_t *query, int qlen, abpoa_res_t *res) {
-    // if (abpt->simd_flag == 0) err_fatal_simple("No SIMD instruction available.");
-
+    int old_verbose = abpt->verbose;
+    if (abpt->verbose >= ABPOA_DEBUG_VERBOSE) fprintf(stderr, "beg_id: %d, end_id: %d, qlen: %d\n", beg_node_id, end_node_id, qlen);
+    // if (qlen == 8957) // for debug
+        // abpt->verbose = ABPOA_LONG_DEBUG_VERBOSE;
     int i, j, beg_index = ab->abg->node_id_to_index[beg_node_id], end_index = ab->abg->node_id_to_index[end_node_id];
     int gn = end_index - beg_index + 1;
     uint8_t *index_map = (uint8_t*)_err_calloc(ab->abg->node_n, sizeof(uint8_t));
@@ -1660,19 +1725,16 @@ int simd_abpoa_align_sequence_to_subgraph(abpoa_t *ab, abpoa_para_t *abpt, int b
         }
     }
 
-#ifdef __DEBUG__
+#ifdef __SIMD_DEBUG__
     _simd_p32.inf_min = MAX_OF_TWO(abpt->gap_ext1, abpt->gap_ext2) * 31 +MAX_OF_THREE(INT32_MIN + abpt->min_mis, INT32_MIN + abpt->gap_open1 + abpt->gap_ext1, INT32_MIN + abpt->gap_open2 + abpt->gap_ext2);
     if (simd_abpoa_realloc(ab, gn, qlen, abpt, _simd_p32)) return 0;
-    if (abpt->gap_mode == ABPOA_CONVEX_GAP) abpoa_cg_global_align_sequence_to_graph_core(ab, beg_node_id, beg_index, end_node_id, end_index, index_map, qlen, query, abpt, _simd_p32, res);
+    if (abpt->gap_mode == ABPOA_CONVEX_GAP)
+        abpoa_cg_global_align_sequence_to_graph_core(ab, beg_node_id, beg_index, end_node_id, end_index, index_map, qlen, query, abpt, _simd_p32, res);
 #else
     int32_t max_score, bits, mem_ret=0, gap_ext1 = abpt->gap_ext1, gap_ext2 = abpt->gap_ext2;
     int32_t gap_oe1 = abpt->gap_open1+gap_ext1, gap_oe2 = abpt->gap_open2+gap_ext2;
-    // if (abpt->simd_flag & SIMD_AVX512F && !(abpt->simd_flag & SIMD_AVX512BW))
-    //    max_score = INT16_MAX + 1; // AVX512F has no 8/16 bits operations
-    // else {
     int len = qlen > gn ? qlen : gn;
     max_score = MAX_OF_TWO(qlen * abpt->max_mat, len * abpt->gap_ext1 + abpt->gap_open1);
-    // }
     if (max_score <= INT16_MAX - abpt->min_mis - gap_oe1 - gap_oe2) {
         _simd_p16.inf_min = MAX_OF_THREE(INT16_MIN + abpt->min_mis, INT16_MIN + gap_oe1, INT16_MIN + gap_oe2) + 31 * MAX_OF_TWO(gap_ext1, gap_ext2);
         mem_ret = simd_abpoa_realloc(ab, gn, qlen, abpt, _simd_p16);
@@ -1709,6 +1771,7 @@ int simd_abpoa_align_sequence_to_subgraph(abpoa_t *ab, abpoa_para_t *abpt, int b
     }
 #endif
     free(index_map);
+    abpt->verbose = old_verbose;
     return 0;
 }
 

From b8685d0d8db198d7adf202feb48130b1196b2c4a Mon Sep 17 00:00:00 2001
From: Yan Gao <yangao07@hit.edu.cn>
Date: Fri, 5 Apr 2024 16:30:26 -0400
Subject: [PATCH 19/38] fix instruction-dependent result difference and seg
 fault due to large memory usage (#66)

---
 Makefile | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/Makefile b/Makefile
index 2c5f8f0..208570a 100644
--- a/Makefile
+++ b/Makefile
@@ -6,15 +6,14 @@ OS          := $(shell uname)
 ARCH        := $(shell arch)
 # add -fno-tree-vectorize to avoid certain vectorization errors in O3 optimization
 # right now, we are using -O3 for the best performance, and no vectorization errors were found
-EXTRA_FLAGS = -Wall -Wno-unused-function -Wno-misleading-indentation -DUSE_SIMDE -DSIMDE_ENABLE_NATIVE_ALIASES # -fno-tree-vectorize
+EXTRA_FLAGS = -Wall -Wno-unused-function -Wno-misleading-indentation -DUSE_SIMDE -DSIMDE_ENABLE_NATIVE_ALIASES# -fno-tree-vectorize
 
-DFLAGS	  =
 # for debug
 ifneq ($(debug),)
-	DFLAGS  +=   -D __DEBUG__
+	EXTRA_FLAGS  += -D __DEBUG__
 endif
 ifneq ($(sdebug),)
-	DFLAGS  +=   -D __SIMD_DEBUG__
+	EXTRA_FLAGS  += -D __SIMD_DEBUG__
 endif
 
 # for gdb
@@ -24,7 +23,7 @@ else
 	OPT_FLAGS = -O3
 endif
 
-CFLAGS        = OPT_FLAGS ${DFLAGS} $(EXTRA_FLAGS)
+CFLAGS        = $(OPT_FLAGS) $(EXTRA_FLAGS)
 
 # for gprof
 ifneq ($(pg),)

From eadc97d838593100930948e07cec391eb3888534 Mon Sep 17 00:00:00 2001
From: Yan Gao <yangao07@hit.edu.cn>
Date: Fri, 5 Apr 2024 17:29:59 -0400
Subject: [PATCH 20/38] update README

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index baa0f13..82eb2a1 100644
--- a/README.md
+++ b/README.md
@@ -172,7 +172,7 @@ You can also use any score matrix, as long as it has the same format as the abov
 ```
 abpoa seq.fa -g poa.png > cons.fa
 ```
-See [Plot of alignment graph](#plot) for more details about the plot file.
+See [Plot of alignment graph](#plot-of-alignment-graph) for more details about the plot file.
 
 ## Input
 abPOA works with FASTA, FASTQ, gzip'd FASTA(.fa.gz) and gzip'd FASTQ(.fq.gz) formats. The input file is 

From 53f276ea5cbcd9abb5dc940cbee15adf5f7abef3 Mon Sep 17 00:00:00 2001
From: Yan Gao <yangao07@hit.edu.cn>
Date: Fri, 5 Apr 2024 17:38:16 -0400
Subject: [PATCH 21/38] update ci

---
 .github/workflows/wheels.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml
index 1ceec11..17518b8 100644
--- a/.github/workflows/wheels.yml
+++ b/.github/workflows/wheels.yml
@@ -28,7 +28,8 @@ jobs:
         - [macos-latest, macosx_x86_64, x86_64]
         # skip these for now, need more work
         - [macos-latest, macosx_arm64, arm64]
-        python: ["cp38", "cp39", "cp310", "cp311", "cp312"]
+        # python: ["cp38", "cp39", "cp310", "cp311", "cp312"]
+        python: ["cp312", "cp313"]
 
     steps:
       - uses: actions/checkout@v3

From cd178fdd64c22ae1800d8600224c82684329c894 Mon Sep 17 00:00:00 2001
From: Yan Gao <yangao07@hit.edu.cn>
Date: Fri, 5 Apr 2024 17:40:57 -0400
Subject: [PATCH 22/38] update ci

---
 .github/workflows/wheels.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml
index 17518b8..b270959 100644
--- a/.github/workflows/wheels.yml
+++ b/.github/workflows/wheels.yml
@@ -29,7 +29,7 @@ jobs:
         # skip these for now, need more work
         - [macos-latest, macosx_arm64, arm64]
         # python: ["cp38", "cp39", "cp310", "cp311", "cp312"]
-        python: ["cp312", "cp313"]
+        python: ["cp312"]
 
     steps:
       - uses: actions/checkout@v3

From 8d1b1cc31bd4f9a942edda2dc423ca0cab06e10e Mon Sep 17 00:00:00 2001
From: Yan Gao <yangao07@hit.edu.cn>
Date: Mon, 8 Apr 2024 15:38:19 -0400
Subject: [PATCH 23/38] fix 'Error in cg_backtrack' (#66)

---
 src/simd_abpoa_align.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/simd_abpoa_align.c b/src/simd_abpoa_align.c
index 6362ec1..1db098b 100644
--- a/src/simd_abpoa_align.c
+++ b/src/simd_abpoa_align.c
@@ -777,6 +777,7 @@ void simd_output_pre_nodes(int *pre_index, int pre_n, int dp_i, int dp_j, int cu
             } else set_num = pn;                                                                                                  \
         }                                                                                                                         \
         dp_h[sn_i] = SIMDMax(dp_h[sn_i], first);                                                                                  \
+        if (sn_i == end_sn) for (i = end+1; i < (end_sn+1)*pn; ++i) _dp_h[i] = inf_min;                                           \
         SIMD_SET_F(dp_h[sn_i], log_n, set_num, PRE_MIN, PRE_MASK, SUF_MIN, GAP_E1S, SIMDMax, SIMDAdd, SIMDSub, SIMDShiftOneN);    \
         first = SIMDOri(SIMDAndi(SIMDShiftRight(SIMDSub(dp_h[sn_i], GAP_E1), SIMDTotalBytes-SIMDShiftOneN), PRE_MASK[0]), SUF_MIN[0]); \
     }                                                                                                                             \
@@ -885,9 +886,11 @@ void simd_output_pre_nodes(int *pre_index, int pre_n, int dp_i, int dp_j, int cu
         dp_h[sn_i] = SIMDMax(dp_h[sn_i], dp_e1[sn_i]); SIMDi tmp = dp_h[sn_i];                                                    \
         if (abpt->align_mode == ABPOA_LOCAL_MODE) {                                                                               \
             dp_h[sn_i] = SIMDMax(zero, SIMDMax(dp_h[sn_i], dp_f1[sn_i]));                                                         \
+            if (sn_i == end_sn) for (i = end+1; i < (end_sn+1)*pn; ++i) _dp_h[i] = _dp_e1[i] = inf_min;                           \
             SIMDSetIfEqual(dp_e1[sn_i], dp_h[sn_i],tmp, SIMDMax(SIMDSub(dp_e1[sn_i],GAP_E1), SIMDSub(dp_h[sn_i],GAP_OE1)),zero);  \
         } else {                                                                                                                  \
             dp_h[sn_i] = SIMDMax(dp_h[sn_i], dp_f1[sn_i]);                                                                        \
+            if (sn_i == end_sn) for (i = end+1; i < (end_sn+1)*pn; ++i) _dp_h[i] = _dp_e1[i] = inf_min;                           \
             SIMDSetIfEqual(dp_e1[sn_i], dp_h[sn_i],tmp, SIMDMax(SIMDSub(dp_e1[sn_i],GAP_E1), SIMDSub(dp_h[sn_i],GAP_OE1)),SIMD_INF_MIN); \
         }                                                                                                                         \
     }                                                                                                                             \
@@ -1014,11 +1017,13 @@ void simd_output_pre_nodes(int *pre_index, int pre_n, int dp_i, int dp_j, int cu
         first2 = SIMDShiftRight(SIMDMax(dp_h[sn_i], SIMDAdd(dp_f2[sn_i], GAP_O2)), SIMDTotalBytes-SIMDShiftOneN);                 \
         if (abpt->align_mode == ABPOA_LOCAL_MODE) {                                                                               \
             dp_h[sn_i] = SIMDMax(zero, SIMDMax(dp_h[sn_i], SIMDMax(dp_f1[sn_i], dp_f2[sn_i])));                                   \
+            if (sn_i == end_sn) for (i = end+1; i < (end_sn+1)*pn; ++i) _dp_h[i] = _dp_e1[i] = _dp_e2[i] = inf_min;               \
             dp_e1[sn_i] = SIMDMax(zero,SIMDMax(SIMDSub(dp_e1[sn_i],GAP_E1),SIMDSub(dp_h[sn_i],GAP_OE1)));                         \
             dp_e2[sn_i] = SIMDMax(zero,SIMDMax(SIMDSub(dp_e2[sn_i],GAP_E2),SIMDSub(dp_h[sn_i],GAP_OE2)));                         \
         } else {                                                                                                                  \
             /* H = max{H, F}    */                                                                                                \
             dp_h[sn_i] = SIMDMax(dp_h[sn_i], SIMDMax(dp_f1[sn_i], dp_f2[sn_i]));                                                  \
+            if (sn_i == end_sn) for (i = end+1; i < (end_sn+1)*pn; ++i) _dp_h[i] = _dp_e1[i] = _dp_e2[i] = inf_min;               \
             /* e for next cell */                                                                                                 \
             dp_e1[sn_i] = SIMDMax(SIMDSub(dp_e1[sn_i],GAP_E1),SIMDSub(dp_h[sn_i],GAP_OE1));                                       \
             dp_e2[sn_i] = SIMDMax(SIMDSub(dp_e2[sn_i],GAP_E2),SIMDSub(dp_h[sn_i],GAP_OE2));                                       \
@@ -1420,6 +1425,7 @@ int abpoa_cg_dp(SIMDi *q, SIMDi *dp_h, SIMDi *dp_e1, SIMDi *dp_e2, SIMDi *dp_f1,
     first = SIMDShiftRight(SIMDShiftLeft(dp_h[beg_sn], SIMDTotalBytes-SIMDShiftOneNi32), SIMDTotalBytes-SIMDShiftOneNi32);
     int set_num; SIMDi first2 = first;//, tmp;
     int32_t *_dp_h = (int32_t*)dp_h, *_dp_e1 = (int32_t*)dp_e1, *_dp_e2 = (int32_t*)dp_e2;
+    // debug_simd_abpoa_print_cg_matrix_row("4", int32_t, index_i);
     // set h/e as inf_min for cells out of range: < dp_beg or > dp_end
     for (i = beg_sn * pn; i < beg; ++i) _dp_h[i] = _dp_e1[i] = _dp_e2[i] = inf_min;
     for (i = end+1; i < (end_sn+1)*pn; ++i) _dp_h[i] = _dp_e1[i] = _dp_e2[i] = inf_min;
@@ -1445,14 +1451,16 @@ int abpoa_cg_dp(SIMDi *q, SIMDi *dp_h, SIMDi *dp_e1, SIMDi *dp_e2, SIMDi *dp_f1,
         first2 = SIMDShiftRight(SIMDMaxi32(dp_h[sn_i], SIMDAddi32(dp_f2[sn_i], GAP_O2)), SIMDTotalBytes-SIMDShiftOneNi32);
         /* H = max{H, F}    */
         dp_h[sn_i] = SIMDMaxi32(SIMDMaxi32(dp_h[sn_i], dp_f1[sn_i]), dp_f2[sn_i]);
+        if (sn_i == end_sn) for (i = end+1; i < (end_sn+1)*pn; ++i) _dp_h[i] = _dp_e1[i] = _dp_e2[i] = inf_min;
         // if (sn_i==beg_sn) debug_simd_abpoa_print_cg_matrix_row("4.4", int32_t, index_i);
         /* e for next cell */
         // SIMDSetIfEquali32(dp_e1[sn_i], dp_h[sn_i], tmp, SIMDMaxi32(SIMDSubi32(dp_e1[sn_i], GAP_E1), SIMDSubi32(dp_h[sn_i], GAP_OE1)), SIMD_INF_MIN);
         dp_e1[sn_i] = SIMDMaxi32(SIMDSubi32(dp_e1[sn_i], GAP_E1), SIMDSubi32(dp_h[sn_i], GAP_OE1));
         // SIMDSetIfEquali32(dp_e2[sn_i], dp_h[sn_i], tmp, SIMDMaxi32(SIMDSubi32(dp_e2[sn_i], GAP_E2), SIMDSubi32(dp_h[sn_i], GAP_OE2)), SIMD_INF_MIN);
         dp_e2[sn_i] = SIMDMaxi32(SIMDSubi32(dp_e2[sn_i], GAP_E2), SIMDSubi32(dp_h[sn_i], GAP_OE2));
+        // debug_simd_abpoa_print_cg_matrix_row("4.5", int32_t, index_i);
     }
-    // debug_simd_abpoa_print_cg_matrix_row("0", int32_t, index_i);
+    // debug_simd_abpoa_print_cg_matrix_row("5", int32_t, index_i);
     return tot_dp_sn;
 }
 

From 769821fb06fc28570d2cd6162b58dce7b706d88b Mon Sep 17 00:00:00 2001
From: Yan Gao <gaoy1@chop.edu>
Date: Sun, 21 Apr 2024 11:59:19 -0400
Subject: [PATCH 24/38] add consensus calling option: 'most frequent bases at
 each pos.'

---
 .github/workflows/linux-CI.yml |  10 +-
 include/abpoa.h                |   8 +-
 src/abpoa.c                    |  24 +--
 src/abpoa.h                    |   8 +-
 src/abpoa_align.c              |   7 +-
 src/abpoa_graph.c              |  18 +--
 src/abpoa_output.c             | 266 ++++++++++++++++++++-------------
 src/abpoa_plot.c               |   2 +-
 test_data/heter.fq             |  60 ++++++++
 9 files changed, 264 insertions(+), 139 deletions(-)
 create mode 100644 test_data/heter.fq

diff --git a/.github/workflows/linux-CI.yml b/.github/workflows/linux-CI.yml
index 773529b..78bab7b 100644
--- a/.github/workflows/linux-CI.yml
+++ b/.github/workflows/linux-CI.yml
@@ -16,4 +16,12 @@ jobs:
         submodules: recursive
 
     - name: make
-      run: make
\ No newline at end of file
+      run: make
+
+    - name: Run Test Data
+      run: ./bin/abpoa ./test_data/test.fa;         \
+           ./bin/abpoa ./test_data/heter.fa -d2;    \
+           ./bin/abpoa ./test_data/seq.fa -a1;      \
+           ./bin/abpoa ./test_data/seq.fa -r5 -a1;  \
+           ./bin/abpoa ./test_data./heter.fq -d2 -Q \
+           ./bin/abpoa ./test_data./heter.fq -d2 -Q -a1
diff --git a/include/abpoa.h b/include/abpoa.h
index ae9a6b7..693a5bc 100644
--- a/include/abpoa.h
+++ b/include/abpoa.h
@@ -36,7 +36,7 @@
 #define ABPOA_OUT_CONS_FQ  5
 
 #define ABPOA_HB 0
-#define ABPOA_HC 1
+#define ABPOA_MC 1
 
 #define ABPOA_NONE_VERBOSE 0
 #define ABPOA_INFO_VERBOSE 1
@@ -77,7 +77,7 @@ typedef struct {
     uint8_t ret_cigar:1, rev_cigar:1, out_msa:1, out_cons:1, out_gfa:1, out_fq:1, use_read_ids:1, amb_strand:1;
     uint8_t use_qv:1, disable_seeding:1, progressive_poa:1;
     char *incr_fn, *out_pog;
-    int align_mode, gap_mode, max_n_cons;
+    int align_mode, gap_mode, max_n_cons, cons_algrm; // consensus calling algorithm: 0: partial order graph, 1: majority voting
     double min_freq; // for multiploid data
     int verbose; // to control output msg
 
@@ -88,8 +88,8 @@ typedef struct {
 typedef struct {
     int node_id;
     int in_edge_n, in_edge_m, *in_id;
-    int out_edge_n, out_edge_m, *out_id; int *out_weight;
-    int *read_weight, n_read, m_read; // weight of each read, valid when use_qv=1
+    int out_edge_n, out_edge_m, *out_id; int *out_edge_weight; // out_edge_weight: edge-wise weight
+    int *read_weight, n_read, m_read; // read_weight: read-wise weight, valid when use_qv=1
     uint64_t **read_ids; int read_ids_n; // for each edge
 
     int aligned_node_n, aligned_node_m, *aligned_node_id; // mismatch; aligned node will have same rank
diff --git a/src/abpoa.c b/src/abpoa.c
index 031ee4b..9c3a09e 100644
--- a/src/abpoa.c
+++ b/src/abpoa.c
@@ -49,6 +49,7 @@ const struct option abpoa_long_opt [] = {
     { "output", 1, NULL, 'o' },
     { "result", 1, NULL, 'r' },
     { "out-pog", 1, NULL, 'g' },
+    { "cons-algrm", 1, NULL, 'a'},
     { "max-num-cons", 1, NULL, 'd', },
     { "min-freq", 1, NULL, 'q', },
 
@@ -65,14 +66,14 @@ int abpoa_usage(void)
     err_printf("%s: %s \n\n", PROG, DESCRIPTION);
     err_printf("Version: %s\t", VERSION);
 	err_printf("Contact: %s\n\n", CONTACT);
-    err_printf("Usage: %s [options] <in.fa/fq> > cons.fa/msa.out/abpoa.gfa\n\n", PROG);
+    err_printf("Usage: %s [options] <in.fa/fq> > cons.fa/msa.fa/abpoa.gfa\n\n", PROG);
     err_printf("Options:\n");
     err_printf("  Alignment:\n");
-    err_printf("    -m --aln-mode   INT     alignment mode [%d]\n", ABPOA_GLOBAL_MODE);
+    err_printf("    -m --aln-mode    INT    alignment mode [%d]\n", ABPOA_GLOBAL_MODE);
     err_printf("                              %d: global, %d: local, %d: extension\n", ABPOA_GLOBAL_MODE, ABPOA_LOCAL_MODE, ABPOA_EXTEND_MODE);
-    err_printf("    -M --match      INT     match score [%d]\n", ABPOA_MATCH);
-    err_printf("    -X --mismatch   INT     mismatch penalty [%d]\n", ABPOA_MISMATCH);
-    err_printf("    -t --matrix    FILE     scoring matrix file, \'-M\' and \'-X\' are not used when \'-t\' is used [Null]\n");
+    err_printf("    -M --match       INT    match score [%d]\n", ABPOA_MATCH);
+    err_printf("    -X --mismatch    INT    mismatch penalty [%d]\n", ABPOA_MISMATCH);
+    err_printf("    -t --matrix     FILE    scoring matrix file, \'-M\' and \'-X\' are not used when \'-t\' is used [Null]\n");
     err_printf("                            e.g., \'HOXD70.mtx, BLOSUM62.mtx\'\n");
     err_printf("    -O --gap-open INT(,INT) gap opening penalty (O1,O2) [%d,%d]\n", ABPOA_GAP_OPEN1, ABPOA_GAP_OPEN2);
     err_printf("    -E --gap-ext  INT(,INT) gap extension penalty (E1,E2) [%d,%d]\n", ABPOA_GAP_EXT1, ABPOA_GAP_EXT2);
@@ -84,9 +85,9 @@ int abpoa_usage(void)
     err_printf("                            for each input sequence, try the reverse complement if the current\n");
     err_printf("                            alignment score is too low, and pick the strand with a higher score\n");
     err_printf("  Adaptive banded DP:\n");
-    err_printf("    -b --extra-b    INT     first adaptive banding parameter [%d]\n", ABPOA_EXTRA_B);
+    err_printf("    -b --extra-b     INT    first adaptive banding parameter [%d]\n", ABPOA_EXTRA_B);
     err_printf("                            set b as < 0 to disable adaptive banded DP\n");
-    err_printf("    -f --extra-f  FLOAT     second adaptive banding parameter [%.2f]\n", ABPOA_EXTRA_F);
+    err_printf("    -f --extra-f   FLOAT    second adaptive banding parameter [%.2f]\n", ABPOA_EXTRA_F);
     err_printf("                            the number of extra bases added on both sites of the band is\n");
     err_printf("                            b+f*L, where L is the length of the aligned sequence\n");
     // err_printf("    -z --zdrop    INT       Z-drop score in extension alignment [-1]\n");
@@ -102,7 +103,8 @@ int abpoa_usage(void)
     // err_printf("    -n --par-size           minimal partition size [%d]\n", ABPOA_W);
 
     err_printf("  Input/Output:\n");
-    err_printf("    -Q --use-qual-weight    take base quality score from FASTQ input file as graph edge weight [False]\n");
+    err_printf("    -Q --use-qual-weight    take base quality score from FASTQ input file as graph edge weight for consensus calling [False]\n");
+    err_printf("                            effective only when input sequences are in FASTQ format and consensus calling with heaviest bundling\n");
     err_printf("    -c --amino-acid         input sequences are amino acid (default is nucleotide) [False]\n");
     err_printf("    -l --in-list            input file is a list of sequence file names [False]\n");
     err_printf("                            each line is one sequence file containing a set of sequences\n");
@@ -117,6 +119,9 @@ int abpoa_usage(void)
     err_printf("                            - %d: graph in GFA format\n", ABPOA_OUT_GFA);
     err_printf("                            - %d: graph with consensus path in GFA format\n", ABPOA_OUT_CONS_GFA);
     err_printf("                            - %d: consensus in FASTQ format\n", ABPOA_OUT_CONS_FQ);
+    err_printf("    -a --cons-algrm INT     consensus algorithm [%d]\n", ABPOA_HB);
+    err_printf("                            - %d: heaviest bundling path in partial order graph\n", ABPOA_HB);
+    err_printf("                            - %d: most frequent bases at each position\n", ABPOA_MC);
     err_printf("    -d --maxnum-cons INT    max. number of consensus sequence to generate [1]\n");
     err_printf("    -q --min-freq  FLOAT    min. frequency of each consensus sequence (only effective when -d/--num-cons > 1) [%.2f]\n", MULTIP_MIN_FREQ);
     err_printf("    -g --out-pog    FILE    dump final alignment graph to FILE (.pdf/.png) [Null]\n\n");
@@ -151,7 +156,7 @@ int abpoa_main(char *file_fn, int is_list, abpoa_para_t *abpt){
 
 int main(int argc, char **argv) {
     int c, m, in_list=0; char *s; abpoa_para_t *abpt = abpoa_init_para();
-    while ((c = getopt_long(argc, argv, "m:M:X:t:O:E:b:f:z:e:QSk:w:n:i:clpso:r:g:d:q:hvV:", abpoa_long_opt, NULL)) >= 0) {
+    while ((c = getopt_long(argc, argv, "m:M:X:t:O:E:b:f:z:e:QSk:w:n:i:clpso:r:g:a:d:q:hvV:", abpoa_long_opt, NULL)) >= 0) {
         switch(c)
         {
             case 'm': m = atoi(optarg);
@@ -194,6 +199,7 @@ int main(int argc, char **argv) {
                       break;
             case 'g': abpt->out_pog= strdup(optarg); break;
 
+            case 'a': abpt->cons_algrm = atoi(optarg); break;
             case 'd': abpt->max_n_cons = atoi(optarg); break; 
             case 'q': abpt->min_freq = atof(optarg); break;
 
diff --git a/src/abpoa.h b/src/abpoa.h
index ae9a6b7..693a5bc 100644
--- a/src/abpoa.h
+++ b/src/abpoa.h
@@ -36,7 +36,7 @@
 #define ABPOA_OUT_CONS_FQ  5
 
 #define ABPOA_HB 0
-#define ABPOA_HC 1
+#define ABPOA_MC 1
 
 #define ABPOA_NONE_VERBOSE 0
 #define ABPOA_INFO_VERBOSE 1
@@ -77,7 +77,7 @@ typedef struct {
     uint8_t ret_cigar:1, rev_cigar:1, out_msa:1, out_cons:1, out_gfa:1, out_fq:1, use_read_ids:1, amb_strand:1;
     uint8_t use_qv:1, disable_seeding:1, progressive_poa:1;
     char *incr_fn, *out_pog;
-    int align_mode, gap_mode, max_n_cons;
+    int align_mode, gap_mode, max_n_cons, cons_algrm; // consensus calling algorithm: 0: partial order graph, 1: majority voting
     double min_freq; // for multiploid data
     int verbose; // to control output msg
 
@@ -88,8 +88,8 @@ typedef struct {
 typedef struct {
     int node_id;
     int in_edge_n, in_edge_m, *in_id;
-    int out_edge_n, out_edge_m, *out_id; int *out_weight;
-    int *read_weight, n_read, m_read; // weight of each read, valid when use_qv=1
+    int out_edge_n, out_edge_m, *out_id; int *out_edge_weight; // out_edge_weight: edge-wise weight
+    int *read_weight, n_read, m_read; // read_weight: read-wise weight, valid when use_qv=1
     uint64_t **read_ids; int read_ids_n; // for each edge
 
     int aligned_node_n, aligned_node_m, *aligned_node_id; // mismatch; aligned node will have same rank
diff --git a/src/abpoa_align.c b/src/abpoa_align.c
index 3829392..cd427d5 100644
--- a/src/abpoa_align.c
+++ b/src/abpoa_align.c
@@ -106,6 +106,7 @@ abpoa_para_t *abpoa_init_para(void) {
     abpt->out_fq = 0;     // output consensus sequence in fastq
     abpt->out_gfa = 0;    // out graph in GFA format
     abpt->out_msa = 0;    // output msa
+    abpt->cons_algrm = ABPOA_HB; // consensus calling algorithm
     abpt->max_n_cons = 1; // number of max. generated consensus sequence
     abpt->min_freq = MULTIP_MIN_FREQ; 
     abpt->use_read_ids = 0;
@@ -142,10 +143,10 @@ abpoa_para_t *abpoa_init_para(void) {
 
 void abpoa_post_set_para(abpoa_para_t *abpt) {
     abpoa_set_gap_mode(abpt);
-    if (abpt->out_msa || abpt->out_gfa || abpt->max_n_cons > 1) {
+    if (abpt->out_msa || abpt->out_gfa || abpt->max_n_cons > 1 || abpt->cons_algrm == ABPOA_MC) {
         abpt->use_read_ids = 1;
-        set_65536_table();
-        if (abpt->max_n_cons > 1) set_bit_table16();
+        if (abpt->out_msa || abpt->out_gfa) set_65536_table();
+        if (abpt->max_n_cons > 1 || abpt->cons_algrm == ABPOA_MC) set_bit_table16();
     }
     if (abpt->align_mode == ABPOA_LOCAL_MODE) abpt->wb = -1;
     int i;
diff --git a/src/abpoa_graph.c b/src/abpoa_graph.c
index 1fe83c4..e05ba04 100644
--- a/src/abpoa_graph.c
+++ b/src/abpoa_graph.c
@@ -28,7 +28,7 @@ void abpoa_free_node(abpoa_node_t *node, int n) {
     for (i = 0; i < n; ++i) {
         if (node[i].in_edge_m > 0) free(node[i].in_id);
         if (node[i].out_edge_m > 0) {
-            free(node[i].out_id); free(node[i].out_weight);
+            free(node[i].out_id); free(node[i].out_edge_weight);
             if (node[i].read_ids_n > 0) {
                 for (j = 0; j < node[i].out_edge_m; ++j) {
                     free(node[i].read_ids[j]);
@@ -51,7 +51,7 @@ abpoa_graph_t *abpoa_realloc_graph_edge(abpoa_graph_t *abg, int io, int id, int
         if (edge_m <= 0) {
             abg->node[id].out_edge_m = MAX_OF_TWO(abg->node[id].out_edge_n, 1);
             abg->node[id].out_id = (int*)_err_malloc(abg->node[id].out_edge_m * sizeof(int));
-            abg->node[id].out_weight = (int*)_err_malloc(abg->node[id].out_edge_m * sizeof(int));
+            abg->node[id].out_edge_weight = (int*)_err_malloc(abg->node[id].out_edge_m * sizeof(int));
             if (use_read_ids || abg->node[id].read_ids_n > 0) {
                 abg->node[id].read_ids = (uint64_t**)_err_malloc(abg->node[id].out_edge_m * sizeof(uint64_t*));
                 if (abg->node[id].read_ids_n > 0) {
@@ -64,7 +64,7 @@ abpoa_graph_t *abpoa_realloc_graph_edge(abpoa_graph_t *abg, int io, int id, int
         } else if (abg->node[id].out_edge_n >= edge_m) {
             abg->node[id].out_edge_m = abg->node[id].out_edge_n+1; kroundup32(abg->node[id].out_edge_m);
             abg->node[id].out_id = (int*)_err_realloc(abg->node[id].out_id, abg->node[id].out_edge_m * sizeof(int));
-            abg->node[id].out_weight = (int*)_err_realloc(abg->node[id].out_weight, abg->node[id].out_edge_m * sizeof(int));
+            abg->node[id].out_edge_weight = (int*)_err_realloc(abg->node[id].out_edge_weight, abg->node[id].out_edge_m * sizeof(int));
             if (use_read_ids || abg->node[id].read_ids_n > 0) {
                 abg->node[id].read_ids = (uint64_t**)_err_realloc(abg->node[id].read_ids, abg->node[id].out_edge_m * sizeof(uint64_t*));
                 if (abg->node[id].read_ids_n > 0) {
@@ -253,8 +253,8 @@ void abpoa_BFS_set_node_remain(abpoa_graph_t *abg, int src_id, int sink_id) {
             int max_w=-1, max_id=-1;
             for (i = 0; i < abg->node[cur_id].out_edge_n; ++i) {
                 out_id = abg->node[cur_id].out_id[i];
-                if (abg->node[cur_id].out_weight[i] > max_w) {
-                    max_w = abg->node[cur_id].out_weight[i];
+                if (abg->node[cur_id].out_edge_weight[i] > max_w) {
+                    max_w = abg->node[cur_id].out_edge_weight[i];
                     max_id = out_id;
                 }
             }
@@ -287,7 +287,7 @@ void abpoa_topological_sort(abpoa_graph_t *abg, abpoa_para_t *abpt) {
         // fprintf(stderr, "node_n: %d, index_rank_m: %d\n", node_n, abg->index_rank_m);
         abg->index_to_node_id = (int*)_err_realloc(abg->index_to_node_id, abg->index_rank_m * sizeof(int));
         abg->node_id_to_index = (int*)_err_realloc(abg->node_id_to_index, abg->index_rank_m * sizeof(int));
-        if (abpt->out_msa || abpt->max_n_cons > 1) 
+        if (abpt->out_msa || abpt->max_n_cons > 1 || abpt->cons_algrm == ABPOA_MC) 
             abg->node_id_to_msa_rank = (int*)_err_realloc(abg->node_id_to_msa_rank, abg->index_rank_m * sizeof(int));
         if (abpt->wb >= 0) {
             abg->node_id_to_max_pos_left = (int*)_err_realloc(abg->node_id_to_max_pos_left, abg->index_rank_m * sizeof(int));
@@ -426,7 +426,7 @@ int abpoa_add_graph_edge(abpoa_graph_t *abg, int from_id, int to_id, int check_e
         int i;
         for (i = 0; i < out_edge_n; ++i) {
             if (abg->node[from_id].out_id[i] == to_id) { // edge exists
-                abg->node[from_id].out_weight[i] += w; // update weight on existing edge
+                abg->node[from_id].out_edge_weight[i] += w; // update weight on existing edge
                 // update label id
                 edge_exist = 1;
                 out_edge_i = i;
@@ -444,7 +444,7 @@ int abpoa_add_graph_edge(abpoa_graph_t *abg, int from_id, int to_id, int check_e
         /// out edge
         abpoa_realloc_graph_edge(abg, 1, from_id, add_read_id);
         abg->node[from_id].out_id[out_edge_n] = to_id;
-        abg->node[from_id].out_weight[out_edge_n] = w; // initial weight for new edge
+        abg->node[from_id].out_edge_weight[out_edge_n] = w; // initial weight for new edge
         out_edge_i = out_edge_n;
         ++abg->node[from_id].out_edge_n;
     }
@@ -699,7 +699,7 @@ void abpoa_reset(abpoa_t *ab, abpoa_para_t *abpt, int qlen) {
         abg->node_m = abg->index_rank_m = node_m;
         abg->index_to_node_id = (int*)_err_realloc(abg->index_to_node_id, node_m * sizeof(int));
         abg->node_id_to_index = (int*)_err_realloc(abg->node_id_to_index, node_m * sizeof(int));
-        if (abpt->out_msa || abpt->max_n_cons > 1) 
+        if (abpt->out_msa || abpt->max_n_cons > 1 || abpt->cons_algrm == ABPOA_MC) 
             abg->node_id_to_msa_rank = (int*)_err_realloc(abg->node_id_to_msa_rank, node_m * sizeof(int));
         if (abpt->wb >= 0) {
             abg->node_id_to_max_pos_left = (int*)_err_realloc(abg->node_id_to_max_pos_left, node_m * sizeof(int));
diff --git a/src/abpoa_output.c b/src/abpoa_output.c
index 72c1192..1c08c2d 100644
--- a/src/abpoa_output.c
+++ b/src/abpoa_output.c
@@ -275,51 +275,75 @@ int abpoa_cons_phred_score(int n_cov, int n_seq) {
     return (33 + (int)(-10 * log10(p) + 0.499));
 }
 
-int get_read_ids_clu_count(uint64_t *cur_read_ids, int read_ids_n, uint64_t *clu_read_ids) {
+int get_edge_inclu_read_count(int edge_i, int cons_i, abpoa_node_t *node, uint64_t **clu_read_ids) {
     int n = 0, i; uint64_t b;
-    for (i = 0; i < read_ids_n; ++i) {
-        b = cur_read_ids[i] & clu_read_ids[i];
+    for (i = 0; i < node->read_ids_n; ++i) {
+        b = node->read_ids[edge_i][i] & clu_read_ids[cons_i][i];
         n += get_bit_cnt4(ab_bit_table16, b);
     }
     return n;
 }
 
-int get_read_ids_clu_weight(uint64_t *cur_read_ids, int read_ids_n, uint64_t *clu_read_ids, uint8_t use_qv, int *read_weight, int m_read) {
-    if (use_qv == 0) return get_read_ids_clu_count(cur_read_ids, read_ids_n, clu_read_ids);
-    int w = 0, i; uint64_t b;
-    for (i = 0; i < read_ids_n; ++i) {
-        b = cur_read_ids[i] & clu_read_ids[i];
-
-        w += get_bit_cnt4(ab_bit_table16, b);
-    }
-    uint64_t one = 1;
-    for (i = 0; i < m_read; ++i) {
-        if (read_weight[i] > 0) {
+// for n_clu >= 2, get weight of reads within cons_i'th clu
+int get_inclu_edge_weight(int edge_i, int cons_i, abpoa_node_t *node, uint64_t **clu_read_ids, int use_qv) {
+    // collect read-wise weight
+    if (use_qv == 0) return get_edge_inclu_read_count(edge_i, cons_i, node, clu_read_ids);
+    int w = 0, i; uint64_t c, one = 1;
+    for (i = 0; i < node->m_read; ++i) {
+        if (node->read_weight[i] > 0) {
             int n = i / 64, b = i & 0x3f;
-            if ((cur_read_ids[n] & clu_read_ids[n] & (one << b)) > 0)
-                w += read_weight[i];
+            c = node->read_ids[edge_i][n] & clu_read_ids[cons_i][n];
+            if ((c & (one << b)) > 0)
+                w += node->read_weight[i];
         }
     }
     return w;
 }
 
-int abpoa_consensus_cov(abpoa_graph_t *abg, int id, uint64_t *clu_read_ids) {
-    int i, j, in_id, left_n, right_n;
+int get_edge_weight(int edge_i, int cons_i, abpoa_node_t *node, uint64_t **clu_read_ids, int use_qv, int n_clu) {
+    if (n_clu == 1) { // use weight directly
+        return node->out_edge_weight[edge_i];
+    } else { // collect weight of reads within cons_i'th clu
+        return get_inclu_edge_weight(edge_i, cons_i, node, clu_read_ids, use_qv);
+    }
+}
+
+int get_node_weight(int n_clu, int cons_i, abpoa_node_t *node, uint64_t **clu_read_ids, int use_qv) {
+    int w = 0, i;
+    for (i = 0; i < node->out_edge_n; ++i) {
+        w += get_edge_weight(i, cons_i, node, clu_read_ids, use_qv, n_clu);
+    }
+    return w;
+}
+
+// get base coverage for node[id]
+int abpoa_node_out_cov(abpoa_node_t *node, int id, uint64_t **clu_read_ids, int cons_i, int n_cons) {
+    if (n_cons == 1) return node[id].n_read;
+    int i, out_cov = 0;
+    for (i = 0; i < node[id].out_edge_n; ++i) {
+        out_cov += get_edge_inclu_read_count(i, cons_i, node+id, clu_read_ids);
+    }
+    return out_cov;
+}
+
+int abpoa_node_in_cov(abpoa_node_t *node, int id, uint64_t **clu_read_ids, int cons_i, int n_cons) {
+    if (n_cons == 1) return node[id].n_read;
+    int i, j, in_id, in_cov = 0;
     // for each id: get max{left_weigth, right_weight}
-    left_n = right_n = 0;
-    for (i = 0; i < abg->node[id].in_edge_n; ++i) {
-        in_id = abg->node[id].in_id[i];
-        for (j = 0; j < abg->node[in_id].out_edge_n; ++j) {
-            if (abg->node[in_id].out_id[j] == id) {
-                left_n += get_read_ids_clu_count(abg->node[in_id].read_ids[j], abg->node[in_id].read_ids_n, clu_read_ids);
+    for (i = 0; i < node->in_edge_n; ++i) {
+        in_id = node[id].in_id[i];
+        for (j = 0; j < node[in_id].out_edge_n; ++j) {
+            if (node[in_id].out_id[j] == id) {
+                in_cov += get_edge_inclu_read_count(j, cons_i, node+in_id, clu_read_ids);
                 break;
             }
         }
     }
-    for (i = 0; i < abg->node[id].out_edge_n; ++i) {
-        right_n += get_read_ids_clu_count(abg->node[id].read_ids[i], abg->node[id].read_ids_n, clu_read_ids);
-    }
-    return MAX_OF_TWO(left_n, right_n);
+    return in_cov;
+}
+int abpoa_node_cov(abpoa_node_t *node, int id, uint64_t **clu_read_ids, int cons_i, int n_cons) {
+    if (n_cons == 1) return node[id].n_read;
+    return MAX_OF_TWO(abpoa_node_in_cov(node, id, clu_read_ids, cons_i, n_cons), abpoa_node_out_cov(node, id, clu_read_ids, cons_i, n_cons));
 }
 
 void abpoa_set_hb_cons(abpoa_graph_t *abg, int **max_out_id, int n_cons, uint64_t **clu_read_ids, int src_id, int sink_id, abpoa_cons_t *abc) {
@@ -331,7 +355,7 @@ void abpoa_set_hb_cons(abpoa_graph_t *abg, int **max_out_id, int n_cons, uint64_
         while (cur_id != sink_id) {
             abc->cons_node_ids[i][j] = cur_id;
             abc->cons_base[i][j] = abg->node[cur_id].base;
-            abc->cons_cov[i][j] = abpoa_consensus_cov(abg, cur_id, clu_read_ids[i]);
+            abc->cons_cov[i][j] = abpoa_node_cov(abg->node, cur_id, clu_read_ids, i, n_cons);
             abc->cons_phred_score[i][j] = abpoa_cons_phred_score(abc->cons_cov[i][j], abc->clu_n_seq[i]);
             ++j;
             cur_id = max_out_id[i][cur_id];
@@ -340,78 +364,59 @@ void abpoa_set_hb_cons(abpoa_graph_t *abg, int **max_out_id, int n_cons, uint64_
     }
 }
 
-void abpoa_set_hb_cons1(abpoa_graph_t *abg, int *max_out_id, int src_id, int sink_id, abpoa_cons_t *abc) {
-    int i = 0, cur_id;
-    abc->n_cons = 1;
-    cur_id = max_out_id[src_id];
-    while (cur_id != sink_id) {
-        abc->cons_node_ids[0][i] = cur_id;
-        abc->cons_base[0][i] = abg->node[cur_id].base;
-        abc->cons_cov[0][i] = abg->node[cur_id].n_read;
-        abc->cons_phred_score[0][i] = abpoa_cons_phred_score(abc->cons_cov[0][i], abc->n_seq);
-        cur_id = max_out_id[cur_id];
-        ++i;
+void abpoa_set_major_voting_cons(abpoa_graph_t *abg, int m, int ***row_column_count, int **msa_node_id, int src_id, int sink_id, int msa_l, abpoa_cons_t *abc) {
+    int cur_id, cons_i, i, j, max_c, max_base, gap_c, c;
+    int cons_l;
+    for (cons_i = 0; cons_i < abc->n_cons; ++cons_i) {
+        cons_l = 0;
+        for (i = 0; i < msa_l; ++i) {
+            max_c = 0, max_base = m, gap_c = abc->clu_n_seq[cons_i];
+            for (j = 0; j < m-1; ++j) {
+                c = row_column_count[cons_i][i][j];
+                if (c > max_c) {
+                    max_c = c;
+                    max_base = j;
+                }
+                gap_c -= c;
+            }
+            if (max_c >= gap_c) { // append consensus base to abc
+                cur_id = msa_node_id[i][max_base];
+                abc->cons_node_ids[cons_i][cons_l] = cur_id;
+                abc->cons_base[cons_i][cons_l] = max_base;
+                abc->cons_cov[cons_i][cons_l] = max_c;
+                abc->cons_phred_score[cons_i][cons_l] = abpoa_cons_phred_score(abc->cons_cov[cons_i][cons_l], abc->clu_n_seq[cons_i]);
+                cons_l++;
+            }
+        }
+        abc->cons_len[cons_i] = cons_l;
     }
-    abc->cons_len[0] = i;
 }
 
-// heaviest_bundling
-// 1. argmax{cur->weight}
-// 2. argmax{out_node->weight}
-void abpoa_heaviest_bundling(abpoa_graph_t *abg, int src_id, int sink_id, int *out_degree, abpoa_cons_t *abc) {
-    int *id, i, cur_id, in_id, out_id, max_id; int max_w, out_w;
-    int *score = (int*)_err_malloc(abg->node_n * sizeof(int));
-    int *max_out_id = (int*)_err_malloc(abg->node_n * sizeof(int));
-
-    kdq_int_t *q = kdq_init_int();
-    kdq_push_int(q, sink_id);
-    // reverse Breadth-First-Search
-    while ((id = kdq_shift_int(q)) != 0) {
-        cur_id = *id;
-        if (cur_id == sink_id) {
-            max_out_id[cur_id] = -1;
-            score[cur_id] = 0;
-        } else {
-            max_id = -1;
-            if (cur_id == src_id) {
-                int path_score = -1, path_max_w = -1;
-                for (i = 0; i < abg->node[cur_id].out_edge_n; ++i) {
-                    out_id = abg->node[cur_id].out_id[i];
-                    out_w = abg->node[cur_id].out_weight[i];
-                    if (out_w > path_max_w || (out_w == path_max_w && score[out_id] > path_score)) {
-                        max_id = out_id;
-                        path_score = score[out_id];
-                        path_max_w = out_w;
-                    }
-                }
-                max_out_id[cur_id] = max_id;
-                kdq_destroy_int(q);
-                break;
-            } else {
-                max_w = INT32_MIN;
-                for (i = 0; i < abg->node[cur_id].out_edge_n; ++i) {
-                    out_id = abg->node[cur_id].out_id[i];
-                    out_w = abg->node[cur_id].out_weight[i];
-                    if (max_w < out_w) {
-                        max_w = out_w; max_id = out_id;
-                    } else if (max_w == out_w && score[max_id] <= score[out_id]) {
-                        max_id = out_id;
-                    }
-                }
-                score[cur_id] = max_w + score[max_id];
-                max_out_id[cur_id] = max_id;
-            }
+void abpoa_set_row_column_weight(abpoa_graph_t *abg, int n_clu, int m, int ***rc_weight, uint64_t **clu_read_ids, int **msa_node_id) {
+    int i, cons_i, k, rank, aligned_id;
+    int node_w;
+    for (i = 2; i < abg->node_n; ++i) {
+        // get msa rank
+        rank = abpoa_graph_node_id_to_msa_rank(abg, i);
+        for (k = 0; k < abg->node[i].aligned_node_n; ++k) {
+            aligned_id = abg->node[i].aligned_node_id[k];
+            rank = MAX_OF_TWO(rank, abpoa_graph_node_id_to_msa_rank(abg, aligned_id));
         }
-        for (i = 0; i < abg->node[cur_id].in_edge_n; ++i) {
-            in_id = abg->node[cur_id].in_id[i];
-            if (--out_degree[in_id] == 0) kdq_push_int(q, in_id);
+        msa_node_id[rank-1][abg->node[i].base] = i;
+        // assign seq
+        for (cons_i = 0; cons_i < n_clu; ++cons_i) {
+            node_w = abpoa_node_out_cov(abg->node, i, clu_read_ids, cons_i, n_clu);
+            rc_weight[cons_i][rank-1][abg->node[i].base] = node_w;
+            // for (k = 0; k < abg->node[i].read_ids_n; ++k) {
+            //     for (j = 0; j < abg->node[i].out_edge_n; ++j) {
+            //         if (n_clu > 1) b = abg->node[i].read_ids[j][k] & clu_read_ids[cons_i][k];
+            //         else b = abg->node[i].read_ids[j][k];
+            //         rc_count[cons_i][rank-1][abg->node[i].base] += get_bit_cnt4(ab_bit_table16, b);
+            //     }
+            // }
+            rc_weight[cons_i][rank-1][m-1] -= rc_weight[cons_i][rank-1][abg->node[i].base];
         }
     }
-    abc->clu_n_seq[0] = abc->n_seq;
-    // set cons read ids
-    for (i = 0; i < abc->n_seq; ++i) abc->clu_read_ids[0][i] = i;
-    abpoa_set_hb_cons1(abg, max_out_id, src_id, sink_id, abc);
-    free(score); free(max_out_id);
 }
 
 void set_clu_read_ids(abpoa_cons_t *abc, uint64_t **read_ids, int cons_i, int n_seq) {
@@ -426,18 +431,26 @@ void set_clu_read_ids(abpoa_cons_t *abc, uint64_t **read_ids, int cons_i, int n_
         err_fatal(__func__, "Error in set cluster read ids. (%d, %d)", n, abc->clu_n_seq[cons_i]);
 }
 
-void abpoa_multip_heaviest_bundling(abpoa_graph_t *abg, abpoa_para_t *abpt, int src_id, int sink_id, int *out_degree, int n_clu, int read_ids_n, uint64_t **clu_read_ids, abpoa_cons_t *abc) {
+// heaviest_bundling
+// 1. argmax{cur->weight}
+// 2. argmax{out_node->weight}
+void abpoa_heaviest_bundling(abpoa_graph_t *abg, abpoa_para_t *abpt, int src_id, int sink_id, int *out_degree, int n_clu, int read_ids_n, uint64_t **clu_read_ids, abpoa_cons_t *abc) {
     int *id, cons_i, i, cur_id, in_id, out_id, max_id; int max_w, out_w;
 
     int *_out_degree = (int*)_err_malloc(abg->node_n * sizeof(int));
     int *score = (int*)_err_malloc(abg->node_n * sizeof(int));
     int **max_out_id = (int**)_err_malloc(n_clu * sizeof(int*));
     for (i = 0; i < n_clu; ++i) max_out_id[i] = (int*)_err_malloc(abg->node_n * sizeof(int));
+    if (n_clu == 1) abc->clu_n_seq[0] = abc->n_seq;
+    else {
+        for (cons_i = 0; cons_i < n_clu; cons_i++) {
+            abc->clu_n_seq[cons_i] = get_read_cnt(clu_read_ids[cons_i], read_ids_n);
+            set_clu_read_ids(abc, clu_read_ids, cons_i, abc->n_seq);
+        }
+    }
 
     for (cons_i = 0; cons_i < n_clu; cons_i++) {
         for (i = 0; i < abg->node_n; ++i) _out_degree[i] = out_degree[i];
-        abc->clu_n_seq[cons_i] = get_read_cnt(clu_read_ids[cons_i], read_ids_n);
-        set_clu_read_ids(abc, clu_read_ids, cons_i, abc->n_seq);
         kdq_int_t *q = kdq_init_int();
         kdq_push_int(q, sink_id);
         // reverse Breadth-First-Search
@@ -452,8 +465,7 @@ void abpoa_multip_heaviest_bundling(abpoa_graph_t *abg, abpoa_para_t *abpt, int
                     int path_score = -1, path_max_w = -1;
                     for (i = 0; i < abg->node[cur_id].out_edge_n; ++i) {
                         out_id = abg->node[cur_id].out_id[i];
-                        out_w = get_read_ids_clu_weight(abg->node[cur_id].read_ids[i], abg->node[cur_id].read_ids_n, clu_read_ids[cons_i], abpt->use_qv, abg->node[cur_id].read_weight, abg->node[cur_id].m_read);
-                        // out_w = abg->node[cur_id].out_weight[i];
+                        out_w = get_edge_weight(i, cons_i, abg->node+cur_id, clu_read_ids, abpt->use_qv, n_clu);
                         if (out_w > path_max_w || (out_w == path_max_w && score[out_id] > path_score)) {
                             max_id = out_id;
                             path_score = score[out_id];
@@ -467,7 +479,7 @@ void abpoa_multip_heaviest_bundling(abpoa_graph_t *abg, abpoa_para_t *abpt, int
                     max_w = INT32_MIN;
                     for (i = 0; i < abg->node[cur_id].out_edge_n; ++i) {
                         out_id = abg->node[cur_id].out_id[i];
-                        out_w = get_read_ids_clu_weight(abg->node[cur_id].read_ids[i], abg->node[cur_id].read_ids_n, clu_read_ids[cons_i], abpt->use_qv, abg->node[cur_id].read_weight, abg->node[cur_id].m_read);
+                        out_w = get_edge_weight(i, cons_i, abg->node+cur_id, clu_read_ids, abpt->use_qv, n_clu);
                         if (max_w < out_w) {
                             max_w = out_w;
                             max_id = out_id;
@@ -492,6 +504,42 @@ void abpoa_multip_heaviest_bundling(abpoa_graph_t *abg, abpoa_para_t *abpt, int
     for (i = 0; i < n_clu; ++i) free(max_out_id[i]); free(max_out_id);
 }
 
+void abpoa_major_voting(abpoa_graph_t *abg, abpoa_para_t *abpt, int src_id, int sink_id, int *out_degree, int n_clu, int read_ids_n, uint64_t **clu_read_ids, abpoa_cons_t *abc) {
+    abpoa_set_msa_rank(abg, src_id, sink_id);
+    int i, cons_i, msa_l = abg->node_id_to_msa_rank[sink_id]-1;
+    int ***row_column_weight = (int***)_err_malloc(n_clu * sizeof(int**));
+    int **msa_node_id = (int**)_err_malloc(msa_l * sizeof(int*));
+    // init row_column_weight
+    for (cons_i = 0; cons_i < n_clu; ++cons_i) {
+        row_column_weight[cons_i] = (int**)_err_malloc(msa_l * sizeof(int*));
+        for (i = 0; i < msa_l; ++i) {
+            row_column_weight[cons_i][i] = (int*)_err_calloc(abpt->m, sizeof(int));
+            row_column_weight[cons_i][i][abpt->m-1] = abc->clu_n_seq[cons_i];
+        }
+    }
+    for (i = 0; i < msa_l; ++i) msa_node_id[i] = (int*)_err_calloc(abpt->m, sizeof(int));
+    abc->n_cons = n_clu;
+    if (n_clu == 1) abc->clu_n_seq[0] = abc->n_seq;
+    else {
+        for (cons_i = 0; cons_i < n_clu; ++cons_i) {
+            abc->clu_n_seq[cons_i] = get_read_cnt(clu_read_ids[cons_i], read_ids_n);
+            set_clu_read_ids(abc, clu_read_ids, cons_i, abc->n_seq);
+        }
+    }
+    // no quality weight used for now, only read count
+    abpoa_set_row_column_weight(abg, n_clu, abpt->m, row_column_weight, clu_read_ids, msa_node_id);
+    abpoa_set_major_voting_cons(abg, abpt->m, row_column_weight, msa_node_id, src_id, sink_id, msa_l, abc);
+    for (cons_i = 0; cons_i < n_clu; ++cons_i) {
+        for (i = 0; i < msa_l; ++i) {
+            free(row_column_weight[cons_i][i]);
+        } free(row_column_weight[cons_i]);
+    }
+    for (i = 0; i < msa_l; ++i) {
+        free(msa_node_id[i]);
+    } 
+    free(row_column_weight); free(msa_node_id);
+}
+
 void abpoa_output_fx_consensus(abpoa_t *ab, abpoa_para_t *abpt, FILE *out_fp) {
     if (out_fp == NULL) return;
     int cons_i, j;
@@ -904,19 +952,21 @@ void abpoa_generate_consensus(abpoa_t *ab, abpoa_para_t *abpt) {
         out_degree[i] = abg->node[i].out_edge_n;
     }
 
-    int n_clu, m_clu, n_seq = ab->abs->n_seq; uint64_t **clu_read_ids;
+    int n_clu, m_clu=0, n_seq = ab->abs->n_seq; uint64_t **clu_read_ids=NULL;
     int read_ids_n = (n_seq-1)/64+1;
 
-    if (abpt->max_n_cons > 1) n_clu = abpoa_multip_read_clu(abg, ABPOA_SRC_NODE_ID, ABPOA_SINK_NODE_ID, n_seq, abpt->m, abpt->max_n_cons, abpt->min_freq, &clu_read_ids, &m_clu, abpt->verbose);
-    else n_clu = 1;
+    n_clu = 1;
+    if (abpt->max_n_cons > 1) 
+        n_clu = abpoa_multip_read_clu(abg, ABPOA_SRC_NODE_ID, ABPOA_SINK_NODE_ID, n_seq, abpt->m, abpt->max_n_cons, abpt->min_freq, &clu_read_ids, &m_clu, abpt->verbose);
 
     abpoa_cons_t *abc = ab->abc;
     abpoa_allocate_cons(abc, abg->node_n, ab->abs->n_seq, n_clu);
-    if (n_clu > 1) {
-         abpoa_multip_heaviest_bundling(abg, abpt, ABPOA_SRC_NODE_ID, ABPOA_SINK_NODE_ID, out_degree, n_clu, read_ids_n, clu_read_ids, abc);
+    if (abpt->cons_algrm == ABPOA_HB)
+        abpoa_heaviest_bundling(abg, abpt, ABPOA_SRC_NODE_ID, ABPOA_SINK_NODE_ID, out_degree, n_clu, read_ids_n, clu_read_ids, abc);
+    else
+        abpoa_major_voting(abg, abpt, ABPOA_SRC_NODE_ID, ABPOA_SINK_NODE_ID, out_degree, n_clu, read_ids_n, clu_read_ids, abc);
+    if (m_clu > 0) {
         for (i = 0; i < m_clu; ++i) free(clu_read_ids[i]); free(clu_read_ids);
-    } else {
-        abpoa_heaviest_bundling(abg, ABPOA_SRC_NODE_ID, ABPOA_SINK_NODE_ID, out_degree, abc);
     }
     abg->is_called_cons = 1; free(out_degree);
 }
diff --git a/src/abpoa_plot.c b/src/abpoa_plot.c
index d35fbcf..3fbbffa 100644
--- a/src/abpoa_plot.c
+++ b/src/abpoa_plot.c
@@ -84,7 +84,7 @@ void abpoa_dump_pog(abpoa_t *ab, abpoa_para_t *abpt) {
         // out_edge
         for (j = 0; j < abg->node[id].out_edge_n; ++j) {
             out_id = abg->node[id].out_id[j];
-            fprintf(fp, "\t%s -> %s [label=\"%d\", penwidth=%d]\n", node_label[id], node_label[out_id], abg->node[id].out_weight[j], abg->node[id].out_weight[j]+1);
+            fprintf(fp, "\t%s -> %s [label=\"%d\", penwidth=%d]\n", node_label[id], node_label[out_id], abg->node[id].out_edge_weight[j], abg->node[id].out_edge_weight[j]+1);
         }
         if (abg->node[id].aligned_node_n > 0) {
             fprintf(fp, "\t{rank=same; %s ", node_label[id]);
diff --git a/test_data/heter.fq b/test_data/heter.fq
new file mode 100644
index 0000000..bfbb23f
--- /dev/null
+++ b/test_data/heter.fq
@@ -0,0 +1,60 @@
+@m64062_200517_230654/46596725/ccs
+CCATTCCCACCATCCTTACCATCAACATCACCATCCCCACCATCCCCAACACCATTCCCACCATCCCTACCATCACCATCACCATCCCCACCAACATCCCCACCACCATCCTCACTACCATCCCCACCACCATTTCCACCATTCCCACCACAGTCACCATCACCCCCACCATCCCCATCATCATCCGCACCATCCCCACCATCCCCACCACCATCTCCATTACCATCCCCACCACCATCTCCATTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATTTCCACCATTCCCACCATCATCCCCACCACCATCCTCGTTACCATCCCCACCACCTTTTCCACCATTCCCACCATCTCCAACACCTCCCCCACCATCATCCCCACCATCCCCACCACCTTCTCCACCATCATTCTCACCATCCCCACCACCATCTCCACCACCATTCTCACCATCTCCACCAACATCCCCACCATCCCCACCCCCATGCCCACCAACATCCCCACCATCCCCACCCCCATGCCCACCAACATCCCCACCATCCCCACCCCCATGCCCACCATCATCCCCACCATCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@m64062_200517_230654/122620624/ccs
+CCATTCCCACCATCCTTACCATCAACATCACCATCCCCACCATCCCCAACACCATTCCCACCATCCCTACCATCACCATCACCATCCCCACCAACATCCCCACCACCATCCTCACTACCATCCCCACCACCATTTCCACCATTCCCACCACAGTCACCATCACCCCCACCATCCCCATCATCATCCGCACCATCCCCACCATCCCCACCACCATCTCCATTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATTTCCACCATTCCCACCATCATCCCCACCACCATCCTCGTTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATTTCCACCATTCCCCACCATCATCCCCACCACCATCCCCATTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATTTCCACCATTCCCACCATCATCCCCACCACCATCCTCGTTACCATCCCCACCACCTTTTCCACCATTCCCACCATCATCCCCACCGCCATCCTCGTTACCATCCCCACCACCTTTTCCACCATTCCCACCATCTCCAACACCTCCCCCACCATCATCCCCACCATCCCCACCACCTTCTCCACCATCATTCTCACCATCCCCACCACCATCTCCACCACCATTCTCACCATCTCCACCAACATCCCCACCATCCCCACCCCCATGCCCACCAACATCCCCACCATCCCCACCCCCATGCCCACCATCATCCCCACCATCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@m64062_200604_115437/178193244/ccs
+CCATTCCCACCATCCTTACCATCAACATCACCATCCCCACCATCCCCAACACCATTCCCACCATCCCTACCATCACCATCACCATCCCCCACCAACATCCCCACCACCATCCTCACTACCATCCCCACCACCATTTCCACCATTCCCACCACAGTCACCATCACCCCCACCATCCCCATCATCATCCGCACCATCCCCACCATCCCCACCACCATCTCCATTACCATCCCCACCACCATCTCCATTACCATCCCCACCACCATCCCCATTACCATCCCCCACCACCATCCCCATTACCATCCCCACCACCATTTCCACCATTCCCACCATCATCCCCACCACCATCCTCGTTACCATCCCCACCACCTTTTCCACCATTCCCACCATCTCCAACACCTCCCCCACCATCATCCCCACCATCCCCACCACCTTCTCCACCATCATTCTCACCATCCCCACCACCATCTCCACCACCATTCTCACCATCTCCACCAACATCCCCACCATCCCACCCCCATGCCCACCAACATCCCCACCATCCCCACCCCATGCCCACCAACATCCCCACCATCCCCACCCCCATGCCCACCATCATCCCCACCATCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@m64062_200517_230654/73204047/ccs
+CCATTCCCACCATCCTTACCATCAACATCACCATCCCCACCATCCCCAACACCATTCCCACCATCCCTACCATCACCATCACCATCCCCACCAACATCCCCACCACCATCCTCACTACCATCCCCACCACCATTTCCACCATTCCCACCACAGTCACCATCACCCCCACCATCCCCATCATCATCCGCACCATCCCCACCATCCCCACCACCATCTCCATTACCATCCCCACCACCATCTCCATTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATTTCCACCATTCCCACCATCATCCCCACCACCATCCTCGTTACCATCCCCACCACCTTTTCCACCATTCCCACCATCTCCAACACCTCCCCCACCATCATCCCCACCATCCCCACCACCTTCTCCACCATCATTCTCACCATCCCCACCACCATCTCCACCACCATTCTCACCATCTCCACCAACATCCCCACCATCCCCACCCCCATGCCCACCAACATCCCCACCATCCCCACCCCCATGCCCACCAACATCCCCACCATCCCCACCCCCATGCCCACCATCATCCCCACCATCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@m64062_200604_115437/157813105/ccs
+CCATTCCCACCATCCTTACCATCAACATCACCATCCCCACCATCCCCAACACCATTCCCACCATCCCTACCATCACCATCACCATCCCCACCAACATCCCCACCACCATCCTCACTACCATCCCCACCACCATTTCCACCATTCCCACCACAGTCACCATCACCCCCACCATCCCCATCATCATCCGCACCATCCCCACCATCCCCACCACCATCTCCATTACCATCCCCACCACCATCTCCATTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATTTCCACCATTCCCACCATCATCCCCACCACCATCCTCGTTACCATCCCCACCACCTTTTCCACCATTCCCACCATCTCCAACACCTCCCCCACCATCATCCCCACCATCCCCACCACCTTCTCCACCATCATTCTCACCATCCCCACCACCATCTCCACCACCATTCTCACCATCTCCACCAACATCCCCACCATCCCCACCCCCATGCCCACCAACATCCCCACCATCCCCACCCCCATGCCCACCAACATCCCCACCATCCCCACCCCCATGCCCACCATCATCCCCACCATCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@m64062_200604_115437/141952744/ccs
+CCATTCCCACCATCCTTACCATCAACATCACCATCCCCCACCATCCCCAACACCATTCCCACCATCCCTACCATCACCATCACCATCCCCACCAACATCCCCACCACCATCCTCACTACCATCCCCACCACCATTTCCACCATTCCCACCACAGTCACCATCACCCCCACCATCCCCATCATCATCCGCACCATCCCCACCATCCCCACCACCATCTCCATTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATTTCCACCATTCCCACCATCATCCCCACCACCATCCTCAGTTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATTTCCACCATTCCCACCATCATCCCCACCACCATCCCCATTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATTTCCACCATTCCCACCATCATCCCACCACCATCCTCGTTACCATCCCCACCACCTTTTCCACCATTCCCACCATCATCCCCACCGCCATCCTCGTTACCATCCCCACCACCTTTTCCACCATTCCCACCATCTCCAACACCTCCCCCACCATCATCCCCACCATCCCCACCACCTTCTCCACCATCATTCTCACCATCCCCACCACCATCTCCACCACCATTCTCACCATCCTCCACCAACATCCCCACCATCCCCACCCATGCCCACCAACATCCCCACCATCCCCACCCCCATGCCCACCATCATCCCCACCATCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@m64062_200604_115437/19531191/ccs
+CCATTCCCACCATCCTTACCATCAACATCACCATCCCCACCATCCCCAACACCATTCCCACCATCCCTACCATCACCATCACCATCCCCCACCAACATCCCCACCACCATCCTCACTACCATCCCCACCACCATTTCCACCATTCCCACCACAGTCACCATCACCCCCACCATCCCCATCATCATCCGCACCATCCCCACCATCCCCACCACCATCTCCATTACCATCCCACCACCATCCCCATTACCATCCCCACCACCATCCCCATTACCATCCCACCACCATTTCCACCATTCCCACCATCATCCCCACCACCATCCTCGTTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATTTCCACCATTCCCACCATCATCCCCACCACCATCCCCATTAACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATTTCCACCATTCCCACCATCATCCCCACCACCATCCCTCGTTACCATCCCCACCACCTTTTCCACCATTCCCACCATCATCCCCACCGCCATCCTCGTTACCATCCCCACCACCTTTTCCACCATTCCCACCATCTCCAACACCTCCCCCACCATCATCCCCACCATCCCCACCACCTTCTCCACCATCATTCTCACCATCCCCACCACCATCTCCACACCATTCTCACCATCTCCACCAACATCCCACCATCCCCACCCCCATGCCCACCAACATCCCCACCATCCCCACCCCCATGCCCACCATCAATCCCCACCATCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@m64062_200604_115437/120652681/ccs
+CCATTCCCACCATCCTTACCATCAACATCACCATCCCCACCATCCCCAACACCATTCCCACCATCCCTACCATCACCATCACCATCCCCACCACATCCCCACCACCATCCTCACTACCATCCCCACCACCATTTCCACCATTCCCACCACAGTCACCATCACCCCCACCATCCCCATCATCATCCGCACCATCCCCACCATCCCCACCACCATCTCCATTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATTTCCACCATTCCCACCATCATCCCCACCACCATCCTCGTTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATTTCCACCATTCCCACCATCATCCCCACCACCATCCCCATTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATTTCCACCATTCCCACCATCATCCCCACCACCATCCTCGTTACCATCCCCACCACCTTTTCCACCATTCCCACCATCATCCCCACCGCCATCCTCGTTACCATCCCCACCACCTTTTCCACCATTCCCACCATCTCCAACACCTCCCCCACCATCATCCCCACCATCCCCACCACCTTCTCCACCATCATTCTCACCATCCCCACCACCATCTCCACCACCATTCTCACCATCTCCACCAACATCCCCACCATCCCCACCCCCATGCCCACCAACATCCCCACCATCCCCACCCCCATGCCCACCATCATCCCCACCATCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@m64062_200604_115437/28773203/ccs
+CCATTCCCACCATCCTTACCATCAACATCACCATCCCCACCATCCCCAACACCATTCCCACCATCCCTACCATCACCATCACCATCCCCACCAACATCCCCACCACCATCCTCACTACCATCCCCACCACCATTTCCACCATTCCCACCACAGTCACCATCACCCCCACCATCCCCATCATCATCCGCACCATCCCCACCATCCCCACCACCATCTCCATTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATTTCCACCATTCCCACCATCATCCCCACCACCATCCTCGTTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATTTCCACCATTCCCACCATCATCCCCACCACCATCCCCATTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATTTCCACCATTCCCACCATCATCCCCACCACCATCCTCGTTACCATCCCCACCACCTTTTCCACCATTCCCACCATCATCCCCACCGCCATCCTCGTTACCATCCCCACCACCTTTTCCACCATTCCCACCATCTCCAACACCTCCCCCACCATCATCCCCACCATCCCCACCACCTTCTCCACCATCATTCTCACCATCCCCACCACCATCTCCACCACCATTCTCACCATCTCCACCAACATCCCCACCATCCCCACCCCCATGCCCACCAACATCCCCACCATCCCCACCCCCATGCCCACCATCATCCCCACCATCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@m64062_200604_115437/75628767/ccs
+CCATTCCCACCATCCTTACCATCAACATCACCATCCCCACCATCCCCAACACCATTCCCACCATCCCTACCATCACCATCACCATCCCCACCAACATCCCCCACCACCATCCTCACTACCATCCCCACCACCATTTCCACCATTCCCACCACAGTCACCATCACCCCCACCATCCCCATCATCATCCGCACCATCCCCACCATCCCCACCACCATCTCCATTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATTCCATCCATTCCACCATCATCCCCACCACCATCCTCGTTACCATCCCCACCACCATCCCCATTACCATCCCACCACCATTTCCACCATCCCACCATCATCCCCACCACCATCCCCAGTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATTTCCACCATTCCCACCATCATCCCCACCACCATCCTCGTTACCATCCCCACCACCTTTTCCACCATTCCCACCATCATCCCCACCGCCATCCTCGTTACCATCCCCACCACCTTTTCCACCATTCCCACCATCTCCAACACCTCCTCCCCCACCATCATCCCCACCATCCCCACCACCTTCTCCACCATCATTCTCACCATCCCCACCACCATCTCCACCACCATTCTCACCATCTCCACCAACATCCCCACCATCCCCACCCCCATGCCCACCAACATCCCCACCATCCCCACCCCCATGCCCACCATCATCCCCACCATCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@m64062_200517_230654/180226763/ccs
+CCATTCCCACCATCCTTACCATCAACATCACCATCCCCACCATCCCCAACACCATTCCCACCATCCCTACCATCACCATCACCATCCCCACCAACATCCCCACCACCATCCTCACTACCATCCCCACCACCATTTCCACCATTCCCACCACAGTCACCATCACCCCACCATCCCCATCATCATCCGCACCATCCCCACCATCCCCACCACCATCTCCATTACCATCCCCACCACCATCTCCATTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATTTCCACCATTCCCACCATCATCCCCACCACCATCCTCGTTACCATCCCCACCACCTTTTCCACCATTCCCACCATCTCCAACACCTCCCCCACCATCATCCCCACCATCCCCACCACCTTCTCCACCATCATTCTCACCATCCCCACCACCATCTCCACCACCATTCTCACCATCTCCACCAACATCCCCACCATCCCCACCCCCATGCCCACCAACATCCCCACCATCCCCACCCCCATGCCCACCAACATCCCCACCATCCCCACCCCCATGCCCACCATCATCCCCACCATCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@m64062_200517_230654/29755012/ccs
+CCATTCCACCATCCTTACCATCAACATCACCATCCCCACCATCCCCAACACCATTCCCACCATCCCTACCATCACCATCACCATCCCCACCAACATCCCCACCACCATCCTCACTACCATCCCCACCACCATTTCCACCATTCCCACCACAGTCACCATCACCCCCACCATCCCCATCATCATCCGCACCATCCCCACCATCCCCACCACCATCTCCATTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATTTCCACCATTCCCACCATCATCCCCACCACCATCCTCGTTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATTTCCACCATTCCCACCATCATCCCCACCACCATCCCCATTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATTTCCACCATTCCCACCATCATCCCCACCACCATCCTCGTTACCATCCCCACCACCTTTTCCACCATTCCCACCATCATCCCCACCGCCATCCTCGTTACCATCCCCACCACCTTTTCCACCATTCCCACCATCTCCAACACCTCCCCCACCATCATCCCCACCATCCCCACCACCTTCTCCACCATCATTCTCACCATCCCCACCACCATCTCCACCACCATTCTCACCATCTCCACCAACATCCCCACCATCCCCACCCCCATGCCCACCAACATCCCCACCATCCCCACCCCCATGCCCACCATCATCCCCACCATCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@m64062_200517_230654/154468686/ccs
+CCATTCCCACCATCCTTACCATCAACATCACCATCCCCACCATCCCCAACACCATGCCCACCATCCCTACCATCACCATCACCATCCCCACCAACATCCCACCACCATCCTCACTACCATCCCACCACCATTCCACCATTCCCCACCACAGTCACCATCACCCCCACCATCCCCATCATCATCCGCACCATCCCCACCATCCCACCACCATCTCCATTACCATCCCCACCACCATCCCATTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATTTCCACCATTCCCACCATCATCCCCACCACCATCCTCGTTACCATCCCCACCACCATCCCATTACCATCCCCACCACCATTTCCACCATTCCCACCATCCATCCCCACCACCATCCCCATTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATTTCCACCATTCCCACCATCATCCCCACCACCATCCTCGTTACCATCCCCACCACCTTTTCCACCATTCCCACCATCATCCCCACCGCCATCCTCGTTACCATCCCCACCACCTTTTCCACCATTCCCACCATCTCCAACACCTCCCCACCATCATCCCCACCATCCCCACCACCTTCTCCACCATCATTCTCACCATCCCCACCACCATCTCCACCACCATTCTCACCATCTCCACCAACATCCCCACCATCCCACCCCCCATGCCCACCAACATCCCCACCATCCCCACCCCCATGCCCACCATCATCCCCACCATC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@m64062_200604_115437/146211230/ccs
+CCATTCCCACCATCCTTACCATCAACATCACCATCCCCACCATCCCCAACACCATTCCCACCATCCCTACCATCACCATCACCATCCCCACCAACATCCCCACCACCATCCTCACTACCATCCCCACCACCATTTCCACCATTCCCACCACAGTCACCATCACCCCCACCATCCCCATCATCATCCGCACCATCCCCACCATCCCCACCACCATCTCCATTACCATCCCCACCACCATCTCCATTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATTTCCACCATTCCCACCATCATCCCCACCACCATCCTCGTTACCATCCCCACCACCTTTTCCACCATTCCCACCATCTCCAACACCTCCCCCACCATCATCCCCACCATCCCCACCACCTTCTCCACCATCATTCTCACCATCCCCACCACCATCTCCACCACCATTCTCACCATCTCCACCAACATCCCCACCATCCCCACCCCCATGCCCACCAACATCCCCACCATCCCCACCCCCATGCCCACCAACATCCCCACCATCCCCACCCCCATGCCCACCATCATCCCCACCATCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@m64062_200517_230654/85983296/ccs
+CCATTCCCACCATCCTTACCATCAACATCACCATCCCCACCATCCCCAACACCATTCCCACCATCCCTACCATCACCATCACCATCCCCACCAACATCCCCACCACCATCCTCACTACCATCCCCACCACCATTTCCACCATTCCCACCACAGTCACCATCACCCCCACCATCCCCATCATCATCCGCACCATCCCCACCATCCCCACCACCATCTCCATTACCATCCCCACCACCATCTCCATTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATCCCCATTACCATCCCCACCACCATTTCCACCATTCCCACCATCATCCCCACCACCATCCTCGTTACCATCCCCACCACCTTTTCCACCATTCCCACCATCTCCAACACCTCCCCCACCATCATCCCCACCATCCCCACCACCTTCTCCACCATCATTCTCACCATCCCCACCACCATCTCCACCACCATTCTCACCATCTCCACCAACATCCCCACCATCCCCACCCCCATGCCCACCAACATCCCCACCATCCCCACCCCCATGCCCACCAACATCCCCACCATCCCCACCCCCATGCCCACCATCATCCCCACCATCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII

From 1e3d594eaadb92f59c62ea0501bed68f734444af Mon Sep 17 00:00:00 2001
From: Yan Gao <gaoy1@chop.edu>
Date: Sun, 21 Apr 2024 12:03:39 -0400
Subject: [PATCH 25/38] update CI

---
 .github/workflows/linux-CI.yml | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/linux-CI.yml b/.github/workflows/linux-CI.yml
index 78bab7b..c79af3c 100644
--- a/.github/workflows/linux-CI.yml
+++ b/.github/workflows/linux-CI.yml
@@ -19,9 +19,10 @@ jobs:
       run: make
 
     - name: Run Test Data
-      run: ./bin/abpoa ./test_data/test.fa;         \
-           ./bin/abpoa ./test_data/heter.fa -d2;    \
-           ./bin/abpoa ./test_data/seq.fa -a1;      \
-           ./bin/abpoa ./test_data/seq.fa -r5 -a1;  \
-           ./bin/abpoa ./test_data./heter.fq -d2 -Q \
-           ./bin/abpoa ./test_data./heter.fq -d2 -Q -a1
+      run: | 
+         ./bin/abpoa ./test_data/test.fa
+         ./bin/abpoa ./test_data/heter.fa -d2
+         ./bin/abpoa ./test_data/seq.fa -a1
+         ./bin/abpoa ./test_data/seq.fa -r5 -a1
+         ./bin/abpoa ./test_data./heter.fq -d2 -Q
+         ./bin/abpoa ./test_data./heter.fq -d2 -Q -a1

From cfb9a21d35ac69d9ca8f8f1d7f5b0470968ab099 Mon Sep 17 00:00:00 2001
From: Yan Gao <gaoy1@chop.edu>
Date: Sun, 21 Apr 2024 12:05:45 -0400
Subject: [PATCH 26/38] update CI

---
 .github/workflows/linux-CI.yml | 4 ++--
 .github/workflows/macos-CI.yml | 9 +++++++++
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/linux-CI.yml b/.github/workflows/linux-CI.yml
index c79af3c..6965550 100644
--- a/.github/workflows/linux-CI.yml
+++ b/.github/workflows/linux-CI.yml
@@ -24,5 +24,5 @@ jobs:
          ./bin/abpoa ./test_data/heter.fa -d2
          ./bin/abpoa ./test_data/seq.fa -a1
          ./bin/abpoa ./test_data/seq.fa -r5 -a1
-         ./bin/abpoa ./test_data./heter.fq -d2 -Q
-         ./bin/abpoa ./test_data./heter.fq -d2 -Q -a1
+         ./bin/abpoa ./test_data/heter.fq -d2 -Q
+         ./bin/abpoa ./test_data/heter.fq -d2 -Q -a1
diff --git a/.github/workflows/macos-CI.yml b/.github/workflows/macos-CI.yml
index 6ca2c17..1e266ce 100644
--- a/.github/workflows/macos-CI.yml
+++ b/.github/workflows/macos-CI.yml
@@ -17,3 +17,12 @@ jobs:
 
     - name: make
       run: make
+
+    - name: Run Test Data
+      run: | 
+         ./bin/abpoa ./test_data/test.fa
+         ./bin/abpoa ./test_data/heter.fa -d2
+         ./bin/abpoa ./test_data/seq.fa -a1
+         ./bin/abpoa ./test_data/seq.fa -r5 -a1
+         ./bin/abpoa ./test_data/heter.fq -d2 -Q
+         ./bin/abpoa ./test_data/heter.fq -d2 -Q -a1
\ No newline at end of file

From 4608021d9c6cbcb235090c086b0a976fa2164859 Mon Sep 17 00:00:00 2001
From: Yan Gao <gaoy1@chop.edu>
Date: Sun, 21 Apr 2024 12:43:37 -0400
Subject: [PATCH 27/38] update README

---
 .github/workflows/linux-CI.yml |  2 +-
 .github/workflows/macos-CI.yml |  2 +-
 README.md                      | 20 +++++++++++++++-----
 3 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/linux-CI.yml b/.github/workflows/linux-CI.yml
index 6965550..ba2255a 100644
--- a/.github/workflows/linux-CI.yml
+++ b/.github/workflows/linux-CI.yml
@@ -25,4 +25,4 @@ jobs:
          ./bin/abpoa ./test_data/seq.fa -a1
          ./bin/abpoa ./test_data/seq.fa -r5 -a1
          ./bin/abpoa ./test_data/heter.fq -d2 -Q
-         ./bin/abpoa ./test_data/heter.fq -d2 -Q -a1
+         ./bin/abpoa ./test_data/heter.fq -d2 -r2 -Q -a1
diff --git a/.github/workflows/macos-CI.yml b/.github/workflows/macos-CI.yml
index 1e266ce..c28b431 100644
--- a/.github/workflows/macos-CI.yml
+++ b/.github/workflows/macos-CI.yml
@@ -25,4 +25,4 @@ jobs:
          ./bin/abpoa ./test_data/seq.fa -a1
          ./bin/abpoa ./test_data/seq.fa -r5 -a1
          ./bin/abpoa ./test_data/heter.fq -d2 -Q
-         ./bin/abpoa ./test_data/heter.fq -d2 -Q -a1
\ No newline at end of file
+         ./bin/abpoa ./test_data/heter.fq -d2 -r2 -Q -a1
\ No newline at end of file
diff --git a/README.md b/README.md
index 82eb2a1..5a7c7d7 100644
--- a/README.md
+++ b/README.md
@@ -8,7 +8,6 @@
 [![C/C++ CI](https://github.com/yangao07/abPOA/actions/workflows/linux-CI.yml/badge.svg)](https://github.com/yangao07/abPOA/actions/workflows/linux-CI.yml)
 [![C/C++ CI](https://github.com/yangao07/abPOA/actions/workflows/macos-CI.yml/badge.svg)](https://github.com/yangao07/abPOA/actions/workflows/macos-CI.yml)
 [![License](https://img.shields.io/badge/License-MIT-black.svg)](https://github.com/yangao07/abPOA/blob/main/LICENSE)
-<!-- [![PyPI](https://img.shields.io/pypi/v/pyabpoa.svg?style=flat)](https://pypi.python.org/pypi/pyabpoa) -->
 ## Updates (v1.5.1)
 
 - Fix a memory allocation bug caused by int overflow when memory is large (>4G)
@@ -44,12 +43,12 @@ abpoa ./test_data/seq.fa > cons.fa
     - [Building abPOA from source files](#building-abpoa-from-source-files)
     - [Pre-built binary executable file for Linux/Unix or MacOS](#pre-built-binary-executable-file-for-linuxunix-or-macos)
   - [General usage](#general-usage)
-    - [Generate consensus sequence](#generate-consensus-sequence)
+    - [Generate a consensus sequence](#generate-a-consensus-sequence)
     - [Generate multiple consensus sequences](#generate-multiple-consensus-sequences)
     - [Generate row-column multiple sequence alignment in FASTA format](#generate-row-column-multiple-sequence-alignment-in-fasta-format)
     - [Generate graph information in GFA format](#generate-graph-information-in-gfa-format)
     - [Align sequence to an existing graph in GFA/MSA format](#align-sequence-to-an-existing-graph-in-gfamsa-format)
-    - [Generate consensus sequence for amino acid sequences](#generate-consensus-sequence-for-amino-acid-sequences)
+    - [Generate a consensus sequence for amino acid sequences](#generate-a-consensus-sequence-for-amino-acid-sequences)
     - [Generate a plot of the alignment graph](#generate-a-plot-of-the-alignment-graph)
   - [Input](#input)
   - [Output](#output)
@@ -118,12 +117,23 @@ tar -zxvf abPOA-v1.5.1_arm64-macos.tar.gz
 ```
 
 ## General usage
-### Generate consensus sequence
+### Generate a consensus sequence
 
 ```
 abpoa seq.fa > cons.fa
 ```
 
+abPOA provides two conensus calling methods: 
+* heaviest bundlding (default): the path with largest weight along the partial order graph
+* most frequent bases: pick the most common base at each alignment position
+
+Sometimes these two methods will generate different consensus sequences (#67).
+
+To use `most frequent bases` method:
+```
+abpoa seq.fa -a1 > cons.fa
+```
+
 ### Generate multiple consensus sequences
 
 ```
@@ -159,7 +169,7 @@ abpoa seq1.fa -r1 > seq1.msa
 abpoa -i seq1.msa seq2.fa > cons.fa
 ```
 
-### Generate consensus sequence for amino acid sequences
+### Generate a consensus sequence for amino acid sequences
 ```
 abpoa -c -t BLOSUM62.mtx input_aa.fa > output_aa_cons.fa
 ```

From c48d7e9c272bda109f7e3767ae8acc77edfe1a26 Mon Sep 17 00:00:00 2001
From: Yan Gao <gaoy1@chop.edu>
Date: Sun, 21 Apr 2024 12:49:30 -0400
Subject: [PATCH 28/38] Update README.md

---
 README.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 5a7c7d7..2b0c326 100644
--- a/README.md
+++ b/README.md
@@ -127,7 +127,8 @@ abPOA provides two conensus calling methods:
 * heaviest bundlding (default): the path with largest weight along the partial order graph
 * most frequent bases: pick the most common base at each alignment position
 
-Sometimes these two methods will generate different consensus sequences (#67).
+Sometimes these two methods will generate different consensus sequences [#67](https://github.com/yangao07/abPOA/issues/67)
+
 
 To use `most frequent bases` method:
 ```

From b0eba33f0de695554ab5b3a6f2a9e1ce59a5542c Mon Sep 17 00:00:00 2001
From: Yan Gao <yangao07@outlook.com>
Date: Wed, 17 Jul 2024 16:22:48 -0400
Subject: [PATCH 29/38] v1.5.2

---
 README.md   | 25 +++++++++++++------------
 setup.py    |  2 +-
 src/abpoa.c |  2 +-
 3 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/README.md b/README.md
index 5a7c7d7..1afba05 100644
--- a/README.md
+++ b/README.md
@@ -8,15 +8,16 @@
 [![C/C++ CI](https://github.com/yangao07/abPOA/actions/workflows/linux-CI.yml/badge.svg)](https://github.com/yangao07/abPOA/actions/workflows/linux-CI.yml)
 [![C/C++ CI](https://github.com/yangao07/abPOA/actions/workflows/macos-CI.yml/badge.svg)](https://github.com/yangao07/abPOA/actions/workflows/macos-CI.yml)
 [![License](https://img.shields.io/badge/License-MIT-black.svg)](https://github.com/yangao07/abPOA/blob/main/LICENSE)
-## Updates (v1.5.1)
+## Updates (v1.5.2)
 
-- Fix a memory allocation bug caused by int overflow when memory is large (>4G)
+- Fix a backtrack bug in seeding mode
+- Add consensus mode: most frequent base at each pos
 
 ## Getting started
 Download the [latest release](https://github.com/yangao07/abPOA/releases):
 ```
-wget https://github.com/yangao07/abPOA/releases/download/v1.5.1/abPOA-v1.5.1.tar.gz
-tar -zxvf abPOA-v1.5.1.tar.gz && cd abPOA-v1.5.1
+wget https://github.com/yangao07/abPOA/releases/download/v1.5.2/abPOA-v1.5.2.tar.gz
+tar -zxvf abPOA-v1.5.2.tar.gz && cd abPOA-v1.5.2
 ```
 Make from source and run with test data:
 ```
@@ -34,7 +35,7 @@ abpoa ./test_data/seq.fa > cons.fa
 ## Table of Contents
 
 - [abPOA: adaptive banded Partial Order Alignment](#abpoa-adaptive-banded-partial-order-alignment)
-  - [Updates (v1.5.1)](#updates-v151)
+  - [Updates (v1.5.2)](#updates-v152)
   - [Getting started](#getting-started)
   - [Table of Contents](#table-of-contents)
   - [Introduction](#introduction)
@@ -93,9 +94,9 @@ You can also build abPOA from source files.
 Make sure you have gcc (>=6.4.0) and zlib installed before compiling.
 It is recommended to download the [latest release](https://github.com/yangao07/abPOA/releases).
 ```
-wget https://github.com/yangao07/abPOA/releases/download/v1.5.1/abPOA-v1.5.1.tar.gz
-tar -zxvf abPOA-v1.5.1.tar.gz
-cd abPOA-v1.5.1; make
+wget https://github.com/yangao07/abPOA/releases/download/v1.5.2/abPOA-v1.5.2.tar.gz
+tar -zxvf abPOA-v1.5.2.tar.gz
+cd abPOA-v1.5.2; make
 ```
 Or, you can use `git clone` command to download the source code.
 This gives you the latest version of abPOA, which might be still under development.
@@ -107,13 +108,13 @@ cd abPOA; make
 ### Pre-built binary executable file for Linux/Unix or MacOS
 If you meet any compiling issue, please try the pre-built binary file for linux:
 ```
-wget https://github.com/yangao07/abPOA/releases/download/v1.5.1/abPOA-v1.5.1_x64-linux.tar.gz
-tar -zxvf abPOA-v1.5.1_x64-linux.tar.gz
+wget https://github.com/yangao07/abPOA/releases/download/v1.5.2/abPOA-v1.5.2_x64-linux.tar.gz
+tar -zxvf abPOA-v1.5.2_x64-linux.tar.gz
 ```
 or for macos:
 ```
-wget https://github.com/yangao07/abPOA/releases/download/v1.5.1/abPOA-v1.5.1_arm64-macos.tar.gz
-tar -zxvf abPOA-v1.5.1_arm64-macos.tar.gz
+wget https://github.com/yangao07/abPOA/releases/download/v1.5.2/abPOA-v1.5.2_arm64-macos.tar.gz
+tar -zxvf abPOA-v1.5.2_arm64-macos.tar.gz
 ```
 
 ## General usage
diff --git a/setup.py b/setup.py
index f8c54f8..c6fc019 100644
--- a/setup.py
+++ b/setup.py
@@ -63,7 +63,7 @@
     description = "pyabpoa: SIMD-based partial order alignment using adaptive band",
     long_description = long_description,
     long_description_content_type="text/markdown",
-    version = "1.5.1",
+    version = "1.5.2",
     url = "https://github.com/yangao07/abPOA",
     author = "Yan Gao",
     author_email = "gaoy1@chop.edu",
diff --git a/src/abpoa.c b/src/abpoa.c
index 9c3a09e..9a33e40 100644
--- a/src/abpoa.c
+++ b/src/abpoa.c
@@ -16,7 +16,7 @@ char PROG[20] = "abpoa";
 #define _bO BOLD UNDERLINE "O" NONE
 #define _bA BOLD UNDERLINE "A" NONE
 char DESCRIPTION[100] = _ba "daptive " _bb "anded " _bP "artial " _bO "rder " _bA "lignment";
-char VERSION[20] = "1.5.1";
+char VERSION[20] = "1.5.2";
 char CONTACT[30] = "gaoy1@chop.edu";
 
 const struct option abpoa_long_opt [] = {

From 64cda9e264d9ed67ca4c3d20efae417040e85fa5 Mon Sep 17 00:00:00 2001
From: Yan Gao <gaoy1@chop.edu>
Date: Thu, 25 Jul 2024 11:20:24 -0400
Subject: [PATCH 30/38] Create pypi.yml

---
 .github/workflows/pypi.yml | 14 ++++++++++++++
 1 file changed, 14 insertions(+)
 create mode 100644 .github/workflows/pypi.yml

diff --git a/.github/workflows/pypi.yml b/.github/workflows/pypi.yml
new file mode 100644
index 0000000..ae4795c
--- /dev/null
+++ b/.github/workflows/pypi.yml
@@ -0,0 +1,14 @@
+jobs:
+  pypi-publish:
+    name: Upload release to PyPI
+    runs-on: ubuntu-latest
+    environment: release
+      name: pypi
+      url: https://pypi.org/project/pyabpoa/
+    permissions:
+      id-token: write  # IMPORTANT: this permission is mandatory for trusted publishing
+    steps:
+    # retrieve your distributions here
+
+    - name: Publish package distributions to PyPI
+      uses: pypa/gh-action-pypi-publish@release/v1

From 155da607c5220f5da860153099ba1c49a198613a Mon Sep 17 00:00:00 2001
From: Yan Gao <yangao07@outlook.com>
Date: Thu, 25 Jul 2024 11:29:39 -0400
Subject: [PATCH 31/38] update pypi.yml

---
 .github/workflows/pypi.yml | 8 ++++++--
 README.md                  | 2 +-
 setup.py                   | 2 +-
 src/abpoa.c                | 2 +-
 4 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/pypi.yml b/.github/workflows/pypi.yml
index ae4795c..e3fc56c 100644
--- a/.github/workflows/pypi.yml
+++ b/.github/workflows/pypi.yml
@@ -1,14 +1,18 @@
+name: PyPI Publish
+
+on: [push, pull_request]
+
 jobs:
   pypi-publish:
     name: Upload release to PyPI
     runs-on: ubuntu-latest
-    environment: release
+    environment:
       name: pypi
       url: https://pypi.org/project/pyabpoa/
     permissions:
       id-token: write  # IMPORTANT: this permission is mandatory for trusted publishing
     steps:
     # retrieve your distributions here
-
+    
     - name: Publish package distributions to PyPI
       uses: pypa/gh-action-pypi-publish@release/v1
diff --git a/README.md b/README.md
index a5a3b8c..7131b63 100644
--- a/README.md
+++ b/README.md
@@ -299,7 +299,7 @@ abPOA also provides Python bindings to all the primary C APIs. Refer to [python/
 The evaluation datasets and scripts used in [abPOA paper](https://dx.doi.org/10.1093/bioinformatics/btaa963) can be found in [abPOA-v1.0.5](https://github.com/yangao07/abPOA/releases/tag/v1.0.5).
 
 ## Contact
-Yan Gao gaoy1@chop.edu
+Yan Gao yangao@ds.dfci.harvard.edu
 
 Yi Xing xingyi@chop.edu
 
diff --git a/setup.py b/setup.py
index c6fc019..f2f0608 100644
--- a/setup.py
+++ b/setup.py
@@ -66,7 +66,7 @@
     version = "1.5.2",
     url = "https://github.com/yangao07/abPOA",
     author = "Yan Gao",
-    author_email = "gaoy1@chop.edu",
+    author_email = "yangao@ds.dfci.harvard.edu",
     license = "MIT",
     keywords = "multiple-sequence-alignment  partial-order-graph-alignment",
     setup_requires=["cython<3"], # see https://github.com/cython/cython/issues/5568
diff --git a/src/abpoa.c b/src/abpoa.c
index 9a33e40..b0b3cd9 100644
--- a/src/abpoa.c
+++ b/src/abpoa.c
@@ -17,7 +17,7 @@ char PROG[20] = "abpoa";
 #define _bA BOLD UNDERLINE "A" NONE
 char DESCRIPTION[100] = _ba "daptive " _bb "anded " _bP "artial " _bO "rder " _bA "lignment";
 char VERSION[20] = "1.5.2";
-char CONTACT[30] = "gaoy1@chop.edu";
+char CONTACT[30] = "yangao@ds.dfci.harvard.edu";
 
 const struct option abpoa_long_opt [] = {
     { "align-mode", 1, NULL, 'm' },

From 539cb14d077ae92b12a8bee1d3cfb7737fa637f8 Mon Sep 17 00:00:00 2001
From: Yan Gao <gaoy1@chop.edu>
Date: Thu, 25 Jul 2024 11:38:31 -0400
Subject: [PATCH 32/38] Update wheels.yml

---
 .github/workflows/wheels.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml
index b270959..46cb499 100644
--- a/.github/workflows/wheels.yml
+++ b/.github/workflows/wheels.yml
@@ -79,4 +79,4 @@ jobs:
       - uses: pypa/gh-action-pypi-publish@v1.5.0
         with:
           user: __token__
-          password: ${{ secrets.pypi_password }}
+          password: ${{ secrets.PYPI_API_TOKEN }}

From e5d515b88abd47b4a395fb56bbb5d106aeebf323 Mon Sep 17 00:00:00 2001
From: Yan Gao <gaoy1@chop.edu>
Date: Thu, 25 Jul 2024 11:40:05 -0400
Subject: [PATCH 33/38] Delete .github/workflows/pypi.yml

---
 .github/workflows/pypi.yml | 18 ------------------
 1 file changed, 18 deletions(-)
 delete mode 100644 .github/workflows/pypi.yml

diff --git a/.github/workflows/pypi.yml b/.github/workflows/pypi.yml
deleted file mode 100644
index e3fc56c..0000000
--- a/.github/workflows/pypi.yml
+++ /dev/null
@@ -1,18 +0,0 @@
-name: PyPI Publish
-
-on: [push, pull_request]
-
-jobs:
-  pypi-publish:
-    name: Upload release to PyPI
-    runs-on: ubuntu-latest
-    environment:
-      name: pypi
-      url: https://pypi.org/project/pyabpoa/
-    permissions:
-      id-token: write  # IMPORTANT: this permission is mandatory for trusted publishing
-    steps:
-    # retrieve your distributions here
-    
-    - name: Publish package distributions to PyPI
-      uses: pypa/gh-action-pypi-publish@release/v1

From e2f4c81596fb378fddff12f5ecd45d10ee860c6b Mon Sep 17 00:00:00 2001
From: Yan Gao <yangao@ds.dfci.harvard.edu>
Date: Thu, 25 Jul 2024 11:55:19 -0400
Subject: [PATCH 34/38] Create pypi.yml

---
 .github/workflows/pypi.yml | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)
 create mode 100644 .github/workflows/pypi.yml

diff --git a/.github/workflows/pypi.yml b/.github/workflows/pypi.yml
new file mode 100644
index 0000000..1d9e7c3
--- /dev/null
+++ b/.github/workflows/pypi.yml
@@ -0,0 +1,18 @@
+name: Publish to Pypi
+
+on: [push, pull_request]
+
+jobs:
+  pypi-publish:
+    name: upload release to PyPI
+    runs-on: ubuntu-latest
+    # Specifying a GitHub environment is optional, but strongly encouraged
+    environment: release
+    permissions:
+      # IMPORTANT: this permission is mandatory for trusted publishing
+      id-token: write
+    steps:
+      # retrieve your distributions here
+
+      - name: Publish package distributions to PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1

From 372568ef444c1161f9d587776459f0e04f1584c6 Mon Sep 17 00:00:00 2001
From: Yan Gao <yangao@ds.dfci.harvard.edu>
Date: Thu, 25 Jul 2024 12:10:22 -0400
Subject: [PATCH 35/38] Update wheels.yml

---
 .github/workflows/wheels.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml
index 46cb499..72b63b5 100644
--- a/.github/workflows/wheels.yml
+++ b/.github/workflows/wheels.yml
@@ -65,7 +65,7 @@ jobs:
     needs: [build_wheels, build_sdist]
     runs-on: ubuntu-latest
     # upload to PyPI on every tag starting with 'v'
-    if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v')
+    # if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v')
     # alternatively, to publish when a GitHub Release is created, use the following rule:
     # if: github.event_name == 'release' && github.event.action == 'published'
     steps:

From cd3f1426e8bcbfdbe7242a4cf6c76bbdeb2e8fcc Mon Sep 17 00:00:00 2001
From: Yan Gao <yangao07@outlook.com>
Date: Thu, 25 Jul 2024 12:13:43 -0400
Subject: [PATCH 36/38] update wheels.yml

---
 .github/workflows/pypi.yml   | 18 ------------------
 .github/workflows/wheels.yml |  2 +-
 2 files changed, 1 insertion(+), 19 deletions(-)
 delete mode 100644 .github/workflows/pypi.yml

diff --git a/.github/workflows/pypi.yml b/.github/workflows/pypi.yml
deleted file mode 100644
index 1d9e7c3..0000000
--- a/.github/workflows/pypi.yml
+++ /dev/null
@@ -1,18 +0,0 @@
-name: Publish to Pypi
-
-on: [push, pull_request]
-
-jobs:
-  pypi-publish:
-    name: upload release to PyPI
-    runs-on: ubuntu-latest
-    # Specifying a GitHub environment is optional, but strongly encouraged
-    environment: release
-    permissions:
-      # IMPORTANT: this permission is mandatory for trusted publishing
-      id-token: write
-    steps:
-      # retrieve your distributions here
-
-      - name: Publish package distributions to PyPI
-        uses: pypa/gh-action-pypi-publish@release/v1
diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml
index 72b63b5..46cb499 100644
--- a/.github/workflows/wheels.yml
+++ b/.github/workflows/wheels.yml
@@ -65,7 +65,7 @@ jobs:
     needs: [build_wheels, build_sdist]
     runs-on: ubuntu-latest
     # upload to PyPI on every tag starting with 'v'
-    # if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v')
+    if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v')
     # alternatively, to publish when a GitHub Release is created, use the following rule:
     # if: github.event_name == 'release' && github.event.action == 'published'
     steps:

From 8784b065febd36ef9c983faf1e5ae78b4d8e23ca Mon Sep 17 00:00:00 2001
From: Yan Gao <yangao@ds.dfci.harvard.edu>
Date: Thu, 29 Aug 2024 15:35:48 -0400
Subject: [PATCH 37/38] add cons_algrm in pyabpoa

---
 .github/workflows/{wheels.yml => pypi.yml} | 55 +++++++++++-----------
 include/abpoa.h                            |  2 +-
 python/README.md                           |  1 +
 python/cabpoa.pxd                          |  3 +-
 python/pyabpoa.pyx                         | 12 ++++-
 src/abpoa.c                                |  2 +-
 src/abpoa.h                                |  2 +-
 src/abpoa_align.c                          |  4 +-
 src/abpoa_graph.c                          |  4 +-
 9 files changed, 47 insertions(+), 38 deletions(-)
 rename .github/workflows/{wheels.yml => pypi.yml} (59%)

diff --git a/.github/workflows/wheels.yml b/.github/workflows/pypi.yml
similarity index 59%
rename from .github/workflows/wheels.yml
rename to .github/workflows/pypi.yml
index 46cb499..e7ee104 100644
--- a/.github/workflows/wheels.yml
+++ b/.github/workflows/pypi.yml
@@ -16,35 +16,35 @@ env:
   CIBW_SKIP: pp*
 
 jobs:
-  build_wheels:
-    name: Build wheels for ${{ matrix.python }}-${{ matrix.buildplat[1] }}
-    runs-on: ${{ matrix.buildplat[0] }}
-    strategy:
-      # Ensure that a wheel builder finishes even if another fails
-      fail-fast: false
-      matrix:
-        buildplat:
-        - [ubuntu-latest, manylinux_x86_64, auto]
-        - [macos-latest, macosx_x86_64, x86_64]
-        # skip these for now, need more work
-        - [macos-latest, macosx_arm64, arm64]
-        # python: ["cp38", "cp39", "cp310", "cp311", "cp312"]
-        python: ["cp312"]
+  # build_wheels:
+  #   name: Build wheels for ${{ matrix.python }}-${{ matrix.buildplat[1] }}
+  #   runs-on: ${{ matrix.buildplat[0] }}
+  #   strategy:
+  #     # Ensure that a wheel builder finishes even if another fails
+  #     fail-fast: false
+  #     matrix:
+  #       buildplat:
+  #       - [ubuntu-latest, manylinux_x86_64, auto]
+  #       - [macos-latest, macosx_x86_64, x86_64]
+  #       # skip these for now, need more work
+  #       - [macos-latest, macosx_arm64, arm64]
+  #       # python: ["cp38", "cp39", "cp310", "cp311", "cp312"]
+  #       python: ["cp312"]
 
-    steps:
-      - uses: actions/checkout@v3
-        with:
-            submodules: 'true'
+  #   steps:
+  #     - uses: actions/checkout@v3
+  #       with:
+  #           submodules: 'true'
 
-      - name: Build wheels
-        uses: pypa/cibuildwheel@v2.15.0
-        env:
-          CIBW_ARCHS: ${{ matrix.buildplat[2] }}
-          CIBW_BUILD: ${{ matrix.python }}-${{ matrix.buildplat[1] }}
+  #     - name: Build wheels
+  #       uses: pypa/cibuildwheel@v2.15.0
+  #       env:
+  #         CIBW_ARCHS: ${{ matrix.buildplat[2] }}
+  #         CIBW_BUILD: ${{ matrix.python }}-${{ matrix.buildplat[1] }}
 
-      - uses: actions/upload-artifact@v3
-        with:
-          path: ./wheelhouse/*.whl
+  #     - uses: actions/upload-artifact@v3
+  #       with:
+  #         path: ./wheelhouse/*.whl
 
   build_sdist:
     name: Build source distribution
@@ -62,7 +62,8 @@ jobs:
           path: dist/*.tar.gz
 
   upload_pypi:
-    needs: [build_wheels, build_sdist]
+    # needs: [build_wheels, build_sdist]
+    needs: [build_sdist]
     runs-on: ubuntu-latest
     # upload to PyPI on every tag starting with 'v'
     if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v')
diff --git a/include/abpoa.h b/include/abpoa.h
index 693a5bc..255da17 100644
--- a/include/abpoa.h
+++ b/include/abpoa.h
@@ -36,7 +36,7 @@
 #define ABPOA_OUT_CONS_FQ  5
 
 #define ABPOA_HB 0
-#define ABPOA_MC 1
+#define ABPOA_MF 1
 
 #define ABPOA_NONE_VERBOSE 0
 #define ABPOA_INFO_VERBOSE 1
diff --git a/python/README.md b/python/README.md
index b49616a..fd2c441 100644
--- a/python/README.md
+++ b/python/README.md
@@ -63,6 +63,7 @@ This constructs a multiple sequence alignment handler of pyabpoa, it accepts the
 * **gap_ext2**: second gap extension penalty; default: **1**
 * **extra_b**: first adaptive banding paremeter; set as < 0 to disable adaptive banded DP; default: **10**
 * **extra_f**: second adaptive banding paremete; the number of extra bases added on both sites of the band is *b+f\*L*, where *L* is the length of the aligned sequence; default : **0.01**
+* **cons_algrm**: consensus calling algorithm. 'HB': heaviest bunlding, 'MF': most frequent bases; default: **'HB'**
 
 The `msa_aligner` handler provides one method which performs multiple sequence alignment and takes four arguments:
 ```
diff --git a/python/cabpoa.pxd b/python/cabpoa.pxd
index f5eb558..702b1cd 100644
--- a/python/cabpoa.pxd
+++ b/python/cabpoa.pxd
@@ -35,7 +35,6 @@ cdef extern from "abpoa.h":
     cdef int ABPOA_OUT_CONS_GFA "ABPOA_OUT_CONS_GFA"
 
     cdef int ABPOA_HB "ABPOA_HB"
-    cdef int ABPOA_HC "ABPOA_HC"
     cdef int ABPOA_MF "ABPOA_MF"
 
     ctypedef struct abpoa_res_t:
@@ -63,7 +62,7 @@ cdef extern from "abpoa.h":
         uint8_t use_qv, disable_seeding, progressive_poa
         char *incr_fn
         char *out_pog
-        int align_mode, gap_mode, max_n_cons
+        int align_mode, gap_mode, max_n_cons, cons_algrm
         double min_freq # for diploid data
         int verbose
 
diff --git a/python/pyabpoa.pyx b/python/pyabpoa.pyx
index d344e3f..0f4b341 100644
--- a/python/pyabpoa.pyx
+++ b/python/pyabpoa.pyx
@@ -90,8 +90,10 @@ cdef class msa_aligner:
     cdef abpoa_para_t abpt
     cdef seq2int_dict, int2seq_dict
 
-    def __cinit__(self, aln_mode='g', is_aa=False, match=2, mismatch=4, score_matrix=b'', gap_open1=4, gap_open2=24, gap_ext1=2, gap_ext2=1,
-            extra_b=10, extra_f=0.01):
+    def __cinit__(self, aln_mode='g', is_aa=False,
+                  match=2, mismatch=4, score_matrix=b'', gap_open1=4, gap_open2=24, gap_ext1=2, gap_ext2=1,
+                  extra_b=10, extra_f=0.01,
+                  cons_algrm='HB'):
         self.ab = abpoa_init()
 
         if aln_mode == 'g':
@@ -132,6 +134,12 @@ cdef class msa_aligner:
         self.abpt.zdrop = -1
         self.abpt.disable_seeding = 1
         self.abpt.progressive_poa = 0
+        if cons_algrm.upper() == 'MF':
+            self.abpt.cons_algrm = ABPOA_MF
+        elif cons_algrm.upper() == 'HB':
+            self.abpt.cons_algrm = ABPOA_HB
+        else:
+            raise Exception('Unknown conseneus calling mode: {}'.format(cons_algrm))
 
         self.seq2int_dict, self.int2seq_dict = set_seq_int_dict(self.abpt.m)
 
diff --git a/src/abpoa.c b/src/abpoa.c
index b0b3cd9..a18895f 100644
--- a/src/abpoa.c
+++ b/src/abpoa.c
@@ -121,7 +121,7 @@ int abpoa_usage(void)
     err_printf("                            - %d: consensus in FASTQ format\n", ABPOA_OUT_CONS_FQ);
     err_printf("    -a --cons-algrm INT     consensus algorithm [%d]\n", ABPOA_HB);
     err_printf("                            - %d: heaviest bundling path in partial order graph\n", ABPOA_HB);
-    err_printf("                            - %d: most frequent bases at each position\n", ABPOA_MC);
+    err_printf("                            - %d: most frequent bases at each position\n", ABPOA_MF);
     err_printf("    -d --maxnum-cons INT    max. number of consensus sequence to generate [1]\n");
     err_printf("    -q --min-freq  FLOAT    min. frequency of each consensus sequence (only effective when -d/--num-cons > 1) [%.2f]\n", MULTIP_MIN_FREQ);
     err_printf("    -g --out-pog    FILE    dump final alignment graph to FILE (.pdf/.png) [Null]\n\n");
diff --git a/src/abpoa.h b/src/abpoa.h
index 693a5bc..255da17 100644
--- a/src/abpoa.h
+++ b/src/abpoa.h
@@ -36,7 +36,7 @@
 #define ABPOA_OUT_CONS_FQ  5
 
 #define ABPOA_HB 0
-#define ABPOA_MC 1
+#define ABPOA_MF 1
 
 #define ABPOA_NONE_VERBOSE 0
 #define ABPOA_INFO_VERBOSE 1
diff --git a/src/abpoa_align.c b/src/abpoa_align.c
index cd427d5..c573fab 100644
--- a/src/abpoa_align.c
+++ b/src/abpoa_align.c
@@ -143,10 +143,10 @@ abpoa_para_t *abpoa_init_para(void) {
 
 void abpoa_post_set_para(abpoa_para_t *abpt) {
     abpoa_set_gap_mode(abpt);
-    if (abpt->out_msa || abpt->out_gfa || abpt->max_n_cons > 1 || abpt->cons_algrm == ABPOA_MC) {
+    if (abpt->out_msa || abpt->out_gfa || abpt->max_n_cons > 1 || abpt->cons_algrm == ABPOA_MF) {
         abpt->use_read_ids = 1;
         if (abpt->out_msa || abpt->out_gfa) set_65536_table();
-        if (abpt->max_n_cons > 1 || abpt->cons_algrm == ABPOA_MC) set_bit_table16();
+        if (abpt->max_n_cons > 1 || abpt->cons_algrm == ABPOA_MF) set_bit_table16();
     }
     if (abpt->align_mode == ABPOA_LOCAL_MODE) abpt->wb = -1;
     int i;
diff --git a/src/abpoa_graph.c b/src/abpoa_graph.c
index e05ba04..0dd34e0 100644
--- a/src/abpoa_graph.c
+++ b/src/abpoa_graph.c
@@ -287,7 +287,7 @@ void abpoa_topological_sort(abpoa_graph_t *abg, abpoa_para_t *abpt) {
         // fprintf(stderr, "node_n: %d, index_rank_m: %d\n", node_n, abg->index_rank_m);
         abg->index_to_node_id = (int*)_err_realloc(abg->index_to_node_id, abg->index_rank_m * sizeof(int));
         abg->node_id_to_index = (int*)_err_realloc(abg->node_id_to_index, abg->index_rank_m * sizeof(int));
-        if (abpt->out_msa || abpt->max_n_cons > 1 || abpt->cons_algrm == ABPOA_MC) 
+        if (abpt->out_msa || abpt->max_n_cons > 1 || abpt->cons_algrm == ABPOA_MF) 
             abg->node_id_to_msa_rank = (int*)_err_realloc(abg->node_id_to_msa_rank, abg->index_rank_m * sizeof(int));
         if (abpt->wb >= 0) {
             abg->node_id_to_max_pos_left = (int*)_err_realloc(abg->node_id_to_max_pos_left, abg->index_rank_m * sizeof(int));
@@ -699,7 +699,7 @@ void abpoa_reset(abpoa_t *ab, abpoa_para_t *abpt, int qlen) {
         abg->node_m = abg->index_rank_m = node_m;
         abg->index_to_node_id = (int*)_err_realloc(abg->index_to_node_id, node_m * sizeof(int));
         abg->node_id_to_index = (int*)_err_realloc(abg->node_id_to_index, node_m * sizeof(int));
-        if (abpt->out_msa || abpt->max_n_cons > 1 || abpt->cons_algrm == ABPOA_MC) 
+        if (abpt->out_msa || abpt->max_n_cons > 1 || abpt->cons_algrm == ABPOA_MF) 
             abg->node_id_to_msa_rank = (int*)_err_realloc(abg->node_id_to_msa_rank, node_m * sizeof(int));
         if (abpt->wb >= 0) {
             abg->node_id_to_max_pos_left = (int*)_err_realloc(abg->node_id_to_max_pos_left, node_m * sizeof(int));

From 680f98ef9eca7b61f3fd7f006c1ade74d2abb998 Mon Sep 17 00:00:00 2001
From: Yan Gao <yangao07@outlook.com>
Date: Thu, 12 Sep 2024 22:08:17 -0400
Subject: [PATCH 38/38] fix score_mat_fn in pyabpoa

---
 python/pyabpoa.pyx | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/python/pyabpoa.pyx b/python/pyabpoa.pyx
index 0f4b341..755edbb 100644
--- a/python/pyabpoa.pyx
+++ b/python/pyabpoa.pyx
@@ -4,6 +4,7 @@ from libc.stdint cimport uint8_t
 from collections import defaultdict as dd
 cimport cython
 from cabpoa cimport *
+from libc.string cimport strcpy
 
 
 cdef class msa_result:
@@ -89,6 +90,8 @@ cdef class msa_aligner:
     cdef abpoa_t *ab
     cdef abpoa_para_t abpt
     cdef seq2int_dict, int2seq_dict
+    cdef char* _score_mat_fn
+    cdef bytes _score_mat_fn_b
 
     def __cinit__(self, aln_mode='g', is_aa=False,
                   match=2, mismatch=4, score_matrix=b'', gap_open1=4, gap_open2=24, gap_ext1=2, gap_ext2=1,
@@ -114,12 +117,11 @@ cdef class msa_aligner:
         self.abpt.mismatch = mismatch
 
         if score_matrix:
-            if isinstance(score_matrix, str): 
-                score_matrix = bytes(score_matrix, 'utf-8')
-            if os.path.exists(score_matrix.decode('utf-8')):
-                abpoa_set_mat_from_file(&self.abpt, score_matrix)
-            else:
-                raise Exception('Matrix file not exist: {}'.format(score_matrix.decode('utf-8')))
+            self.abpt.use_score_matrix = 1
+            _score_mat_fn_b = score_matrix.encode('utf-8')
+            _score_mat_fn = <char*> malloc(len(_score_mat_fn_b) + 1)
+            strcpy(_score_mat_fn, _score_mat_fn_b)
+            self.abpt.mat_fn = _score_mat_fn
 
         self.abpt.gap_open1 = gap_open1
         self.abpt.gap_open2 = gap_open2