Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add query to ref coord mapping #120

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
change N ref ops to false
  • Loading branch information
chilampoon committed Oct 12, 2023
commit 804fb9b7e82fa5636d0f39abb8e94f99ae7d2b7b
22 changes: 2 additions & 20 deletions src/remora/data_chunks.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,7 @@
[True, False, False, False, False, False, False, True, True]
)
QUERY_OPS = np.array([True, True, False, False, True, False, False, True, True])
REF_OPS = np.array([True, False, True, True, False, False, False, True, True])
REFCOORD_OPS = np.array([True, False, True, False, False, False, False, True, True])
REF_OPS = np.array([True, False, True, False, False, False, False, True, True])
CIGAR_CODES = ["M", "I", "D", "N", "S", "H", "P", "=", "X"]
CODE_TO_OP = {
"M": 0,
Expand Down Expand Up @@ -99,28 +98,11 @@ def make_sequence_coordinate_mapping(cigar):
query_knots = np.concatenate(
[[0], (query_knots[is_match] - offsets).T.flatten(), [query_knots[-1]]]
)
ref_coords = get_ref_coords(ops, lens)
knots = np.interp(np.concatenate([[0], ref_coords]), ref_knots, query_knots)
knots = np.interp(np.arange(ref_knots[-1] + 1), ref_knots, query_knots)

return knots


def get_ref_coords(ops, lens):
'''
Map query base to reference coordinates (1-based).
Returns
array shape (ref_len,); ref_len = len(aln.get_reference_sequences())
'''
starts = np.cumsum(np.where(REF_OPS[ops], lens, 0)) - np.where(REF_OPS[ops], lens, 0)
ranges = [
np.arange(start, start + l)
for op, start, l in zip(ops, starts, lens)
if REFCOORD_OPS[op]
]
ref_coords = np.concatenate(ranges) + 1
return ref_coords


def compute_ref_to_signal(query_to_signal, cigar):
ref_to_read_knots = make_sequence_coordinate_mapping(cigar)
assert query_to_signal.size == ref_to_read_knots[-1].astype(int) + 1, (
Expand Down