Skip to content

Commit

Permalink
refactor: add 10XG index file support
Browse files Browse the repository at this point in the history
  • Loading branch information
chaunceyyu authored and chungongyu committed Jul 18, 2020
1 parent 540b89f commit 0da457d
Show file tree
Hide file tree
Showing 10 changed files with 182 additions and 58 deletions.
4 changes: 2 additions & 2 deletions src/bigraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ std::string Edge::label() const {
const std::string& seq = _end->seq();
std::string label = seq.substr(unmatched.interval.start, unmatched.length());
if (comp() == EC_REVERSE) {
make_reverse_complement_dna(label);
make_dna_reverse_complement(label);
}
return label;
}
Expand Down Expand Up @@ -60,7 +60,7 @@ void Edge::validate() const {
std::string m1 = v1.substr(_coord.interval.start, _coord.length());
std::string m2 = v2.substr(_twin->_coord.interval.start, _twin->_coord.length());
if (_comp == EC_REVERSE) {
make_reverse_complement_dna(m2);
make_dna_reverse_complement(m2);
}
if (m1 != m2) {
LOG4CXX_ERROR(logger, "Error, matching strings are not the same length");
Expand Down
2 changes: 1 addition & 1 deletion src/bigraph_search.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ bool BigraphWalk::hasLink(const Vertex* v1, const Vertex* v2, int distance, Edge
assert(distance >= 0);
std::string seq1 = v1->seq(), seq2 = v2->seq();
if (comp == Edge::EC_REVERSE) {
make_reverse_complement_dna(seq2);
make_dna_reverse_complement(seq2);
}
return (
dir == Edge::ED_SENSE && distance < seq1.length() && boost::algorithm::starts_with(seq2, seq1.substr(distance))
Expand Down
2 changes: 1 addition & 1 deletion src/correct_processor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ class KmerCorrector : public AbstractCorrector {
size_t bestCount = 0;
char bestBase = '$';

LOG4CXX_DEBUG(logger, boost::format("baseIdx: %d kmerIdx: %d %s %s") % baseIdx % kmerIdx % kmer % make_reverse_complement_dna_copy(kmer));
LOG4CXX_DEBUG(logger, boost::format("baseIdx: %d kmerIdx: %d %s %s") % baseIdx % kmerIdx % kmer % make_dna_reverse_complement_copy(kmer));

for (size_t i = 0; i < DNAAlphabet::size; ++i) {
char c = DNAAlphabet::DNA[i];
Expand Down
2 changes: 1 addition & 1 deletion src/kmerdistr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ size_t KmerDistribution::sample(const FMIndex* index, size_t k, size_t n, KmerDi

for (size_t j = k; j < s.length(); ++j) {
std::string w = s.substr(j - k, k);
std::string v = make_reverse_dna_copy(w);
std::string v = make_dna_reverse_copy(w);

size_t count = 0;
count += FMIndex::Interval::occurrences(w, index);
Expand Down
28 changes: 14 additions & 14 deletions src/kseq.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

static log4cxx::LoggerPtr logger(log4cxx::Logger::getLogger("arcs.DNASeq"));

char make_complement_dna(char c) {
char make_dna_complement(char c) {
static std::map<char, char> mapping = {
{'A', 'T'},
{'C', 'G'},
Expand All @@ -25,7 +25,7 @@ char make_complement_dna(char c) {
return mapping[c];
}

void make_complement_dna(std::string& sequence) {
void make_dna_complement(std::string& sequence) {
static std::map<char, char> mapping = {
{'A', 'T'},
{'C', 'G'},
Expand All @@ -40,30 +40,30 @@ void make_complement_dna(std::string& sequence) {
}
}

std::string make_complement_dna_copy(const std::string& sequence) {
std::string make_dna_complement_copy(const std::string& sequence) {
std::string complement = sequence;
make_complement_dna(complement);
make_dna_complement(complement);
return complement;
}

void make_reverse_dna(std::string& sequence) {
void make_dna_reverse(std::string& sequence) {
std::reverse(sequence.begin(), sequence.end());
}

std::string make_reverse_dna_copy(const std::string& sequence) {
std::string make_dna_reverse_copy(const std::string& sequence) {
std::string reverse = sequence;
make_reverse_dna(reverse);
make_dna_reverse(reverse);
return reverse;
}

void make_reverse_complement_dna(std::string& sequence) {
make_complement_dna(sequence);
make_reverse_dna(sequence);
void make_dna_reverse_complement(std::string& sequence) {
make_dna_complement(sequence);
make_dna_reverse(sequence);
}

std::string make_reverse_complement_dna_copy(const std::string& sequence) {
std::string complement = make_complement_dna_copy(sequence);
make_reverse_dna(complement);
std::string make_dna_reverse_complement_copy(const std::string& sequence) {
std::string complement = make_dna_complement_copy(sequence);
make_dna_reverse(complement);
return complement;
}

Expand All @@ -86,7 +86,7 @@ DNASeq::DNASeq(const std::string& name, const std::string& seq, const std::strin
}

void DNASeq::make_complement() {
make_complement_dna(seq);
make_dna_complement(seq);
}

void DNASeq::make_reverse() {
Expand Down
14 changes: 7 additions & 7 deletions src/kseq.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,13 @@

#include "quality.h"

char make_complement_dna(char c);
void make_complement_dna(std::string& dna);
std::string make_complement_dna_copy(const std::string& dna);
void make_reverse_dna(std::string& dna);
std::string make_reverse_dna_copy(const std::string& dna);
void make_reverse_complement_dna(std::string& dna);
std::string make_reverse_complement_dna_copy(const std::string& dna);
char make_dna_complement(char c);
void make_dna_complement(std::string& dna);
std::string make_dna_complement_copy(const std::string& dna);
void make_dna_reverse(std::string& dna);
std::string make_dna_reverse_copy(const std::string& dna);
void make_dna_reverse_complement(std::string& dna);
std::string make_dna_reverse_complement_copy(const std::string& dna);

//
// DNASeq represents a DNA sequence.
Expand Down
2 changes: 1 addition & 1 deletion src/match.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ class Match : public Runner {
}
DNASeq read;
while (reader->read(read)) {
std::cout << boost::format("%s\t%s\t%d\n") % read.name % read.seq % (FMIndex::Interval::occurrences(read.seq, &fmi) + FMIndex::Interval::occurrences(make_reverse_complement_dna_copy(read.seq), &fmi));
std::cout << boost::format("%s\t%s\t%d\n") % read.name % read.seq % (FMIndex::Interval::occurrences(read.seq, &fmi) + FMIndex::Interval::occurrences(make_dna_reverse_complement_copy(read.seq), &fmi));
}
}
} else {
Expand Down
10 changes: 5 additions & 5 deletions src/overlap_builder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -790,7 +790,7 @@ class IrreducibleBlockListExtractor {
assert(blocks != NULL);
auto i = blocks->begin();
while (i != blocks->end()) {
char b = i->af.test(AlignFlags::QUERYCOMP_BIT) ? make_complement_dna(c) : c;
char b = i->af.test(AlignFlags::QUERYCOMP_BIT) ? make_dna_complement(c) : c;
i->capped.updateR(b, i->index(_fmi, _rfmi));

// remove the block from the list if it's no longer valid
Expand Down Expand Up @@ -1092,13 +1092,13 @@ OverlapResult OverlapBuilder::overlap(const DNASeq& read, size_t minOverlap, Ove
// Match the suffix of seq to prefixes
finder.find(seq, kSuffixPrefixAF, &suffixfwd, &containfwd, &result);
if (_rc) {
finder.find(make_reverse_complement_dna_copy(seq), kPrefixPrefixAF, &prefixfwd, &containfwd, &result);
finder.find(make_dna_reverse_complement_copy(seq), kPrefixPrefixAF, &prefixfwd, &containfwd, &result);
}

// Match the prefix of seq to suffixes
rfinder.find(make_reverse_dna_copy(seq), kPrefixSuffixAF, &prefixrev, &containrev, &result);
rfinder.find(make_dna_reverse_copy(seq), kPrefixSuffixAF, &prefixrev, &containrev, &result);
if (_rc) {
rfinder.find(make_complement_dna_copy(seq), kSuffixSuffixAF, &suffixrev, &containrev, &result);
rfinder.find(make_dna_complement_copy(seq), kSuffixSuffixAF, &suffixrev, &containrev, &result);
}

// Remove submaximal blocks for each block list including fully contained blocks
Expand Down Expand Up @@ -1158,7 +1158,7 @@ OverlapResult OverlapBuilder::duplicate(const DNASeq& read, OverlapBlockList* bl
OverlapBlockFinder finder(_fmi, _rfmi, minOverlap), rfinder(_rfmi, _fmi, minOverlap);

finder.find(seq, kSuffixPrefixAF, NULL, blocks, &result);
rfinder.find(make_complement_dna_copy(seq), kSuffixSuffixAF, NULL, blocks, &result);
rfinder.find(make_dna_complement_copy(seq), kSuffixSuffixAF, NULL, blocks, &result);

return result;
}
Expand Down
Loading

0 comments on commit 0da457d

Please sign in to comment.