Skip to content

Commit

Permalink
feat: barcode support to the string graph
Browse files Browse the repository at this point in the history
  • Loading branch information
chungongyu committed Nov 9, 2020
1 parent f95c315 commit 2c5a8d1
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 6 deletions.
6 changes: 4 additions & 2 deletions src/assembler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ class Assembler : public Runner {
delta = sigma;
}
}
PairedReadVisitor prVisit(options.get<size_t>("max-distance", 100), average, delta, options.get<size_t>("max-search-nodes", 100), options.get<size_t>("threads", 1), options.get<size_t>("batch-size", 1000));
PairedReadVisitor prVisit(options.get<size_t>("max-distance", 100), average, delta, options.get<size_t>("max-search-nodes", 100), options.find("with-index") != options.not_found(), options.get<size_t>("threads", 1), options.get<size_t>("batch-size", 1000));
g.visit(&prVisit);
} else {
LOG4CXX_INFO(logger, "Removing contained vertices from graph");
Expand Down Expand Up @@ -235,6 +235,7 @@ class Assembler : public Runner {
"Paired reads parameters:\n"
" --pe-mode=INT 0 - do not treat reads as paired (default)\n"
" 1 - treat reads as paired\n"
" --with-index treat as 10x linked read data\n"
" --max-distance=INT treat reads as connected whose distance is less than INT (default: 100)\n"
" --insert-size=INT treat reads as paired with insert size INT (default: learned from paired reads)\n"
" --insert-size-delta=INT treat reads as paired with insert size delta INT (default: learned from paired reads)\n"
Expand All @@ -254,7 +255,7 @@ class Assembler : public Runner {
};

static const std::string shortopts = "c:s:p:t:m:x:n:l:a:b:d:h";
enum { OPT_HELP = 1, OPT_BATCH_SIZE, OPT_PEMODE, OPT_MAXDIST, OPT_INSERTSIZE, OPT_INSERTSIZE_DELTA, OPT_MAXEDGES, OPT_INIT_VERTEX_CAPACITY };
enum { OPT_HELP = 1, OPT_BATCH_SIZE, OPT_PEMODE, OPT_WITH_IDX, OPT_MAXDIST, OPT_INSERTSIZE, OPT_INSERTSIZE_DELTA, OPT_MAXEDGES, OPT_INIT_VERTEX_CAPACITY };
static const option longopts[] = {
{"log4cxx", required_argument, NULL, 'c'},
{"ini", required_argument, NULL, 's'},
Expand All @@ -265,6 +266,7 @@ static const option longopts[] = {
{"threads", required_argument, NULL, 't'},
{"batch-size", required_argument, NULL, OPT_BATCH_SIZE},
{"pe-mode", required_argument, NULL, OPT_PEMODE},
{"with-index", no_argument, NULL, OPT_WITH_IDX},
{"max-distance", required_argument, NULL, OPT_MAXDIST},
{"insert-size", required_argument, NULL, OPT_INSERTSIZE},
{"insert-size-delta", required_argument, NULL, OPT_INSERTSIZE_DELTA},
Expand Down
20 changes: 17 additions & 3 deletions src/bigraph_visitors.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include "bigraph_visitors.h"
#include "asqg.h"
#include "bigraph_search.h"
#include "kseq.h"
#include "reads.h"
Expand Down Expand Up @@ -159,6 +160,14 @@ void ContainRemoveVisitor::postvisit(Bigraph* graph) {
//
// Writes one vertex to the visitor's output stream as a DNASeq record built
// from the vertex id and sequence.
//
// If the vertex carries a non-empty index string, that string is wrapped in an
// ASQG::StringTagValue, rendered with ASQG::BARCODE_TAG, and appended to the
// record's comment; a single space separates it from any comment text that is
// already present.
//
// Always returns false.
// NOTE(review): presumably "false" tells the graph traversal that this visitor
// did not modify the graph -- confirm against the BigraphVisitor contract.
bool FastaVisitor::visit(Bigraph* graph, Vertex* vertex) {
    DNASeq record(vertex->id(), vertex->seq());

    const std::string& barcode = vertex->index();
    const bool hasBarcode = !barcode.empty();
    if (hasBarcode) {
        ASQG::StringTagValue barcodeTag(barcode);
        // Keep any existing comment and the barcode tag as separate
        // space-delimited fields.
        const bool needsSeparator = !record.comment.empty();
        if (needsSeparator) {
            record.comment += ' ';
        }
        record.comment += barcodeTag.tostring(ASQG::BARCODE_TAG);
    }

    _stream << record;
    return false;
}
Expand Down Expand Up @@ -215,7 +224,7 @@ bool LoopRemoveVisitor::visit(Bigraph* graph, Vertex* vertex) {
Edge* nextEdge = vertex->edges(Edge::ED_SENSE)[0];
Vertex* prevVert = prevEdge->end();
Vertex* nextVert = nextEdge->end();
if (prevVert == nextVert) {
if (!prevEdge->isSelf() && !nextEdge->isSelf() && prevVert == nextVert) {
//vertex->color(GC_BLACK);
_loops.push_back(vertex);
modified = true;
Expand Down Expand Up @@ -589,7 +598,7 @@ class PairedVertexProcess {
size_t numNodes[Edge::ED_COUNT] = {0}, MAX_STEPS = 3;
for (const auto& node1 : adjacents) {
size_t numIdx = node1->attr.distance >= 0 ? Edge::ED_SENSE : Edge::ED_ANTISENSE;
if (numNodes[numIdx] >= MAX_STEPS) continue;
//if (numNodes[numIdx] >= MAX_STEPS) continue;
const Vertex* paired_v2 = _graph->getVertex(PairEnd::id(node1->vertex->id()));
assert(paired_v2 != NULL);
LOG4CXX_DEBUG(logger, boost::format("vertex1: %s<->%s, vertex2: %s<->%s") % vertex1->id() % paired_v1->id() % node1->vertex->id() % paired_v2->id());
Expand All @@ -602,6 +611,11 @@ class PairedVertexProcess {
}, paired_v2, 0, std::abs(node1->attr.distance) + _visitor->_insertDelta*4, 1, &faraways);
}
for (const auto& node2 : faraways) {
if (_visitor->_withIndex) {
if (node1->vertex->index() != node2->vertex->index()) {
continue;
}
}
linklist.push_back(node1);
LOG4CXX_DEBUG(logger, boost::format("paired_read_all\t%s\t%s\t%d\t%s\t%s\t%d") % vertex1->id() % node1->vertex->id() % node1->attr.distance % paired_v1->id() % node2->vertex->id() % node2->attr.distance);
++numNodes[numIdx];
Expand Down Expand Up @@ -797,7 +811,7 @@ class PairedContainmentVisitor : public BigraphVisitor {
for (auto& n : *containment) {
// Modify inplace
n->attr.distance -= distance;
LOG4CXX_INFO(logger, boost::format("PairedContainmentVisitor::visit\t%s\t%s\t%d\t%d\t%d") % vertex->id() % n->vertex->id() % n->attr.distance % n->attr.dir % n->attr.comp);
LOG4CXX_DEBUG(logger, boost::format("PairedContainmentVisitor::visit\t%s\t%s\t%d\t%d\t%d") % vertex->id() % n->vertex->id() % n->attr.distance % n->attr.dir % n->attr.comp);
}
assert(_table.find(vertex->id()) == _table.end());
_table[vertex->id()] = containment;
Expand Down
3 changes: 2 additions & 1 deletion src/bigraph_visitors.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ class InsertSizeEstimateVisitor : public BigraphVisitor {
// Visit each paired node via zigzag, sweep false positive edges.
class PairedReadVisitor : public BigraphVisitor {
public:
PairedReadVisitor(size_t maxDistance, size_t insertSize, size_t insertDelta, size_t maxNodes, size_t threads=1, size_t batch=1000) : _maxDistance(maxDistance), _insertSize(insertSize), _insertDelta(insertDelta), _maxNodes(maxNodes), _threads(threads), _batch(batch) {
PairedReadVisitor(size_t maxDistance, size_t insertSize, size_t insertDelta, size_t maxNodes, bool withIndex, size_t threads=1, size_t batch=1000) : _maxDistance(maxDistance), _insertSize(insertSize), _insertDelta(insertDelta), _maxNodes(maxNodes), _withIndex(withIndex), _threads(threads), _batch(batch) {
}
void previsit(Bigraph* graph);
bool visit(Bigraph* graph, Vertex* vertex);
Expand All @@ -121,6 +121,7 @@ class PairedReadVisitor : public BigraphVisitor {
size_t _maxNodes;
size_t _threads;
size_t _batch;
bool _withIndex;

std::vector<const Vertex *> _vertices;
friend class PairedVertexProcess;
Expand Down

0 comments on commit 2c5a8d1

Please sign in to comment.