Skip to content

Commit

Permalink
Merge pull request #36 from chungongyu/dev
Browse files Browse the repository at this point in the history
feat: 10x genomics
  • Loading branch information
chungongyu committed May 4, 2021
2 parents a98f744 + 2d48762 commit c6b2a09
Show file tree
Hide file tree
Showing 55 changed files with 6,064 additions and 5,796 deletions.
10 changes: 5 additions & 5 deletions benchmark/contigs_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,11 +54,11 @@ def contig_find(ref_tbl, mapping_tbl, contig):
contig_number += 1
contig_total_length += len(contig)
length_list.append(len(contig))
if contig_find(ref, mapping_tbl, contig) or contig_find(ref, mapping_tbl, reverse_complement(contig)):
matched_contig += 1
else:
unmatched_file.write('>' + name + '\n')
unmatched_file.write(contig + '\n')
#if contig_find(ref, mapping_tbl, contig) or contig_find(ref, mapping_tbl, reverse_complement(contig)):
# matched_contig += 1
#else:
# unmatched_file.write('>' + name + '\n')
# unmatched_file.write(contig + '\n')
print "contig_number: %d"%contig_number
print "matched_contig: %d"%matched_contig
print "unmatched_contig: %d"%(contig_number - matched_contig)
Expand Down
18 changes: 10 additions & 8 deletions benchmark/graphviz.awk
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,20 @@ BEGIN {
printf("digraph {\n");
if (red_file != "") {
while (getline<red_file) {
gsub("[-/\\.|]", "_", $1);
gsub("[-/\\.|:]", "_", $1);
red_nodes[$1] = 1;
#printf("%s[style=filled,color=red];\n", $1);
}
}
if (red_node != "") {
gsub("[-/\\.|]", "_", red_node);
gsub("[-/\\.|:]", "_", red_node);
red_nodes[red_node] = 1;
#printf("%s[style=filled,color=red];\n", red_node);
}
if (mapping_file != "") {
while(getline<mapping_file) {
gsub("[-/\\.|]", "_", $1);
gsub("[-/\\.|]", "_", $2);
gsub("[-/\\.|:]", "_", $1);
gsub("[-/\\.|:]", "_", $2);
if ($2 != -1) {
#mapping_nodes[$1] = $2"\t"$3;
mapping_nodes[$1"\t"$2] = $3;
Expand All @@ -33,7 +33,7 @@ BEGIN {
}
} {
if ($1 == "VT") {
gsub("[-/\\.|]", "_", $2);
gsub("[-/\\.|:]", "_", $2);
LEN_TBL[$2] = length($3);
for (i in ref_tbl) {
pos = index(ref_tbl[i], $3);
Expand All @@ -52,12 +52,14 @@ BEGIN {
}
if ($2 in red_nodes) {
printf("%s_%d_%s[style=filled,color=red];\n", $2, length($3), POS_TBL[$2]);
} else if (length($3)>=1000) {
printf("%s_%d_%s[style=filled,color=green];\n", $2, length($3), POS_TBL[$2]);
} else {
printf("%s_%d_%s;\n", $2, length($3), POS_TBL[$2]);
}
} else if ($1=="ED" && $5 - $4 + 1 >= minOverlap) {
gsub("[-/\\.|]", "_", $2);
gsub("[-/\\.|]", "_", $3);
gsub("[-/\\.|:]", "_", $2);
gsub("[-/\\.|:]", "_", $3);
EDGE_NODE[$2] = 1;
EDGE_NODE[$3] = 1;
color="";
Expand All @@ -84,7 +86,7 @@ BEGIN {
} END {
for (i in LEN_TBL) {
if (!(i in EDGE_NODE)) {
printf("%s_%d;\n", i, LEN_TBL[i]);
printf("%s_%d_%s;\n", i, LEN_TBL[i], POS_TBL[i]);
}
}
printf("}\n");
Expand Down
12 changes: 11 additions & 1 deletion configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,21 @@ AX_CXX_COMPILE_STDCXX_11([noext],[mandatory])

PKG_CHECK_MODULES([RAPIDJSON], [RapidJSON >= 1.0.0])
PKG_CHECK_MODULES([LOG4CXX], [liblog4cxx >= 0.10.0])
AC_ARG_WITH([tcmalloc], AS_HELP_STRING([--with-tcmalloc], [Build with the `TCMalloc`]))
AC_ARG_WITH([tcmalloc], AS_HELP_STRING([--with-tcmalloc], [Build with the `TCMalloc` (default=no)]))
AS_IF([test "x${with_tcmalloc}" = "xyes"], [
PKG_CHECK_MODULES([TCMALLOC], [libtcmalloc >= 2.1])
])

AC_ARG_ENABLE([gprof], [AS_HELP_STRING([--enable-gprof], [enable internal support for `gprof` (default=no)])],
[case "${enableval}" in
yes)
CFLAGS="$CFLAGS -pg"
CXXFLAGS="$CXXFLAGS -pg"
;;
no) ;;
*) AC_MSG_ERROR(bad value ${enableval} for --enable-gprof);;
esac])

# Checks for libraries.
# FIXME: Replace `main' with a function in `-lpthread':
AC_CHECK_LIB([pthread], [main])
Expand Down
197 changes: 100 additions & 97 deletions src/alphabet.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,113 +6,116 @@

#include <algorithm>
#include <iterator>
#include <utility>

namespace DNAAlphabet {
const size_t ALL_SIZE = 5;
const char DNA_ALL[ALL_SIZE] = {'$', 'A', 'C', 'G', 'T'};

static const size_t size = 4;
static const char DNA[size] = {'A', 'C', 'G', 'T'};
const size_t ALL_SIZE = 5;
const char DNA_ALL[ALL_SIZE] = {'$', 'A', 'C', 'G', 'T'};

inline int torank(char c) {
static int RANK_ALL[256] = {
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,1,0,2,0,0,0,3,0,0,0,0,0,0,0,0,
0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
};
return RANK_ALL[(size_t)c];
}
static const size_t size = 4;
static const char DNA[size] = {'A', 'C', 'G', 'T'};

inline char tochar(size_t rank) {
return DNA_ALL[rank];
}
inline int torank(char c) {
static int RANK_ALL[256] = {
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,1,0,2,0,0,0,3,0,0,0,0,0,0,0,0,
0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
};
return RANK_ALL[(size_t)c];
}

template <class Storage>
class AlphaCount {
public:
AlphaCount() {
memset(_data, 0, sizeof(_data));
}
inline char tochar(size_t rank) {
return DNA_ALL[rank];
}

Storage& operator[](size_t i) {
return _data[i];
}
const Storage& operator[](size_t i) const {
return _data[i];
}
size_t size() const {
return ALL_SIZE;
}
bool hasDNA() const {
for (size_t i = 1; i < size(); ++i) {
if (_data[i] > 0) {
return true;
}
}
return false;
}
void complement() {
std::swap(_data[torank('A')], _data[torank('T')]);
std::swap(_data[torank('C')], _data[torank('G')]);
}
AlphaCount<Storage> operator-(const AlphaCount<Storage>& c) const {
AlphaCount<Storage> r;
for (size_t i = 0; i < size(); ++i) {
r._data[i] = _data[i] - c._data[i];
}
return r;
}
AlphaCount<Storage> operator+(const AlphaCount<Storage>& c) const {
AlphaCount<Storage> r;
for (size_t i = 0; i < size(); ++i) {
r._data[i] = _data[i] + c._data[i];
}
return r;
}
AlphaCount<Storage>& operator-=(const AlphaCount<Storage>& c) {
for (size_t i = 0; i < size(); ++i) {
_data[i] -= c._data[i];
}
return *this;
}
AlphaCount<Storage>& operator+=(const AlphaCount<Storage>& c) {
for (size_t i = 0; i < size(); ++i) {
_data[i] += c._data[i];
}
return *this;
}
private:
template <class T>
friend std::ostream& operator<<(std::ostream& stream, const AlphaCount<T>& c);
template <class T>
friend std::istream& operator>>(std::istream& stream, AlphaCount<T>& c);
template <class Storage>
class AlphaCount {
public:
AlphaCount() {
memset(_data, 0, sizeof(_data));
}

Storage _data[ALL_SIZE];
};
Storage& operator[](size_t i) {
return _data[i];
}
const Storage& operator[](size_t i) const {
return _data[i];
}
size_t size() const {
return ALL_SIZE;
}
bool hasDNA() const {
for (size_t i = 1; i < size(); ++i) {
if (_data[i] > 0) {
return true;
}
}
return false;
}
void complement() {
std::swap(_data[torank('A')], _data[torank('T')]);
std::swap(_data[torank('C')], _data[torank('G')]);
}
AlphaCount<Storage> operator-(const AlphaCount<Storage>& c) const {
AlphaCount<Storage> r;
for (size_t i = 0; i < size(); ++i) {
r._data[i] = _data[i] - c._data[i];
}
return r;
}
AlphaCount<Storage> operator+(const AlphaCount<Storage>& c) const {
AlphaCount<Storage> r;
for (size_t i = 0; i < size(); ++i) {
r._data[i] = _data[i] + c._data[i];
}
return r;
}
AlphaCount<Storage>& operator-=(const AlphaCount<Storage>& c) {
for (size_t i = 0; i < size(); ++i) {
_data[i] -= c._data[i];
}
return *this;
}
AlphaCount<Storage>& operator+=(const AlphaCount<Storage>& c) {
for (size_t i = 0; i < size(); ++i) {
_data[i] += c._data[i];
}
return *this;
}

typedef AlphaCount<uint64_t> AlphaCount64;
typedef AlphaCount<uint32_t> AlphaCount32;
typedef AlphaCount<uint16_t> AlphaCount16;
private:
template <class T>
friend std::ostream& operator<<(std::ostream& stream, const AlphaCount<T>& c);
template <class T>
friend std::istream& operator>>(std::istream& stream, AlphaCount<T>& c);

template <class T>
std::ostream& operator<<(std::ostream& stream, const AlphaCount<T>& c) {
std::copy(c._data, c._data + c.size(), std::ostream_iterator<T>(stream, " "));
return stream;
}
Storage _data[ALL_SIZE];
};

#endif // alphabet_h_
typedef AlphaCount<uint64_t> AlphaCount64;
typedef AlphaCount<uint32_t> AlphaCount32;
typedef AlphaCount<uint16_t> AlphaCount16;

template <class T>
std::ostream& operator<<(std::ostream& stream, const AlphaCount<T>& c) {
std::copy(c._data, c._data + c.size(), std::ostream_iterator<T>(stream, " "));
return stream;
}

}; // namespace DNAAlphabet

#endif // alphabet_h_
Loading

0 comments on commit c6b2a09

Please sign in to comment.