Skip to content

Commit

Permalink
Merge pull request #28 from chungongyu/dev-c++11
Browse files Browse the repository at this point in the history
refactor: add comment support to fasta&fastq file format
  • Loading branch information
chungongyu committed Jul 16, 2020
2 parents 7f20688 + 540b89f commit 9814836
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 17 deletions.
45 changes: 31 additions & 14 deletions src/kseq.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,24 @@ std::string make_reverse_complement_dna_copy(const std::string& sequence) {
return complement;
}

// Splits a raw header string into an identifier and its trailing comment.
//
// `name` holds the full header text on entry. Everything from the first space
// or tab onward is removed from it; the text after that separator is stored in
// `comment`. When the header contains no space or tab, `name` is left as-is and
// `comment` is emptied, so a stale comment never survives a call.
void make_seq_name(std::string& name, std::string& comment) {
    const std::string::size_type separator = name.find_first_of(" \t");
    if (separator == std::string::npos) {
        // Header is a bare identifier: nothing to split off.
        comment.clear();
        return;
    }
    // Copy the tail first, while `name` still contains it, then truncate.
    comment = name.substr(separator + 1);
    name.resize(separator);
}

// Constructs a record without quality values.
//
// `name` may carry a trailing comment after the first space or tab; the
// constructor stores the raw text and then lets make_seq_name() move that
// trailing part into the `comment` member. `quality` stays empty.
DNASeq::DNASeq(const std::string& name, const std::string& seq)
    : name(name),
      seq(seq),
      quality(),
      comment() {
    make_seq_name(this->name, this->comment);
}

// Constructs a record that also carries a quality string.
//
// As with the two-argument constructor, `name` may include a trailing comment
// after the first space or tab; make_seq_name() separates it into the
// `comment` member once the raw values have been stored.
DNASeq::DNASeq(const std::string& name, const std::string& seq, const std::string& quality)
    : name(name),
      seq(seq),
      quality(quality),
      comment() {
    make_seq_name(this->name, this->comment);
}

void DNASeq::make_complement() {
make_complement_dna(seq);
}
Expand All @@ -85,10 +103,18 @@ void DNASeq::make_reverse_complement() {

std::ostream& operator << (std::ostream& os, const DNASeq& seq) {
if (seq.quality.empty()) {
os << '>' << seq.name << '\n';
os << '>' << seq.name;
if (!seq.comment.empty()) {
os << ' ' << seq.comment;
}
os << '\n';
os << seq.seq << '\n';
} else {
os << '@' << seq.name << '\n';
os << '@' << seq.name;
if (!seq.comment.empty()) {
os << ' ' << seq.comment;
}
os << '\n';
os << seq.seq << '\n';
os << '+' << '\n';
os << seq.quality << '\n';
Expand Down Expand Up @@ -145,10 +171,7 @@ bool FASTQReader::read(DNASeq& sequence) {
if (buf.length() == sequence.seq.length()) {
sequence.quality = buf;
// name
size_t i = sequence.name.find_first_of(" \t");
if (i != std::string::npos) {
sequence.name.resize(i);
}
make_seq_name(sequence.name, sequence.comment);
return true;
} else {
LOG4CXX_WARN(logger, boost::format("fastq=>length of sequence and quality are not equal: %s") % buf);
Expand All @@ -172,11 +195,8 @@ bool FASTAReader::read(DNASeq& sequence) {
if (boost::algorithm::starts_with(line, ">")) {
if (!seq.empty() && !_name.empty()) {
// name
size_t i = _name.find_first_of(" \t");
if (i != std::string::npos) {
_name.resize(i);
}
sequence.name = _name;
make_seq_name(sequence.name, sequence.comment);
sequence.seq = seq;
_name = line.substr(1);
return true;
Expand All @@ -193,11 +213,8 @@ bool FASTAReader::read(DNASeq& sequence) {
// the last one
if (!seq.empty() && !_name.empty()) {
// name
size_t i = _name.find_first_of(" \t");
if (i != std::string::npos) {
_name.resize(i);
}
sequence.name = _name;
make_seq_name(sequence.name, sequence.comment);
sequence.seq = seq;
return true;
}
Expand Down
5 changes: 3 additions & 2 deletions src/kseq.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,14 @@ std::string make_reverse_complement_dna_copy(const std::string& dna);
class DNASeq {
public:
DNASeq() {}
DNASeq(const std::string& name, const std::string& seq) : name(name), seq(seq) {}
DNASeq(const std::string& name, const std::string& seq, const std::string& quality) : name(name), seq(seq), quality(quality) {}
DNASeq(const std::string& name, const std::string& seq);
DNASeq(const std::string& name, const std::string& seq, const std::string& quality);
virtual ~DNASeq() {}

std::string name;
std::string seq;
std::string quality;
std::string comment;

int score(size_t i) const {
if (!quality.empty()) {
Expand Down
13 changes: 12 additions & 1 deletion test/preprocess_test.cpp
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
#define BOOST_TEST_MODULE "siga.preprocess"
#include <boost/test/included/unit_test.hpp>

#include <iostream>
#include <sstream>

#include <boost/format.hpp>

#include "kseq.h"
#include "reads.h"
#include "primer_screen.h"
Expand All @@ -25,14 +28,18 @@ BOOST_AUTO_TEST_CASE(PrimerScreen_contains) {
}

// Checks DNASeq construction (header split into name + comment) and the
// in-place reverse / complement transforms, printing the record after each
// step so the formatted output can be inspected by eye.
BOOST_AUTO_TEST_CASE(KSeq_transform) {
// NOTE(review): two declarations of `seq` appear back to back here; this looks
// like the before/after pair of a diff rendering — confirm only one remains in
// the compiled file (the checks below match the "test BX:Z:ACGT" variant).
DNASeq seq("test", "ACGTGAC");
DNASeq seq("test BX:Z:ACGT", "ACGTGAC");
// The header text is expected to split at the first blank: identifier first,
// remainder as the comment.
BOOST_CHECK_EQUAL(seq.name, "test");
BOOST_CHECK_EQUAL(seq.comment, "BX:Z:ACGT");
BOOST_CHECK_EQUAL(seq.seq, "ACGTGAC");
std::cout << boost::format("%1%::KSeq_transform name: %2%") % BOOST_TEST_MODULE % seq;
// No quality string was supplied to the constructor.
BOOST_CHECK(seq.quality.empty());
// Reverse only: base order flips, bases themselves are unchanged.
seq.make_reverse();
BOOST_CHECK_EQUAL(seq.seq, "CAGTGCA");
std::cout << boost::format("%1%::KSeq_transform name: %2%") % BOOST_TEST_MODULE % seq;
// Complement applied on top of the reversed sequence.
seq.make_complement();
BOOST_CHECK_EQUAL(seq.seq, "GTCACGT");
std::cout << boost::format("%1%::KSeq_transform name: %2%") % BOOST_TEST_MODULE % seq;
}

BOOST_AUTO_TEST_CASE(KSeq_read) {
Expand All @@ -42,6 +49,10 @@ BOOST_AUTO_TEST_CASE(KSeq_read) {
DNASeqList sequences;
BOOST_CHECK(ReadDNASequences(stream, sequences));
BOOST_CHECK_EQUAL(sequences.size(), 1);

for (const auto& seq: sequences) {
std::cout << boost::format("%1%::KSeq_read name: %2%") % BOOST_TEST_MODULE % seq;
}
}
// FASTQ
}
Expand Down

0 comments on commit 9814836

Please sign in to comment.