Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We鈥檒l occasionally send you account related emails.

Already on GitHub? Sign in to your account

Regarding #109 #111

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
trying to add --NoMe option
  • Loading branch information
vivekbhr committed Jan 18, 2021
commit 1d60350ca6a747704cd004c866f7a638d8ca318c
7 changes: 5 additions & 2 deletions MethylDackel.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ typedef struct {
@field keepCpG 1: Output CpG metrics
@field keepCHG 1: Output CHG metrics
@field keepCHH 1: Output CHH metrics
@field keepGCH 1: Output GCH metrics
@field minMapq Minimum MAPQ value to include a read (-q)
@field minPhred Minimum Phred score to include a base (-p)
@field keepDupes 1: Include marked duplicates when calculating metrics
Expand Down Expand Up @@ -81,7 +82,7 @@ typedef struct {
@field chunkSize The number of bases processed by each thread at a time (can be adjusted a bit to ensure CpGs/CHGs aren't split between processors)
*/
typedef struct {
int keepCpG, keepCHG, keepCHH;
int keepCpG, keepCHG, keepCHH, keepGCH, NoMeCpG;
int minMapq, minPhred, keepDupes, minDepth;
int keepDiscordant, keepSingleton, ignoreFlags, requireFlags;
int merge, methylKit, minOppositeDepth;
Expand Down Expand Up @@ -135,7 +136,7 @@ typedef struct {
@abstract Determine the strand from which a read arose
@param b Pointer to an alignment
@returns 1, original top; 2, original bottom; 3, complementary to the original top; 4, complementary to the original bottom
@discussion There are two methods used to determine the strand of origin. The
@discussion There are two methods used to determine the strand of origin. The
first method uses XG auxiliary tags as output by Bison and Bismark. For details
on how strand is determined from this, see the source code for this function or
the documentation for either Bison or bismark. This method can handle non-
Expand Down Expand Up @@ -194,6 +195,8 @@ void makeTXT(strandMeth **meths);
int isCpG(char *seq, int pos, int seqlen);
int isCHG(char *seq, int pos, int seqlen);
int isCHH(char *seq, int pos, int seqlen);
int isGCH(char *seq, int pos, int seqlen);
int isNoMeCpG(char *seq, int pos, int seqlen);

/*! @function
@abstract Determine what strand an alignment originated from
Expand Down
46 changes: 42 additions & 4 deletions common.c
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,44 @@ inline int isCHH(char *seq, int pos, int seqlen) {
return 0;
}

// NoMe-Seq related options:
// 1. return nonZero only if the CpG also has no C/G upstream
inline int isNoMeCpG(char *seq, int pos, int seqlen) {
if(pos >= seqlen) return 0;
if(*(seq+pos) == 'C' || *(seq+pos) == 'c') {
if(pos+1 == seqlen) return 0;
// skip if upstream base is G/C
if(*(seq+pos-1) == 'G' || *(seq+pos-1) == 'g' || *(seq+pos-1) == 'C' || *(seq+pos-1) == 'c') return 0;
if(*(seq+pos+1) == 'G' || *(seq+pos+1) == 'g') return 1;
return 0;
} else if(*(seq+pos) == 'G' || *(seq+pos) == 'g') {
if(pos == 0) return 0;
// skip if upstream base is G/C
if(*(seq+pos+1) == 'G' || *(seq+pos+1) == 'g' || *(seq+pos+1) == 'C' || *(seq+pos+1) == 'c') return 0;
if(*(seq+pos-1) == 'C' || *(seq+pos-1) == 'c') return -1;
return 0;
}
return 0;
}

// 2. return nonZero if it's a GCH context
inline int isGCH(char *seq, int pos, int seqlen) {
// for NoMe-seq, we want GC-me in GCH context
if(pos >= seqlen) return 0;
if(*(seq+pos) == 'G' || *(seq+pos) == 'g') {
if(pos+2 >= seqlen) return 0;
if(*(seq+pos+2) == 'G' || *(seq+pos+2) == 'g') return 0;
if(*(seq+pos+1) == 'C' || *(seq+pos+1) != 'c') return 1;
return 0;
} else if(*(seq+pos) == 'C' || *(seq+pos) == 'c') {
if(pos <= 1) return 0;
if(*(seq+pos-2) == 'C' || *(seq+pos-2) == 'c') return 0;
if(*(seq+pos-1) == 'G' || *(seq+pos-1) == 'g') return -1;
return 0;
}
return 0;
}

int getStrand(bam1_t *b) {
char *XG = (char *) bam_aux_get(b, "XG");
//Only bismark uses the XG tag like this. Some other aligners use it for other purposes...
Expand Down Expand Up @@ -247,7 +285,7 @@ unsigned char* getMappabilityValue(Config* config, char* chrom_n, uint32_t start
{
offset++;
}

}
return data;
}
Expand All @@ -269,7 +307,7 @@ char check_mappability(void *data, bam1_t *b) {
read1_end = b->core.pos + b->core.l_qseq;
read2_start = b->core.mpos;
read2_end = b->core.mpos + b->core.l_qseq; //assuming both reads same length to avoid issues finding read2_end on right read (doing the same on the left read for consistency)

}
else //get pos for right read
{
Expand All @@ -279,7 +317,7 @@ char check_mappability(void *data, bam1_t *b) {
read1_end = b->core.mpos + b->core.l_qseq; //assuming both reads same length to avoid issues finding read2_end
}
vals = getMappabilityValue(ldata->config, ldata->hdr->target_name[b->core.tid], read1_start, read1_end+1);

for (i=0; i<=read1_end-read1_start; i++)
{
if(vals[i] > 0) //is base above threshold?
Expand All @@ -294,7 +332,7 @@ char check_mappability(void *data, bam1_t *b) {
}
free(vals);
vals = getMappabilityValue(ldata->config, ldata->hdr->target_name[b->core.tid], read2_start, read2_end+1);

num_mappable_bases = 0;
for (i=0; i<=read2_end-read2_start; i++)
{
Expand Down
Loading