Skip to content

Commit

Permalink
Some renames, and granges_filter() work.
Browse files Browse the repository at this point in the history
 - Prototype of one branch of granges_filter()
 - Renamed GenomicRangeParsers enum to GenomicRangeParser
  • Loading branch information
vsbuffalo committed Feb 20, 2024
1 parent c151fbe commit 7a014af
Show file tree
Hide file tree
Showing 5 changed files with 59 additions and 83 deletions.
86 changes: 51 additions & 35 deletions src/commands.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use std::path::PathBuf;

use crate::{
io::{OutputFile, parsers::GenomicRangesParsers},
io::{OutputFile, parsers::GenomicRangesParser},
prelude::*,
ranges::operations::adjust_range,
reporting::{CommandOutput, Report},
Expand Down Expand Up @@ -68,57 +68,73 @@ pub fn granges_adjust(

let ranges_iter = GenomicRangesFile::parsing_iterator(bedfile)?;
match ranges_iter {
GenomicRangesParsers::Bed3(iter) => {
GenomicRangesParser::Bed3(iter) => {
let gr = GRangesEmpty::from_iter(iter, &genome)?;
gr.adjust_ranges(-both, both).to_tsv(output)?
},
GenomicRangesParsers::Bedlike(iter) => {
GenomicRangesParser::Bedlike(iter) => {
// Note the call to try_unwrap_data() here: this is because
// we know that the records *do* have data. Unwrapping the Option<String>
// values means that writing to TSV doesn't have to deal with this (which
// always creates headaches).
let gr = GRanges::from_iter(iter.try_unwrap_data()?, &genome)?;
gr.adjust_ranges(-both, both).to_tsv(output)?
},
GenomicRangesParsers::Unsupported => {
GenomicRangesParser::Unsupported => {
return Err(GRangesError::UnsupportedGenomicRangesFileFormat)
},

}
}
Ok(CommandOutput::new((), report))
}
//
// /// Retain only the ranges that have at least one overlap with another set of ranges.
// pub fn granges_filter<DL, DR>(
// seqlens: &PathBuf,
// left_granges: GRanges<VecRangesEmpty, DL>,
// right_granges: GRanges<VecRangesEmpty, DR>,
// sort: bool,
// ) -> Result<CommandOutput<()>, GRangesError>
// where
// // we must be able to iterate over left ranges
// VecRangesEmpty: IterableRangeContainer,
// // we must be able to convert the right GRanges to interval trees
// GRanges<VecRangesEmpty, ()>: GenomicRangesToIntervalTrees<()>,
// {
// let right_granges = right_granges.to_coitrees()?;
//
// // let intersection = left_granges.filter_overlaps_(&right_granges)?;
//
// //// output stream -- header is None for now (TODO)
// //let output_stream = output.map_or(OutputFile::new_stdout(None), |file| {
// // OutputFile::new(file, None)
// //});
// //let mut writer = output_stream.writer()?;
//
// // for reporting stuff to the user
// let mut report = Report::new();
//
// //intersection.sort().to_tsv(output)?;
//
// Ok(CommandOutput::new((), report))
// }

/// Retain only the ranges that have at least one overlap with another set of ranges.
pub fn granges_filter(
seqlens: &PathBuf,
left_path: &PathBuf,
right_path: &PathBuf,
output: Option<&PathBuf>,
sort: bool,
) -> Result<CommandOutput<()>, GRangesError> {
let genome = read_seqlens(seqlens)?;

let left_iter = GenomicRangesFile::parsing_iterator(left_path)?;
let right_iter = GenomicRangesFile::parsing_iterator(right_path)?;

let output_stream = output.as_ref().map_or(OutputFile::new_stdout(None), |file| {
OutputFile::new(file, None)
});
let mut writer = output_stream.writer()?;

// for reporting stuff to the user
let mut report = Report::new();

match (left_iter, right_iter) {
(GenomicRangesParser::Bed3(left), GenomicRangesParser::Bed3(right)) => {

let left_gr = GRangesEmpty::from_iter(left, &genome)?; let right_gr =
GRangesEmpty::from_iter(right, &genome)?;

let right_gr = right_gr.to_coitrees()?;

let intersection = left_gr.filter_overlaps(&right_gr)?;
intersection.to_tsv(output)?;

Ok(CommandOutput::new((), report))
}
(GenomicRangesParser::Bed3(left), GenomicRangesParser::Bedlike(right)) => {
Ok(CommandOutput::new((), report))
}
(GenomicRangesParser::Bedlike(left), GenomicRangesParser::Bed3(right)) => {
Ok(CommandOutput::new((), report))
}
(GenomicRangesParser::Bedlike(left), GenomicRangesParser::Bedlike(right)) => {
Ok(CommandOutput::new((), report))
}
_ => Ok(CommandOutput::new((), report)),
}
}

/// Generate a random BED-like file with genomic ranges.
pub fn granges_random_bed(
Expand Down
2 changes: 1 addition & 1 deletion src/io/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@ pub mod noodles;
pub mod parsers;

pub use file::{InputFile, OutputFile};
pub use parsers::{Bed3Iterator, BedlikeIterator, GenomicRangesFile, GenomicRangesParsers, TsvRecordIterator};
pub use parsers::{Bed3Iterator, BedlikeIterator, GenomicRangesFile, GenomicRangesParser, TsvRecordIterator};
8 changes: 4 additions & 4 deletions src/io/parsers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ fn valid_bedlike(input_file: &mut InputFile) -> Result<bool, GRangesError> {

/// Enum that connects a genomic ranges file type to its specific parser.
#[derive(Debug)]
pub enum GenomicRangesParsers {
pub enum GenomicRangesParser {
Bed3(Bed3Iterator),
Bedlike(BedlikeIterator),
Unsupported,
Expand Down Expand Up @@ -215,14 +215,14 @@ impl GenomicRangesFile {
///
/// This returns a [`GenomicRangesParsers`] enum, since the parsing iterator filetype
/// cannot be known at compile time.
pub fn parsing_iterator(filepath: impl Clone + Into<PathBuf>) -> Result<GenomicRangesParsers, GRangesError> {
pub fn parsing_iterator(filepath: impl Clone + Into<PathBuf>) -> Result<GenomicRangesParser, GRangesError> {
let path = filepath.into();
dbg!(&path);
match Self::detect(path)? {
GenomicRangesFile::Bed3(path) =>
Ok(GenomicRangesParsers::Bed3(Bed3Iterator::new(path)?)),
Ok(GenomicRangesParser::Bed3(Bed3Iterator::new(path)?)),
GenomicRangesFile::Bedlike(path) =>
Ok(GenomicRangesParsers::Bedlike(BedlikeIterator::new(path)?)),
Ok(GenomicRangesParser::Bedlike(BedlikeIterator::new(path)?)),
GenomicRangesFile::Unsupported => {
Err(GRangesError::UnsupportedGenomicRangesFileFormat)
}
Expand Down
2 changes: 1 addition & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ pub mod prelude {
pub use crate::error::GRangesError;
pub use crate::granges::{GRanges, GRangesEmpty};
pub use crate::io::file::read_seqlens;
pub use crate::io::{Bed3Iterator, BedlikeIterator, GenomicRangesFile, TsvRecordIterator, GenomicRangesParsers};
pub use crate::io::{Bed3Iterator, BedlikeIterator, GenomicRangesFile, TsvRecordIterator, GenomicRangesParser};

pub use crate::ranges::vec::{VecRangesEmpty, VecRangesIndexed};
pub use crate::traits::{
Expand Down
44 changes: 2 additions & 42 deletions src/main/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use std::path::PathBuf;

use clap::{Args, Parser, Subcommand};
use granges::{
commands::{granges_adjust},
commands::{granges_adjust, granges_filter},
io::{parsers::Bed3Iterator, OutputFile},
prelude::{read_seqlens, GRanges, GRangesError, GenomicRangesFile},
reporting::{Report, CommandOutput},
Expand Down Expand Up @@ -115,47 +115,7 @@ fn run() -> Result<(), GRangesError> {
output,
sort,
}) => {
let genome = read_seqlens(seqlens)?;
let mut report = Report::new();

let left_filetype = GenomicRangesFile::detect(left)?;
let right_filetype = GenomicRangesFile::detect(right)?;

match (left_filetype, right_filetype) {
(GenomicRangesFile::Bed3(left_file), GenomicRangesFile::Bed3(right_file)) => {
let left_iter = Bed3Iterator::new(left_file)?;
let right_iter = Bed3Iterator::new(right_file)?;

let left_gr = GRangesEmpty::from_iter(left_iter, &genome)?; let right_gr =
GRangesEmpty::from_iter(right_iter, &genome)?;

let right_gr = right_gr.to_coitrees()?;

let intersection = left_gr.filter_overlaps(&right_gr);
right_gr.len();

let output_stream = output.as_ref().map_or(OutputFile::new_stdout(None), |file| {
OutputFile::new(file, None)
});
let mut writer = output_stream.writer()?;

// for reporting stuff to the user
let mut report = Report::new();

Ok(CommandOutput::new((), report))
}
(GenomicRangesFile::Bed3(left_file), GenomicRangesFile::Bedlike(right_file)) => {
Ok(CommandOutput::new((), report))
}
(GenomicRangesFile::Bedlike(left_file), GenomicRangesFile::Bed3(right_file)) => {
Ok(CommandOutput::new((), report))
}
(GenomicRangesFile::Bedlike(left_file), GenomicRangesFile::Bedlike(right_file)) => {
Ok(CommandOutput::new((), report))
}
_ => Ok(CommandOutput::new((), report)),
}
// granges_filter(left, right, output.as_ref(), *sort)
granges_filter(seqlens, left, right, output.as_ref(), *sort)
}
#[cfg(feature = "dev-commands")]
Some(Commands::RandomBed {
Expand Down

0 comments on commit 7a014af

Please sign in to comment.