Skip to content

Commit

Permalink
Add FASTA read function
Browse files Browse the repository at this point in the history
Remove DNA Cauldron dependency.
  • Loading branch information
veghp committed Apr 15, 2024
1 parent b4c302f commit b0dd3d7
Show file tree
Hide file tree
Showing 6 changed files with 26 additions and 13 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ pip install git+https://github.com/Edinburgh-Genome-Foundry/Seq_Report.git
import seqreport

seq_fasta = "seq.fa"
seq_coll = seqreport.SeqCollection(fasta=seq_fasta, projectname="EGF24")
seq_coll = seqreport.SeqCollection(records=seqreport.read_fasta(seq_fasta), projectname="EGF24")
seqreport.write_pdf_report("seq_report.pdf", seq_coll)
```

Expand Down
25 changes: 17 additions & 8 deletions seqreport/SeqCollection.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import dnacauldron
from Bio import SeqIO


class SeqCollection:
Expand All @@ -7,8 +7,8 @@ class SeqCollection:
**Parameters**
**fasta**
> The FASTA file of the sequences.
**records**
> A list of Biopython SeqRecords.
**cost_per_base**
> Cost per nucleotide base.
Expand All @@ -28,20 +28,17 @@ class SeqCollection:

def __init__(
self,
fasta,
records,
cost_per_base=0.25,
cost_per_seq=0,
currency_symbol="£",
projectname="",
comments="",
):
self.fasta = fasta
self.sequences = records
self.cost_per_base = cost_per_base
self.cost_per_seq = cost_per_seq
self.currency_symbol = currency_symbol
self.sequences = dnacauldron.biotools.load_records_from_files(
files=[self.fasta], use_file_names_as_ids=False
)
self.n_seq = len(self.sequences)
n_bp = 0
for part in self.sequences:
Expand All @@ -50,3 +47,15 @@ def __init__(
self.cost = self.n_seq * self.cost_per_seq + self.n_bp * self.cost_per_base
self.projectname = projectname
self.comments = comments


def read_fasta(fasta):
    """Parse a FASTA sequence file and return its records as a list.

    The file is parsed with `SeqIO.parse` using the "fasta" format, and the
    resulting iterator is fully consumed into a list before returning.

    **Parameters**

    **fasta**
    > The FASTA filepath (`str`).
    """
    parsed_records = SeqIO.parse(fasta, "fasta")
    return list(parsed_records)
2 changes: 1 addition & 1 deletion seqreport/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
from .SeqCollection import SeqCollection
from .SeqCollection import SeqCollection, read_fasta
from .reports import write_pdf_report
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,5 @@
keywords="biology dna",
packages=find_packages(exclude="docs"),
include_package_data=True,
install_requires=["pdf_reports", "dnacauldron"],
install_requires=["pdf_reports", "biopython"],
)
5 changes: 4 additions & 1 deletion tests/test_SeqCollection.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,11 @@


def test_SeqCollection(tmpdir):
seq_records = seqreport.read_fasta(seq_fasta)
seq_coll = seqreport.SeqCollection(
fasta=seq_fasta, projectname="EGF24", comments="This is a test sequence set."
records=seq_records,
projectname="EGF24",
comments="This is a test sequence set.",
)
assert seq_coll.n_seq == 3
assert seq_coll.n_bp == 99
Expand Down
3 changes: 2 additions & 1 deletion tests/test_reports.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@


def test_write_pdf_report(tmpdir):
seq_coll = seqreport.SeqCollection(fasta=seq_fasta)
seq_records = seqreport.read_fasta(seq_fasta)
seq_coll = seqreport.SeqCollection(records=seq_records)
pdf_path = os.path.join(str(tmpdir), "test_report.pdf")
seqreport.write_pdf_report(target=pdf_path, seqcollection=seq_coll)

Expand Down

0 comments on commit b0dd3d7

Please sign in to comment.