Add FASTA read function

Remove DNA Cauldron dependency.
Edinburgh-Genome-Foundry · Apr 15, 2024 · b0dd3d7
1 parent b4c302f
commit b0dd3d7
Show file tree

Hide file tree

Showing 6 changed files with 26 additions and 13 deletions.
diff --git a/README.md b/README.md
@@ -22,7 +22,7 @@ pip install git+https://github.com/Edinburgh-Genome-Foundry/Seq_Report.git
 import seqreport
 
 seq_fasta = "seq.fa"
-seq_coll = seqreport.SeqCollection(fasta=seq_fasta, projectname="EGF24")
+seq_coll = seqreport.SeqCollection(records=seqreport.read_fasta(seq_fasta), projectname="EGF24")
 seqreport.write_pdf_report("seq_report.pdf", seq_coll)
 ```
 

diff --git a/seqreport/SeqCollection.py b/seqreport/SeqCollection.py
@@ -1,4 +1,4 @@
-import dnacauldron
+from Bio import SeqIO
 
 
 class SeqCollection:
@@ -7,8 +7,8 @@ class SeqCollection:
 
  **Parameters**
 
- **fasta**
- > The FASTA file of the sequences.
+ **records**
+ > A list of Biopython SeqRecords.
 
  **cost_per_base**
  > Cost per nucleotide base.
@@ -28,20 +28,17 @@ class SeqCollection:
 
  def __init__(
  self,
- fasta,
+ records,
  cost_per_base=0.25,
  cost_per_seq=0,
  currency_symbol="£",
  projectname="",
  comments="",
  ):
- self.fasta = fasta
+ self.sequences = records
  self.cost_per_base = cost_per_base
  self.cost_per_seq = cost_per_seq
  self.currency_symbol = currency_symbol
- self.sequences = dnacauldron.biotools.load_records_from_files(
- files=[self.fasta], use_file_names_as_ids=False
- )
  self.n_seq = len(self.sequences)
  n_bp = 0
  for part in self.sequences:
@@ -50,3 +47,15 @@ def __init__(
  self.cost = self.n_seq * self.cost_per_seq + self.n_bp * self.cost_per_base
  self.projectname = projectname
  self.comments = comments
+
+
+def read_fasta(fasta):
+ """Read a FASTA sequence file into a list of records.
+
+ The file is parsed with Biopython's `SeqIO.parse` using the "fasta" format,
+ and every parsed record is collected into a list, which is returned.
+
+ **Parameters**
+
+ **fasta**
+ > The FASTA filepath (`str`).
+ """
+ return list(SeqIO.parse(fasta, "fasta"))
diff --git a/seqreport/__init__.py b/seqreport/__init__.py
@@ -1,2 +1,2 @@
-from .SeqCollection import SeqCollection
+from .SeqCollection import SeqCollection, read_fasta
 from .reports import write_pdf_report
diff --git a/setup.py b/setup.py
@@ -15,5 +15,5 @@
  keywords="biology dna",
  packages=find_packages(exclude="docs"),
  include_package_data=True,
- install_requires=["pdf_reports", "dnacauldron"],
+ install_requires=["pdf_reports", "biopython"],
 )
diff --git a/tests/test_SeqCollection.py b/tests/test_SeqCollection.py
@@ -7,8 +7,11 @@
 
 
 def test_SeqCollection(tmpdir):
+ seq_records = seqreport.read_fasta(seq_fasta)
  seq_coll = seqreport.SeqCollection(
- fasta=seq_fasta, projectname="EGF24", comments="This is a test sequence set."
+ records=seq_records,
+ projectname="EGF24",
+ comments="This is a test sequence set.",
  )
  assert seq_coll.n_seq == 3
  assert seq_coll.n_bp == 99

diff --git a/tests/test_reports.py b/tests/test_reports.py
@@ -7,7 +7,8 @@
 
 
 def test_write_pdf_report(tmpdir):
- seq_coll = seqreport.SeqCollection(fasta=seq_fasta)
+ seq_records = seqreport.read_fasta(seq_fasta)
+ seq_coll = seqreport.SeqCollection(records=seq_records)
  pdf_path = os.path.join(str(tmpdir), "test_report.pdf")
  seqreport.write_pdf_report(target=pdf_path, seqcollection=seq_coll)