Skip to content

Commit

Permalink
Add FASTA name
Browse files Browse the repository at this point in the history
  • Loading branch information
veghp committed Apr 23, 2024
1 parent 704c8ea commit d3bd10e
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 5 deletions.
21 changes: 17 additions & 4 deletions seqreport/SeqCollection.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import csv
import os

from Bio import SeqIO

Expand Down Expand Up @@ -35,10 +36,13 @@ class SeqCollection:
**name_length**
> Check which sequence IDs are longer than this cutoff. Genbank has a character
limit.
limit (`int`).
**assembly_plan**
> An optional assembly plan for calculating savings by re-using DNA parts.
> Optional assembly plan CSV path for calculating savings by re-using DNA parts.
**fasta_name**
> Optional name of the FASTA file for easy identification (`str`).
"""

def __init__(
Expand All @@ -52,7 +56,8 @@ def __init__(
min_length=100, # a good cutoff for min DNA synthesis length
max_length=3000, # a good cutoff for max DNA synthesis length
name_length=15, # max seq record name length. Genbank character limit.
assembly_plan=""
assembly_plan="",
fasta_name="",
):
self.sequences = records
self.cost_per_base = cost_per_base
Expand All @@ -64,6 +69,7 @@ def __init__(
self.max_length = max_length
self.name_length = name_length
self.assembly_plan = assembly_plan
self.fasta_name = fasta_name
self.calculate_values()

def calculate_values(self):
Expand All @@ -77,6 +83,7 @@ def calculate_values(self):
n_bp += len(part.seq)
self.n_bp = n_bp
self.cost = self.n_seq * self.cost_per_seq + self.n_bp * self.cost_per_base
self.cost = round(self.cost) # (in)accuracy is fine for our purposes

# Lengths section
self.too_short = []
Expand Down Expand Up @@ -146,6 +153,7 @@ def calculate_values(self):
self.total_savings += len(record.seq) * (count_in_plan - 1) # ignore first synthesis
self.savings_list += [record.id]
self.total_cost_savings = self.total_savings * self.cost_per_base # ignore cost / seq
self.total_cost_savings = round(self.total_cost_savings) # (in)accuracy is fine for our purposes
# For the PDF report:
self.savings_list_text = " ; ".join(self.savings_list)
else:
Expand All @@ -170,7 +178,8 @@ def seqcollection_from_csv(csv_file, records=None, param_dict={}):
The CSV file parameters override the default class parameters, and this function's
parameters override the CSV file parameters. Either a FASTA file (in the CSV or in
`param_dict`) or a list of `SeqRecords` must be specified.
For the parameter descriptions, see the docstring of `SeqCollection`.
For the parameter descriptions, see the docstring of `SeqCollection`; all of its
parameters apply here except `records` and `fasta_name`.
"""
with open(csv_file, "r") as f:
reader = csv.reader(f, skipinitialspace=True)
Expand All @@ -192,5 +201,9 @@ def seqcollection_from_csv(csv_file, records=None, param_dict={}):
seq_coll = SeqCollection(records=records)
for key, value in parameters.items():
setattr(seq_coll, key, value)

# To ensure that the correct filename is used:
setattr(seq_coll, "fasta_name", os.path.basename(parameters["fasta"]))
seq_coll.calculate_values()

return seq_coll
5 changes: 5 additions & 0 deletions seqreport/report_assets/seq_report.pug
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ hr
p.
This document reports basic sequence statistics.

if seqcollection.fasta_name
p.
FASTA: <b>{{ seqcollection.fasta_name }}</b>

if seqcollection.projectname
p.
Projectname: <b>{{ seqcollection.projectname }}</b>
Expand Down Expand Up @@ -68,6 +72,7 @@ if seqcollection.assembly_plan
if seqcollection.savings_list_text
p.
Re-used sequences in the provided assembly plan: {{ seqcollection.savings_list_text }}.
p.
Total savings: <b>{{ seqcollection.total_savings }}</b> bp or <b>{{ seqcollection.currency_symbol }} {{ seqcollection.total_cost_savings }}</b>.
else
p.
Expand Down
3 changes: 2 additions & 1 deletion tests/test_SeqCollection.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,5 +35,6 @@ def test_read_fasta():

def test_seqcollection_from_csv():
csv_path = os.path.join(data_dir, "values.csv")
seqreport.seqcollection_from_csv(csv_file=csv_path)
seq_coll = seqreport.seqcollection_from_csv(csv_file=csv_path)
assert seq_coll.fasta_name == "test.fa" # see in CSV file
# (passing `records` or `param_dict` is not tested above)

0 comments on commit d3bd10e

Please sign in to comment.