Skip to content

Commit

Permalink
black formatting
Browse files: browse the repository at this point in the history
  • Loading branch information
daler committed Apr 11, 2024
1 parent 308f956 commit 71acc05
Show file tree
Hide file tree
Showing 8 changed files with 187 additions and 142 deletions.
5 changes: 2 additions & 3 deletions gffutils/biopython_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,8 @@ def to_seqfeature(feature):
# Convert from GFF 1-based to standard Python 0-based indexing used by
# BioPython
FeatureLocation(
feature.start - 1,
feature.stop,
strand=_biopython_strand[feature.strand]),
feature.start - 1, feature.stop, strand=_biopython_strand[feature.strand]
),
id=feature.id,
type=feature.featuretype,
qualifiers=qualifiers,
Expand Down
1 change: 0 additions & 1 deletion gffutils/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
"""



def to_bed12(f, db, child_type="exon", name_field="ID"):
"""
Given a top-level feature (e.g., transcript), construct a BED12 entry
Expand Down
3 changes: 2 additions & 1 deletion gffutils/create.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,8 @@ def _id_handler(self, f):
"a single value is required for a primary key in the "
"database. Consider using a custom id_spec to "
"convert these multiple values into a single "
"value".format(k))
"value".format(k)
)
except KeyError:
pass
try:
Expand Down
23 changes: 20 additions & 3 deletions gffutils/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -519,7 +519,6 @@ def canonical_transcripts(db, fasta_filename):
"""
import pyfaidx


fasta = pyfaidx.Fasta(fasta_filename, as_raw=False)
for gene in db.features_of_type("gene"):

Expand All @@ -535,7 +534,20 @@ def canonical_transcripts(db, fasta_filename):
cds_len += exon_length
total_len += exon_length

exon_list.append((cds_len, total_len, transcript, exons if cds_len == 0 else [e for e in exons if e.featuretype in ['CDS', 'five_prime_UTR', 'three_prime_UTR']]))
exon_list.append(
(
cds_len,
total_len,
transcript,
exons
if cds_len == 0
else [
e
for e in exons
if e.featuretype in ["CDS", "five_prime_UTR", "three_prime_UTR"]
],
)
)

# If we have CDS, then use the longest coding transcript
if max(i[0] for i in exon_list) > 0:
Expand All @@ -548,7 +560,12 @@ def canonical_transcripts(db, fasta_filename):

canonical_exons = best[-1]
transcript = best[-2]
seqs = [i.sequence(fasta) for i in sorted(canonical_exons, key=lambda x: x.start, reverse=transcript.strand != '+')]
seqs = [
i.sequence(fasta)
for i in sorted(
canonical_exons, key=lambda x: x.start, reverse=transcript.strand != "+"
)
]
yield transcript, "".join(seqs)


Expand Down
71 changes: 45 additions & 26 deletions gffutils/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ def __init__(
keep_order=False,
pragmas=constants.default_pragmas,
sort_attribute_values=False,
text_factory=str
text_factory=str,
):
"""
Connect to a database created by :func:`gffutils.create_db`.
Expand Down Expand Up @@ -871,10 +871,21 @@ def _init_interfeature(f):
Used to initialize a new interfeature that is ready to be updated
in-place.
"""
keys = ['id', 'seqid', 'source', 'featuretype', 'start', 'end',
'score', 'strand', 'frame', 'attributes', 'bin']
keys = [
"id",
"seqid",
"source",
"featuretype",
"start",
"end",
"score",
"strand",
"frame",
"attributes",
"bin",
]
d = dict(zip(keys, f.astuple()))
d['source'] = 'gffutils_derived'
d["source"] = "gffutils_derived"
return d

def _prep_for_yield(d):
Expand All @@ -885,25 +896,26 @@ def _prep_for_yield(d):
If start is greater than stop (which happens when trying to get
interfeatures for overlapping features), then return None.
"""
d['start'] += 1
d['end'] -= 1
new_bin = bins.bins(d['start'], d['end'], one=True)
d['bin'] = new_bin
d["start"] += 1
d["end"] -= 1
new_bin = bins.bins(d["start"], d["end"], one=True)
d["bin"] = new_bin

if d['start'] > d['end']:
if d["start"] > d["end"]:
return None

new_feature = self._feature_returner(**d)

# Join the list of ID values with "-" into a single unique ID, because
# features with multiple values for their ID are no longer permitted since v0.11
if "ID" in new_feature.attributes and len(new_feature.attributes["ID"]) > 1:
new_id = '-'.join(new_feature.attributes["ID"])
new_id = "-".join(new_feature.attributes["ID"])
new_feature.attributes["ID"] = [new_id]
return new_feature

# If not provided, use a no-op function instead.
if not attribute_func:

def attribute_func(a):
return a

Expand Down Expand Up @@ -932,23 +944,23 @@ def attribute_func(a):
nfeatures += 1

# Adjust the interfeature dict in-place with coords...
interfeature['start'] = last_feature.stop
interfeature['end'] = f.start
interfeature["start"] = last_feature.stop
interfeature["end"] = f.start

# ...featuretype
if new_featuretype is None:
interfeature['featuretype'] = "inter_%s_%s" % (
interfeature["featuretype"] = "inter_%s_%s" % (
last_feature.featuretype,
f.featuretype,
)
else:
interfeature['featuretype'] = new_featuretype
interfeature["featuretype"] = new_featuretype

# ...strand
if last_feature.strand != f.strand:
interfeature['strand'] = '.'
interfeature["strand"] = "."
else:
interfeature['strand'] = f.strand
interfeature["strand"] = f.strand

# and attributes
if merge_attributes:
Expand All @@ -963,7 +975,7 @@ def attribute_func(a):
if update_attributes:
new_attributes.update(update_attributes)

interfeature['attributes'] = new_attributes
interfeature["attributes"] = new_attributes

# Ready to yield
new_feature = _prep_for_yield(interfeature)
Expand Down Expand Up @@ -1389,11 +1401,12 @@ def child_gen():
splice_site.start = splice_site.end - 1

# make the ID unique by prepending the new featuretype as a prefix
splice_site.attributes["ID"] = [new_featuretype + "_" + splice_site.attributes["ID"][0]]
splice_site.attributes["ID"] = [
new_featuretype + "_" + splice_site.attributes["ID"][0]
]

yield splice_site


def _old_merge(self, features, ignore_strand=False):
"""
DEPRECATED, only retained here for backwards compatibility. Please use
Expand Down Expand Up @@ -1709,10 +1722,12 @@ def merge_all(
return result_features

def children_bp(
self, feature, child_featuretype="exon", merge=False,
merge_criteria=(mc.seqid, mc.overlap_end_inclusive, mc.strand,
mc.feature_type),
**kwargs
self,
feature,
child_featuretype="exon",
merge=False,
merge_criteria=(mc.seqid, mc.overlap_end_inclusive, mc.strand, mc.feature_type),
**kwargs
):
"""
Total bp of all children of a featuretype.
Expand Down Expand Up @@ -1751,9 +1766,14 @@ def children_bp(
raise ValueError(
"'ignore_strand' has been deprecated; please use "
"merge_criteria to control how features should be merged. "
"E.g., leave out the 'mc.strand' criteria to ignore strand.")
"E.g., leave out the 'mc.strand' criteria to ignore strand."
)
else:
raise TypeError("merge() got unexpected keyword arguments '{}'".format(kwargs.keys()))
raise TypeError(
"merge() got unexpected keyword arguments '{}'".format(
kwargs.keys()
)
)

children = self.children(
feature, featuretype=child_featuretype, order_by="start"
Expand Down Expand Up @@ -1940,7 +1960,6 @@ def seqids(self):
for (i,) in c:
yield i


# Recycle the docs for _relation so they stay consistent between parents()
# and children()
children.__doc__ = children.__doc__.format(_relation_docstring=_relation.__doc__)
Expand Down
2 changes: 1 addition & 1 deletion gffutils/test/conftest.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
collect_ignore=["data"]
collect_ignore = ["data"]
19 changes: 11 additions & 8 deletions gffutils/test/test_1.py
Original file line number Diff line number Diff line change
Expand Up @@ -636,7 +636,7 @@ def test_feature_merge():
id_spec="gene_id",
force_merge_fields=["start"],
keep_order=True,
)
)

# test that warnings are raised because of strand and frame
with warnings.catch_warnings(record=True) as w:
Expand Down Expand Up @@ -933,17 +933,19 @@ def _transform(f):
[(i.start, i.stop) for i in db.features_of_type("exon")]
)


def clean_tempdir():
tempfile.tempdir = tempdir
if os.path.exists(tempdir):
shutil.rmtree(tempdir)
os.makedirs(tempdir)


# specify a writeable temp dir for testing
tempdir = "/tmp/gffutils-test"

def test_tempfiles():

def test_tempfiles():

clean_tempdir()

Expand Down Expand Up @@ -991,6 +993,7 @@ def test_tempfiles():
assert len(filelist) == 1, filelist
assert filelist[0].endswith(".GFFtmp")


@pytest.mark.skip(reason="Unclear if still needed; currently failing")
def test_parallel_db():
# DISABLING in v0.12
Expand Down Expand Up @@ -1113,7 +1116,7 @@ def test_deprecation_handler():
gffutils.example_filename("FBgn0031208.gtf"),
":memory:",
infer_gene_extent=False,
)
)


def test_nonsense_kwarg():
Expand All @@ -1122,7 +1125,7 @@ def test_nonsense_kwarg():
gffutils.example_filename("FBgn0031208.gtf"),
":memory:",
asdf=True,
)
)


def test_infer_gene_extent():
Expand Down Expand Up @@ -1241,7 +1244,8 @@ def test_create_splice_sites():
db = gffutils.create_db(fn, ":memory:")
db = db.update(db.create_splice_sites())
observed = "\n".join(str(feature) for feature in db.all_features())
expected = dedent("""\
expected = dedent(
"""\
chr1 ensGene gene 4763287 4775820 . - . Name=ENSMUSG00000033845;ID=ENSMUSG00000033845;Alias=ENSMUSG00000033845;gid=ENSMUSG00000033845
chr1 ensGene mRNA 4764517 4775779 . - . Name=ENSMUST00000045689;Parent=ENSMUSG00000033845;ID=ENSMUST00000045689;Alias=ENSMUSG00000033845;gid=ENSMUSG00000033845
chr1 ensGene CDS 4775654 4775758 . - 0 Name=ENSMUST00000045689.cds0;Parent=ENSMUST00000045689;ID=ENSMUST00000045689.cds0;gid=ENSMUSG00000033845
Expand All @@ -1259,13 +1263,12 @@ def test_create_splice_sites():
chr1 gffutils_derived three_prime_cis_splice_site 4772815 4772816 . - . Name=ENSMUST00000045689.exon0,ENSMUST00000045689.exon1;Parent=ENSMUST00000045689;ID=three_prime_cis_splice_site_ENSMUST00000045689.exon0-ENSMUST00000045689.exon1;gid=ENSMUSG00000033845
chr1 gffutils_derived five_prime_cis_splice_site 4767604 4767605 . - . Name=ENSMUST00000045689.exon2,ENSMUST00000045689.exon3;Parent=ENSMUST00000045689;ID=five_prime_cis_splice_site_ENSMUST00000045689.exon2-ENSMUST00000045689.exon3;gid=ENSMUSG00000033845
chr1 gffutils_derived five_prime_cis_splice_site 4772647 4772648 . - . Name=ENSMUST00000045689.exon1,ENSMUST00000045689.exon2;Parent=ENSMUST00000045689;ID=five_prime_cis_splice_site_ENSMUST00000045689.exon1-ENSMUST00000045689.exon2;gid=ENSMUSG00000033845
chr1 gffutils_derived five_prime_cis_splice_site 4775652 4775653 . - . Name=ENSMUST00000045689.exon0,ENSMUST00000045689.exon1;Parent=ENSMUST00000045689;ID=five_prime_cis_splice_site_ENSMUST00000045689.exon0-ENSMUST00000045689.exon1;gid=ENSMUSG00000033845""")
chr1 gffutils_derived five_prime_cis_splice_site 4775652 4775653 . - . Name=ENSMUST00000045689.exon0,ENSMUST00000045689.exon1;Parent=ENSMUST00000045689;ID=five_prime_cis_splice_site_ENSMUST00000045689.exon0-ENSMUST00000045689.exon1;gid=ENSMUSG00000033845"""
)

assert observed == expected




if __name__ == "__main__":
# this test case fails
# test_attributes_modify()
Expand Down
Loading

0 comments on commit 71acc05

Please sign in to comment.