Skip to content

Commit

Permalink
Merge pull request #231 from daler/v0.13rc
Browse files Browse the repository at this point in the history
V0.13rc
  • Loading branch information
daler committed Apr 13, 2024
2 parents 32e48a1 + cce76a6 commit 34c9c6a
Show file tree
Hide file tree
Showing 21 changed files with 310 additions and 215 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ jobs:
build-and-test:
strategy:
matrix:
python-version: ["3.7", "3.8", "3.9", "3.10"]
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
Expand All @@ -30,7 +30,7 @@ jobs:
conda config --system --set channel_priority strict
mamba create -y -n gffutils-env \
python=${{ matrix.python-version }} \
--file requirements.txt
bedtools
conda activate gffutils-env
python setup.py clean sdist
Expand All @@ -43,9 +43,9 @@ jobs:
run: |
source "${HOME}/conda/etc/profile.d/conda.sh"
source "${HOME}/conda/etc/profile.d/mamba.sh"
mamba install -y -n gffutils-env --file optional-requirements.txt pytest hypothesis
conda activate gffutils-env
pip install pytest hypothesis biopython pybedtools
pytest -v --doctest-modules gffutils
conda deactivate
Expand Down
11 changes: 11 additions & 0 deletions doc/source/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,17 @@
Change log
==========

v0.13
-----

- Document options for avoiding deadlocks when simultaneously reading/writing
to a db on disk (fixes `#227
<https://github.com/daler/gffutils/issues/227>`__).
- Support later versions of BioPython (fixes `#228
<https://github.com/daler/gffutils/issues/228>`__).
- Drop support for Python 3.7 and remove the unused ``six`` dependency; add
  support for Python 3.11 and 3.12 (fixes `#223 <https://github.com/daler/gffutils/issues/223>`__).

v0.12
-----

Expand Down
3 changes: 1 addition & 2 deletions gffutils/attributes.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import six
import collections

try:
Expand Down Expand Up @@ -95,7 +94,7 @@ def __str__(self):
return "\n".join(s)

def update(self, *args, **kwargs):
    """
    Update attributes from a dict (or any ``dict()``-style initializer
    arguments), assigning each pair through ``self[k] = v`` so that any
    normalization done by ``__setitem__`` is applied to incoming values.
    """
    # dict(*args, **kwargs) accepts a mapping, an iterable of pairs, or
    # keyword arguments -- same call signature as dict.update().
    for k, v in dict(*args, **kwargs).items():
        self[k] = v


Expand Down
17 changes: 9 additions & 8 deletions gffutils/biopython_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
Module for integration with BioPython, specifically SeqRecords and SeqFeature
objects.
"""
import six

try:
from Bio.SeqFeature import SeqFeature, FeatureLocation
Expand All @@ -15,7 +14,8 @@
# Map GFF strand characters to BioPython's integer strand convention:
#   "+" -> 1 (forward), "-" -> -1 (reverse),
#   "." -> None (feature is unstranded), "?" -> 0 (strand unknown).
# NOTE: the duplicate '"." : 0' entry left over from the old mapping was
# removed; a dict literal with duplicate keys silently keeps only the last.
_biopython_strand = {
    "+": 1,
    "-": -1,
    ".": None,
    "?": 0,
}
# Inverse mapping, used when converting BioPython strands back to GFF text.
_feature_strand = dict((v, k) for k, v in _biopython_strand.items())

Expand All @@ -33,7 +33,7 @@ def to_seqfeature(feature):
If string, assume it is a GFF or GTF-format line; otherwise just use
the provided feature directly.
"""
if isinstance(feature, six.string_types):
if isinstance(feature, str):
feature = feature_from_line(feature)

qualifiers = {
Expand All @@ -46,10 +46,11 @@ def to_seqfeature(feature):
return SeqFeature(
# Convert from GFF 1-based to standard Python 0-based indexing used by
# BioPython
FeatureLocation(feature.start - 1, feature.stop),
FeatureLocation(
feature.start - 1, feature.stop, strand=_biopython_strand[feature.strand]
),
id=feature.id,
type=feature.featuretype,
strand=_biopython_strand[feature.strand],
qualifiers=qualifiers,
)

Expand All @@ -66,12 +67,12 @@ def from_seqfeature(s, **kwargs):
score = s.qualifiers.get("score", ".")[0]
seqid = s.qualifiers.get("seqid", ".")[0]
frame = s.qualifiers.get("frame", ".")[0]
strand = _feature_strand[s.strand]
strand = _feature_strand[s.location.strand]

# BioPython parses 1-based GenBank positions into 0-based for use within
# Python. We need to convert back to 1-based GFF format here.
start = s.location.start.position + 1
stop = s.location.end.position
start = s.location.start + 1
stop = s.location.end
featuretype = s.type
id = s.id
attributes = dict(s.qualifiers)
Expand Down
4 changes: 1 addition & 3 deletions gffutils/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@
Conversion functions that operate on :class:`FeatureDB` classes.
"""

import six


def to_bed12(f, db, child_type="exon", name_field="ID"):
"""
Expand All @@ -22,7 +20,7 @@ def to_bed12(f, db, child_type="exon", name_field="ID"):
Attribute to be used in the "name" field of the BED12 entry. Usually
"ID" for GFF; "transcript_id" for GTF.
"""
if isinstance(f, six.string_types):
if isinstance(f, str):
f = db[f]
children = list(db.children(f, featuretype=child_type, order_by="start"))
sizes = [len(i) for i in children]
Expand Down
14 changes: 7 additions & 7 deletions gffutils/create.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import sys
import os
import sqlite3
import six
from textwrap import dedent
from gffutils import constants
from gffutils import version
Expand Down Expand Up @@ -119,7 +118,7 @@ def __init__(
os.unlink(dbfn)
self.dbfn = dbfn
self.id_spec = id_spec
if isinstance(dbfn, six.string_types):
if isinstance(dbfn, str):
conn = sqlite3.connect(dbfn)
else:
conn = dbfn
Expand Down Expand Up @@ -171,7 +170,7 @@ def _id_handler(self, f):
"""

# If id_spec is a string or callable, convert to iterable for later
if isinstance(self.id_spec, six.string_types):
if isinstance(self.id_spec, str):
id_key = [self.id_spec]
elif hasattr(self.id_spec, "__call__"):
id_key = [self.id_spec]
Expand All @@ -181,7 +180,7 @@ def _id_handler(self, f):
elif isinstance(self.id_spec, dict):
try:
id_key = self.id_spec[f.featuretype]
if isinstance(id_key, six.string_types):
if isinstance(id_key, str):
id_key = [id_key]

# Otherwise, use default auto-increment.
Expand Down Expand Up @@ -217,7 +216,8 @@ def _id_handler(self, f):
"a single value is required for a primary key in the "
"database. Consider using a custom id_spec to "
"convert these multiple values into a single "
"value".format(k))
"value".format(k)
)
except KeyError:
pass
try:
Expand Down Expand Up @@ -684,7 +684,7 @@ def _update_relations(self):
# c.execute('CREATE INDEX childindex ON relations (child)')
# self.conn.commit()

if isinstance(self._keep_tempfiles, six.string_types):
if isinstance(self._keep_tempfiles, str):
suffix = self._keep_tempfiles
else:
suffix = ".gffutils"
Expand Down Expand Up @@ -883,7 +883,7 @@ def _update_relations(self):
msg = "transcript"
logger.info("Inferring %s extents " "and writing to tempfile" % msg)

if isinstance(self._keep_tempfiles, six.string_types):
if isinstance(self._keep_tempfiles, str):
suffix = self._keep_tempfiles
else:
suffix = ".gffutils"
Expand Down
12 changes: 4 additions & 8 deletions gffutils/feature.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from pyfaidx import Fasta
import six
import simplejson as json
from gffutils import constants
from gffutils import helpers
Expand Down Expand Up @@ -166,7 +165,7 @@ def __init__(
# for testing.
attributes = attributes or dict_class()

if isinstance(attributes, six.string_types):
if isinstance(attributes, str):
try:
attributes = helpers._unjsonify(attributes, isattributes=True)

Expand All @@ -182,7 +181,7 @@ def __init__(
# If string, then try un-JSONifying it into a list; if that doesn't
# work then assume it's tab-delimited and convert to a list.
extra = extra or []
if isinstance(extra, six.string_types):
if isinstance(extra, str):
try:
extra = helpers._unjsonify(extra)
except json.JSONDecodeError:
Expand Down Expand Up @@ -254,10 +253,7 @@ def __setitem__(self, key, value):
self.attributes[key] = value

def __str__(self):
    """
    Return the GFF/GTF line for this feature.

    The Python 2 branch (``six.PY3`` check and ``unicode(...).encode``)
    was dead/undefined after dropping six, so __str__ now simply
    delegates to __unicode__, which builds the tab-delimited line.
    """
    return self.__unicode__()

def __unicode__(self):

Expand Down Expand Up @@ -387,7 +383,7 @@ def sequence(self, fasta, use_strand=True):
-------
string
"""
if isinstance(fasta, six.string_types):
if isinstance(fasta, str):
fasta = Fasta(fasta, as_raw=False)

# recall GTF/GFF is 1-based closed; pyfaidx uses Python slice notation
Expand Down
3 changes: 1 addition & 2 deletions gffutils/gffwriter.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
##
## GFF Writer (writer): serializing gffutils records as GFF text files.
##
import six
import tempfile
import shutil
from time import strftime, localtime
Expand Down Expand Up @@ -41,7 +40,7 @@ def __init__(self, out, with_header=True, in_place=False):
self.temp_file = None
# Output stream to write to
self.out_stream = None
if isinstance(out, six.string_types):
if isinstance(out, str):
if self.in_place:
# Use temporary file
self.temp_file = tempfile.NamedTemporaryFile(delete=False)
Expand Down
38 changes: 27 additions & 11 deletions gffutils/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import simplejson as json
import time
import tempfile
import six
from gffutils import constants
from gffutils import bins
import gffutils
Expand Down Expand Up @@ -202,7 +201,7 @@ def make_query(
# e.g., "featuretype = 'exon'"
#
# or, "featuretype IN ('exon', 'CDS')"
if isinstance(featuretype, six.string_types):
if isinstance(featuretype, str):
d["FEATURETYPE"] = "features.featuretype = ?"
args.append(featuretype)
else:
Expand All @@ -218,7 +217,7 @@ def make_query(
# `limit` is a string or a tuple of (chrom, start, stop)
#
# e.g., "seqid = 'chr2L' AND start > 1000 AND end < 5000"
if isinstance(limit, six.string_types):
if isinstance(limit, str):
seqid, startstop = limit.split(":")
start, end = startstop.split("-")
else:
Expand Down Expand Up @@ -257,7 +256,7 @@ def make_query(
# Default is essentially random order.
#
# e.g. "ORDER BY seqid, start DESC"
if isinstance(order_by, six.string_types):
if isinstance(order_by, str):
_order_by.append(order_by)

else:
Expand Down Expand Up @@ -387,7 +386,7 @@ def merge_attributes(attr1, attr2, numeric_sort=False):
if not isinstance(v, list):
new_d[k] = [v]

for k, v in six.iteritems(attr1):
for k, v in attr1.items():
if k in attr2:
if not isinstance(v, list):
v = [v]
Expand Down Expand Up @@ -507,9 +506,9 @@ def is_gff_db(db_fname):


def to_unicode(obj, encoding="utf-8"):
    """
    Return *obj* as a ``str``.

    ``bytes`` input is decoded with *encoding*; any other object
    (including ``str``) is returned unchanged.

    The mechanical six-removal left this as a no-op: the inner
    ``if not isinstance(obj, str)`` check could never be true inside an
    ``if isinstance(obj, str)`` branch, so bytes were silently passed
    through undecoded. Decoding bytes restores the original intent of
    the Python 2 ``six.text_type(obj, encoding)`` conversion.
    """
    if isinstance(obj, bytes):
        return obj.decode(encoding)
    return obj


Expand All @@ -520,7 +519,6 @@ def canonical_transcripts(db, fasta_filename):
"""
import pyfaidx


fasta = pyfaidx.Fasta(fasta_filename, as_raw=False)
for gene in db.features_of_type("gene"):

Expand All @@ -536,7 +534,20 @@ def canonical_transcripts(db, fasta_filename):
cds_len += exon_length
total_len += exon_length

exon_list.append((cds_len, total_len, transcript, exons if cds_len == 0 else [e for e in exons if e.featuretype in ['CDS', 'five_prime_UTR', 'three_prime_UTR']]))
exon_list.append(
(
cds_len,
total_len,
transcript,
exons
if cds_len == 0
else [
e
for e in exons
if e.featuretype in ["CDS", "five_prime_UTR", "three_prime_UTR"]
],
)
)

# If we have CDS, then use the longest coding transcript
if max(i[0] for i in exon_list) > 0:
Expand All @@ -549,7 +560,12 @@ def canonical_transcripts(db, fasta_filename):

canonical_exons = best[-1]
transcript = best[-2]
seqs = [i.sequence(fasta) for i in sorted(canonical_exons, key=lambda x: x.start, reverse=transcript.strand != '+')]
seqs = [
i.sequence(fasta)
for i in sorted(
canonical_exons, key=lambda x: x.start, reverse=transcript.strand != "+"
)
]
yield transcript, "".join(seqs)


Expand Down
Loading

0 comments on commit 34c9c6a

Please sign in to comment.