Skip to content

Commit

Permalink
Changes for pylint. Function "ic" -> "get_info_content"
Browse files Browse the repository at this point in the history
  • Loading branch information
dvklopfenstein committed Jul 22, 2017
1 parent fff517b commit bcd454d
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 36 deletions.
68 changes: 34 additions & 34 deletions goatools/semantic.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,41 +12,41 @@
from collections import Counter


class TermCounts:
class TermCounts(object):
'''
TermCounts counts the term counts for each
'''
def __init__(self, goid, annots):
    '''
    Build the term counters from a GO DAG and gene annotations.

    goid   -- GO DAG: mapping of GO id -> GO term record
    annots -- mapping of gene -> iterable of annotated GO ids
    '''
    # Keep a reference to the GO DAG for later lookups.
    self._go = goid

    # Per-term occurrence counts and per-namespace (aspect) totals.
    self._counts = Counter()
    self._aspect_counts = Counter()

    # Populate both counters from the annotations.
    self._count_terms(goid, annots)

def _count_terms(self, goid, annots):
    '''
    Fills in the per-term counts and the overall aspect (namespace) counts.
    '''
    for terms in annots.values():  # key is 'gene'
        # Union of a gene's terms with all of their ancestors, so each
        # term is counted at most once per gene even when parent terms
        # are propagated.
        ancestry = set(terms)
        for term_id in terms:
            ancestry.update(goid[term_id].get_all_parents())
        for term_id in ancestry:
            self._counts[term_id] += 1

    # Roll the per-term counts up into per-namespace totals.
    for term_id, cnt in self._counts.items():
        self._aspect_counts[goid[term_id].namespace] += cnt

def get_count(self, go_id):
'''
Expand Down Expand Up @@ -75,7 +75,7 @@ def get_term_freq(self, go_id):
return freq


def ic(go_id, termcounts):
def get_info_content(go_id, termcounts):
'''
Calculates the information content of a GO term.
'''
Expand All @@ -86,36 +86,36 @@ def ic(go_id, termcounts):
return -1.0 * math.log(freq) if freq else 0


def resnik_sim(go_id1, go_id2, goid, termcounts):
    '''
    Computes Resnik's similarity measure: the information content of the
    deepest common ancestor of the two terms.
    '''
    common_ancestor = deepest_common_ancestor([go_id1, go_id2], goid)
    return get_info_content(common_ancestor, termcounts)


def lin_sim(go_id1, go_id2, goid, termcounts):
    '''
    Computes Lin's similarity measure:

        2 * IC(DCA) / (IC(go_id1) + IC(go_id2))

    where IC is the information content and DCA is the deepest common
    ancestor of the two terms (Resnik's measure already yields IC(DCA)).

    Raises ZeroDivisionError if both terms have zero information content.
    '''
    sim_r = resnik_sim(go_id1, go_id2, goid, termcounts)

    # Numerator is +2*IC, not -2*IC: information content (-log freq) is
    # non-negative here, and Lin's similarity is defined on [0, 1]; the
    # previous negated form produced negative similarities.
    return (2 * sim_r) / (get_info_content(go_id1, termcounts) +
                          get_info_content(go_id2, termcounts))


def common_parent_go_ids(terms, go):
def common_parent_go_ids(terms, goid):
'''
This function finds the common ancestors in the GO
tree of the list of terms in the input.
'''
# Find candidates from first
rec = go[terms[0]]
rec = goid[terms[0]]
candidates = rec.get_all_parents()
candidates.update({terms[0]})

# Find intersection with second to nth term
for term in terms[1:]:
rec = go[term]
rec = goid[term]
parents = rec.get_all_parents()
parents.update({term})

Expand All @@ -125,43 +125,43 @@ def common_parent_go_ids(terms, go):
return candidates


def deepest_common_ancestor(terms, goid):
    '''
    Gets the deepest (most specific) common ancestor of the given terms
    using common_parent_go_ids.
    Only returns a single most-specific term - assumes a unique one exists.
    '''
    # Among all shared ancestors, pick the one lying deepest in the DAG.
    ancestors = common_parent_go_ids(terms, goid)
    return max(ancestors, key=lambda term_id: goid[term_id].depth)


def min_branch_length(go_id1, go_id2, goid):
    '''
    Finds the minimum branch length between two terms in the GO DAG:
    the number of branches from each term up to their deepest common
    ancestor, summed.
    '''
    # First locate the deepest common ancestor of the two terms.
    dca = deepest_common_ancestor([go_id1, go_id2], goid)
    dca_depth = goid[dca].depth

    # Total distance: up from each term to the shared ancestor.
    return ((goid[go_id1].depth - dca_depth) +
            (goid[go_id2].depth - dca_depth))


def semantic_distance(go_id1, go_id2, goid):
    '''
    Finds the semantic distance (minimum number of connecting branches)
    between two GO terms.
    '''
    # Kept as a named alias of min_branch_length for API clarity.
    distance = min_branch_length(go_id1, go_id2, goid)
    return distance


def semantic_similarity(go_id1, go_id2, goid):
    '''
    Finds the semantic similarity (inverse of the semantic distance)
    between two GO terms.

    NOTE(review): raises ZeroDivisionError when the semantic distance is
    zero (i.e. the two terms are identical) -- confirm callers never
    compare a term with itself.
    '''
    distance = semantic_distance(go_id1, go_id2, goid)
    return 1.0 / float(distance)
4 changes: 2 additions & 2 deletions notebooks/semantic_similarity.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -166,14 +166,14 @@
}
],
"source": [
"from goatools.semantic import TermCounts, ic\n",
"from goatools.semantic import TermCounts, get_info_content\n",
"\n",
"# First get the counts of each GO term.\n",
"termcounts = TermCounts(go, associations)\n",
"\n",
"# Calculate the information content\n",
"go_id = \"GO:0048364\"\n",
"infocontent = ic(go_id, termcounts)\n",
"infocontent = get_info_content(go_id, termcounts)\n",
"print('Information content ({}) = {}'.format(go_id, infocontent))"
]
},
Expand Down

0 comments on commit bcd454d

Please sign in to comment.