From 21ef80e0fc54e4c35328b8a308bcdca251fb8058 Mon Sep 17 00:00:00 2001
From: Haibao Tang
Date: Sat, 19 Mar 2016 17:05:59 -0700
Subject: [PATCH] [goatools] add base and semantic; rename notebook to
 notebooks

---
 README.rst                                    |  14 +-
 goatools/associations.py                      |   7 +-
 goatools/base.py                              | 105 +++++++
 goatools/gaf_reader.py                        |  35 +--
 goatools/obo_parser.py                        |   2 -
 goatools/semantic.py                          | 166 +++++++++++
 .../annotation_coverage.ipynb                 |   0
 {notebook => notebooks}/cell_cycle.ipynb      |   0
 {notebook => notebooks}/goea_nbt3102.ipynb    |   0
 {notebook => notebooks}/images/README.md      |   0
 {notebook => notebooks}/images/nbt3102_CC.png | Bin
 .../images/nbt3102_MF_RNA_Symbols.png         | Bin
 .../images/nbt3102_MF_RNA_genecnt.png         | Bin
 {notebook => notebooks}/makefile              |   0
 .../report_depth_level.ipynb                  |   0
 notebooks/semantic_similarity.ipynb           | 268 ++++++++++++++++++
 16 files changed, 570 insertions(+), 27 deletions(-)
 create mode 100644 goatools/base.py
 create mode 100644 goatools/semantic.py
 rename {notebook => notebooks}/annotation_coverage.ipynb (100%)
 rename {notebook => notebooks}/cell_cycle.ipynb (100%)
 rename {notebook => notebooks}/goea_nbt3102.ipynb (100%)
 rename {notebook => notebooks}/images/README.md (100%)
 rename {notebook => notebooks}/images/nbt3102_CC.png (100%)
 rename {notebook => notebooks}/images/nbt3102_MF_RNA_Symbols.png (100%)
 rename {notebook => notebooks}/images/nbt3102_MF_RNA_genecnt.png (100%)
 rename {notebook => notebooks}/makefile (100%)
 rename {notebook => notebooks}/report_depth_level.ipynb (100%)
 create mode 100644 notebooks/semantic_similarity.ipynb

diff --git a/README.rst b/README.rst
index 869a80b7..dd6262d2 100755
--- a/README.rst
+++ b/README.rst
@@ -38,7 +38,7 @@ Description
 This package contains a Python library to
 - process over- and under-representation of certain GO terms, based on Fisher's
-  exact test. With numerous multiple correction routines including locally
+  exact test, with numerous multiple correction routines including locally
   implemented routines for Bonferroni, Sidak, Holm, and false discovery rate.
 Also included are multiple test corrections from `statsmodels `_:
 FDR Benjamini/Hochberg, FDR Benjamini/Yekutieli, Holm-Sidak, Simes-Hochberg,
@@ -218,19 +218,23 @@ iPython Notebooks
 Run a Gene Ontology Enrichment Analysis (GOEA)
 ::::::::::::::::::::::::::::::::::::::::::::::
-https://github.com/tanghaibao/goatools/blob/master/notebook/goea_nbt3102.ipynb
+https://github.com/tanghaibao/goatools/blob/master/notebooks/goea_nbt3102.ipynb
 
 Report level and depth counts of a set of GO terms
 ::::::::::::::::::::::::::::::::::::::::::::::::::
-https://github.com/tanghaibao/goatools/blob/master/notebook/report_depth_level.ipynb
+https://github.com/tanghaibao/goatools/blob/master/notebooks/report_depth_level.ipynb
 
 Find all human protein-coding genes associated with cell cycle
 ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
-https://github.com/tanghaibao/goatools/blob/master/notebook/cell_cycle.ipynb
+https://github.com/tanghaibao/goatools/blob/master/notebooks/cell_cycle.ipynb
 
 Calculate annotation coverage of GO terms on various species
 ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
-https://github.com/tanghaibao/goatools/blob/master/notebook/annotation_coverage.ipynb
+https://github.com/tanghaibao/goatools/blob/master/notebooks/annotation_coverage.ipynb
+
+Determine the semantic similarities between GO terms
+::::::::::::::::::::::::::::::::::::::::::::::::::::
+https://github.com/tanghaibao/goatools/blob/master/notebooks/semantic_similarity.ipynb
 
 Reference
diff --git a/goatools/associations.py b/goatools/associations.py
index 4be34c7e..38101acc 100755
--- a/goatools/associations.py
+++ b/goatools/associations.py
@@ -78,7 +78,7 @@ def read_ncbi_gene2go(fin_gene2go, taxids=None, **kws):
     """Read NCBI's gene2go. Return gene2go data for user-specified taxids."""
     # Written by DV Klopfenstein
     # kws: taxid2asscs evidence_set
-    # Simple associations 
+    # Simple associations
     id2gos = defaultdict(set)
     # Optional detailed associations split by taxid and having both ID2GOs & GO2IDs
     # e.g., taxid2asscs = defaultdict(lambda: defaultdict(lambda: defaultdict(set))
@@ -117,12 +117,11 @@ def read_gaf(fin_gaf, **kws):
     # Written by DV Klopfenstein
     # kws: taxid2asscs evidence_set
     from goatools.gaf_reader import GafReader
-    # Simple associations 
+    # Simple associations
     id2gos = defaultdict(set)
     # Optional detailed associations split by taxid and having both ID2GOs & GO2IDs
     taxid2asscs = kws['taxid2asscs'] if 'taxid2asscs' in kws else None
-    gafobj = GafReader()
-    gafnts = gafobj.read_gaf(fin_gaf)
+    gafnts = GafReader(fin_gaf).associations
     # Optionally specify a subset of GOs based on their evidence.
     evs = kws['evidence_set'] if 'evidence_set' in kws else None
     for nt in gafnts:
diff --git a/goatools/base.py b/goatools/base.py
new file mode 100644
index 00000000..1c6ced73
--- /dev/null
+++ b/goatools/base.py
@@ -0,0 +1,105 @@
+# Stolen from brentp:
+#
+
+import bz2
+import gzip
+import os
+import os.path as op
+import sys
+import urllib
+from signal import signal, SIGPIPE, SIG_DFL
+from subprocess import Popen, PIPE
+
+
+if sys.version_info[0] < 3:
+    int_types = (int, long)
+    urlopen = urllib.urlopen
+else:
+    int_types = (int,)
+    basestring = str
+    from urllib.request import urlopen
+
+
+def nopen(f, mode="r"):
+    r"""
+    Open a file that's gzipped, or return stdin for '-'.
+    If f is a number, the result of nopen(sys.argv[f]) is returned.
+
+    >>> nopen('-') == sys.stdin, nopen('-', 'w') == sys.stdout
+    (True, True)
+
+    >>> nopen(sys.argv[0])
+    <...file...>
+
+    # expands user and vars ($HOME)
+    >>> nopen("~/.bashrc").name == nopen("$HOME/.bashrc").name
+    True
+
+    # an already open file.
+    >>> nopen(open(sys.argv[0]))
+    <...file...>
+
+    >>> nopen(0)
+    <...file...>
+
+    Or provide nicer access to Popen.stdout:
+    >>> files = list(nopen("|ls"))
+    >>> assert 'setup.py\n' in files or b'setup.py\n' in files, files
+    """
+    if isinstance(f, int_types):
+        return nopen(sys.argv[f], mode)
+
+    if not isinstance(f, basestring):
+        return f
+    if f.startswith("|"):
+        # using shell explicitly makes things like process substitution work:
+        # http://stackoverflow.com/questions/7407667/python-subprocess-subshells-and-redirection
+        # use sys.stderr so we don't have to worry about checking it...
+        p = Popen(f[1:], stdout=PIPE, stdin=PIPE,
+                  stderr=sys.stderr if mode == "r" else PIPE,
+                  shell=True, bufsize=-1,  # use system default for buffering
+                  preexec_fn=prefunc,
+                  close_fds=False, executable=os.environ.get('SHELL'))
+        if sys.version_info[0] > 2:
+            import io
+            p.stdout = io.TextIOWrapper(p.stdout)
+            p.stdin = io.TextIOWrapper(p.stdin)
+            if mode != "r":
+                p.stderr = io.TextIOWrapper(p.stderr)
+
+        if mode and mode[0] == "r":
+            return process_iter(p, f[1:])
+        return p
+
+    if f.startswith(("http://", "https://", "ftp://")):
+        fh = urlopen(f)
+        if f.endswith(".gz"):
+            return ungzipper(fh)
+        if sys.version_info[0] < 3:
+            return fh
+        import io
+        return io.TextIOWrapper(fh)
+    f = op.expanduser(op.expandvars(f))
+    if f.endswith((".gz", ".Z", ".z")):
+        fh = gzip.open(f, mode)
+        if sys.version_info[0] < 3:
+            return fh
+        import io
+        return io.TextIOWrapper(fh)
+    elif f.endswith((".bz", ".bz2", ".bzip2")):
+        fh = bz2.BZ2File(f, mode)
+        if sys.version_info[0] < 3:
+            return fh
+        import io
+        return io.TextIOWrapper(fh)
+
+    return {"r": sys.stdin, "w": sys.stdout}[mode[0]] if f == "-" \
+        else open(f, mode)
+
+
+def prefunc():
+    # Minimal stand-in for the toolshed helper of the same name: restore
+    # default SIGPIPE handling in the child so early-exiting consumers of
+    # the pipe do not raise broken-pipe errors here.
+    signal(SIGPIPE, SIG_DFL)
+
+
+def process_iter(proc, cmd=""):
+    # Minimal stand-in for the toolshed helper of the same name: yield
+    # lines from a subprocess's stdout, then wait for the process to end.
+    try:
+        for line in proc.stdout:
+            yield line
+    finally:
+        proc.wait()
+
+
+def ungzipper(fh, blocksize=16384):
+    """
+    Work-around to get streaming download of http://.../some.gz
+    """
+    import zlib
+    uzip = zlib.decompressobj(16 + zlib.MAX_WBITS)
+    data = uzip.decompress(fh.read(blocksize)).split("\n")
+
+    while len(data[0]):
+        # last chunk might not be a full line.
+        save = data.pop()
+        for line in data:
+            yield line
+        data = uzip.decompress(fh.read(blocksize)).split("\n")
+        # first line is prepended with saved chunk from end of last set.
+        data[0] = save + data[0]
diff --git a/goatools/gaf_reader.py b/goatools/gaf_reader.py
index 25860825..fe059913 100755
--- a/goatools/gaf_reader.py
+++ b/goatools/gaf_reader.py
@@ -10,6 +10,7 @@
 import sys
 import re
 from collections import namedtuple
+from goatools.base import nopen
 
 __copyright__ = "Copyright (C) 2016, DV Klopfenstein, H Tang. All rights reserved."
 __author__ = "DV Klopfenstein"
@@ -55,8 +56,10 @@ class GafReader(object):
     # Expected values for a Qualifier
     exp_qualifiers = set(['NOT', 'contributes_to', 'colocalizes_with'])
 
-    def __init__(self, log=sys.stdout):
+    def __init__(self, filename, log=sys.stdout):
+        self.filename = filename
         self.log = log
+        self.associations = self.read_gaf(filename)
 
     def _get_ntgaf(self, ntgafobj, flds, ver):
         """Convert fields from string to preferred format for GAF ver 2.1 and 2.0."""
@@ -114,22 +117,22 @@ def _rd_fld_vals(name, val, set_list_ft=True, qty_min=0, qty_max=None):
         return vals if set_list_ft else set(vals)
 
     def read_gaf(self, fin_gaf):
-        """Read GAF file."""
+        """Read GAF file. HTTP address okay. GZIPPED/BZIPPED file okay."""
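+        # Usage sketch (the GAF below is the one used in the notebooks; local,
+        # gzipped, bzipped, and HTTP sources are all handled by nopen):
+        #     nts = GafReader("gene_association.tair.gz").associations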
         ga_lst = []
-        with open(fin_gaf) as ifstrm:
-            ver = None
-            ntgafobj = None
-            exp_numcol = None
-            for line in ifstrm:
-                if ntgafobj is not None and not line.startswith('!'):
-                    flds = self._split_line(line, exp_numcol)
-                    ntgaf = self._get_ntgaf(ntgafobj, flds, ver)
-                    ga_lst.append(ntgaf)
-                elif ntgafobj is None and line.startswith('!gaf-version:'):
-                    ver = line[13:].strip()
-                    ntgafobj = namedtuple("ntgafobj", " ".join(self.gaf_columns[ver]))
-                    exp_numcol = self.gaf_numcol[ver]
-        self.log.write("  READ {N} items: {FIN}\n".format(N=len(ga_lst), FIN=fin_gaf))
+        ifstrm = nopen(fin_gaf)
+        ver = None
+        ntgafobj = None
+        exp_numcol = None
+        for line in ifstrm:
+            if ntgafobj is not None and not line.startswith('!'):
+                flds = self._split_line(line, exp_numcol)
+                ntgaf = self._get_ntgaf(ntgafobj, flds, ver)
+                ga_lst.append(ntgaf)
+            elif ntgafobj is None and line.startswith('!gaf-version:'):
+                ver = line[13:].strip()
+                ntgafobj = namedtuple("ntgafobj", " ".join(self.gaf_columns[ver]))
+                exp_numcol = self.gaf_numcol[ver]
+        self.log.write("  READ {N} items: {FIN}\n".format(N=len(ga_lst), FIN=fin_gaf))
         return ga_lst
 
     @staticmethod
diff --git a/goatools/obo_parser.py b/goatools/obo_parser.py
index ba40e795..daf0e138 100755
--- a/goatools/obo_parser.py
+++ b/goatools/obo_parser.py
@@ -11,8 +11,6 @@
 import os
 import re
 
-import collections as cx
-
 GraphEngines = ("pygraphviz", "pydot")
 
 __copyright__ = "Copyright (C) 2010-2016, H Tang et al., All rights reserved."
diff --git a/goatools/semantic.py b/goatools/semantic.py
new file mode 100644
index 00000000..16341775
--- /dev/null
+++ b/goatools/semantic.py
@@ -0,0 +1,166 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+
+"""
+Compute semantic similarities between GO terms. Adapted from a book chapter
+by Christophe Dessimoz (thanks). For details, please check out:
+notebooks/semantic_similarity.ipynb
+"""
+
+import math
+from collections import Counter
+
+
+class TermCounts:
+    '''
+    TermCounts tallies how often each GO term appears in a set of
+    annotations, with parent terms propagated.
+    '''
+    def __init__(self, go, annots):
+        '''
+        Initialise the counters and fill them from the annotations.
+        '''
+        # Backup
+        self._go = go
+
+        # Initialise the counters
+        self._counts = Counter()
+        self._aspect_counts = Counter()
+
+        # Fill the counters...
+        self._count_terms(go, annots)
+
+    def _count_terms(self, go, annots):
+        '''
+        Fills in the counts and overall aspect counts.
+        '''
+        for gene, terms in annots.items():
+            # Take the union of each gene's terms and all of their parents,
+            # so parent terms are propagated but each term is counted at
+            # most once per gene.
+            allterms = set(terms)
+            for go_id in terms:
+                allterms |= go[go_id].get_all_parents()
+            for p in allterms:
+                self._counts[p] += 1
+
+        for go_id, c in self._counts.items():
+            # Group by namespace
+            namespace = go[go_id].namespace
+            self._aspect_counts[namespace] += c
+
+    def get_count(self, go_id):
+        '''
+        Returns the count of that GO term observed in the annotations.
+        '''
+        return self._counts[go_id]
+
+    def get_total_count(self, aspect):
+        '''
+        Gets the total count that's been precomputed.
+        '''
+        return self._aspect_counts[aspect]
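+
+    # Note: get_term_freq below divides a term's count by the total count of
+    # the term's namespace (aspect); this relative annotation frequency is
+    # what the information-content calculation (ic) builds on.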
+
+    def get_term_freq(self, go_id):
+        '''
+        Returns the frequency at which a particular GO term has
+        been observed in the annotations.
+        '''
+        try:
+            namespace = self._go[go_id].namespace
+            freq = float(self.get_count(go_id)) / float(self.get_total_count(namespace))
+        except ZeroDivisionError:
+            freq = 0
+
+        return freq
+
+
+def ic(go_id, termcounts):
+    '''
+    Calculates the information content of a GO term.
+    '''
+    # Get the observed frequency of the GO term
+    freq = termcounts.get_term_freq(go_id)
+
+    # Calculate the information content (i.e., -log("freq of GO term"))
+    return -1.0 * math.log(freq) if freq else 0
+
+
+def resnik_sim(go_id1, go_id2, go, termcounts):
+    '''
+    Computes Resnik's similarity measure: the information content of the
+    most specific common ancestor (here, the deepest common ancestor).
+    '''
+    msca = deepest_common_ancestor([go_id1, go_id2], go)
+    return ic(msca, termcounts)
+
+
+def lin_sim(go_id1, go_id2, go, termcounts):
+    '''
+    Computes Lin's similarity measure.
+    '''
+    sim_r = resnik_sim(go_id1, go_id2, go, termcounts)
+
+    return (2*sim_r)/(ic(go_id1, termcounts) + ic(go_id2, termcounts))
+
+
+def common_parent_go_ids(terms, go):
+    '''
+    This function finds the common ancestors in the GO
+    tree of the list of terms in the input.
+    '''
+    # Find candidates from first
+    rec = go[terms[0]]
+    candidates = rec.get_all_parents()
+    candidates.update({terms[0]})
+
+    # Find intersection with second to nth term
+    for term in terms[1:]:
+        rec = go[term]
+        parents = rec.get_all_parents()
+        parents.update({term})
+
+        # Find the intersection with the candidates, and update.
+        candidates.intersection_update(parents)
+
+    return candidates
+
+
+def deepest_common_ancestor(terms, go):
+    '''
+    Gets the deepest common ancestor of the given terms, using
+    common_parent_go_ids above. Returns only the single deepest
+    term and assumes one exists.
+    '''
+    # Take the element at maximum depth.
+    return max(common_parent_go_ids(terms, go), key=lambda t: go[t].depth)
+
+
+def min_branch_length(go_id1, go_id2, go):
+    '''
+    Finds the minimum branch length between two terms in the GO DAG.
+    '''
+    # First get the deepest common ancestor
+    dca = deepest_common_ancestor([go_id1, go_id2], go)
+
+    # Then get the distance from the DCA to each term
+    dca_depth = go[dca].depth
+    d1 = go[go_id1].depth - dca_depth
+    d2 = go[go_id2].depth - dca_depth
+
+    # Return the total distance - i.e., to the deepest common ancestor and back.
+    return d1 + d2
+
+
+def semantic_distance(go_id1, go_id2, go):
+    '''
+    Finds the semantic distance (minimum number of connecting branches)
+    between two GO terms.
+    '''
+    return min_branch_length(go_id1, go_id2, go)
+
+
+def semantic_similarity(go_id1, go_id2, go):
+    '''
+    Finds the semantic similarity (inverse of the semantic distance)
+    between two GO terms.
+    '''
+    return 1.0 / float(semantic_distance(go_id1, go_id2, go))
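+
+
+if __name__ == '__main__':
+    # Minimal usage sketch, not part of the library API. It assumes a local
+    # copy of go-basic.obo; the two term IDs are the notebook's examples.
+    from goatools.obo_parser import GODag
+    godag = GODag("go-basic.obo")
+    print(semantic_distance('GO:0048364', 'GO:0044707', godag))
+    print(semantic_similarity('GO:0048364', 'GO:0044707', godag))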
diff --git a/notebook/annotation_coverage.ipynb b/notebooks/annotation_coverage.ipynb
similarity index 100%
rename from notebook/annotation_coverage.ipynb
rename to notebooks/annotation_coverage.ipynb
diff --git a/notebook/cell_cycle.ipynb b/notebooks/cell_cycle.ipynb
similarity index 100%
rename from notebook/cell_cycle.ipynb
rename to notebooks/cell_cycle.ipynb
diff --git a/notebook/goea_nbt3102.ipynb b/notebooks/goea_nbt3102.ipynb
similarity index 100%
rename from notebook/goea_nbt3102.ipynb
rename to notebooks/goea_nbt3102.ipynb
diff --git a/notebook/images/README.md b/notebooks/images/README.md
similarity index 100%
rename from notebook/images/README.md
rename to notebooks/images/README.md
diff --git a/notebook/images/nbt3102_CC.png b/notebooks/images/nbt3102_CC.png
similarity index 100%
rename from notebook/images/nbt3102_CC.png
rename to notebooks/images/nbt3102_CC.png
diff --git a/notebook/images/nbt3102_MF_RNA_Symbols.png b/notebooks/images/nbt3102_MF_RNA_Symbols.png
similarity index 100%
rename from notebook/images/nbt3102_MF_RNA_Symbols.png
rename to notebooks/images/nbt3102_MF_RNA_Symbols.png
diff --git a/notebook/images/nbt3102_MF_RNA_genecnt.png b/notebooks/images/nbt3102_MF_RNA_genecnt.png
similarity index 100%
rename from notebook/images/nbt3102_MF_RNA_genecnt.png
rename to notebooks/images/nbt3102_MF_RNA_genecnt.png
diff --git a/notebook/makefile b/notebooks/makefile
similarity index 100%
rename from notebook/makefile
rename to notebooks/makefile
diff --git a/notebook/report_depth_level.ipynb b/notebooks/report_depth_level.ipynb
similarity index 100%
rename from notebook/report_depth_level.ipynb
rename to notebooks/report_depth_level.ipynb
diff --git a/notebooks/semantic_similarity.ipynb b/notebooks/semantic_similarity.ipynb
new file mode 100644
index 00000000..9d826a86
--- /dev/null
+++ b/notebooks/semantic_similarity.ipynb
@@ -0,0 +1,268 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Computing basic semantic similarities between GO terms"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Adapted from a book chapter written by _Christophe Dessimoz_"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "In this section we look at how to compute the semantic similarity between GO terms. First we need a function that calculates the minimum number of branches connecting two GO terms; `goatools.semantic` provides this, along with information-content based measures. Let's start by loading the GO DAG."
+   ]
+  },
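+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Loading assumes `go-basic.obo` sits in the parent directory; if it is missing, a cell along these lines can fetch it first (illustrative, hence left commented out):"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "# import urllib\n",
+    "# urllib.urlretrieve(\"http://geneontology.org/ontology/go-basic.obo\", \"../go-basic.obo\")"
+   ]
+  },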
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "load obo file ../go-basic.obo\n", + "46268" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "../go-basic.obo: format-version(1.2) data-version(releases/2016-03-19)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " nodes imported\n" + ] + } + ], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2\n", + "\n", + "import sys\n", + "\n", + "sys.path.insert(0, \"..\")\n", + "from goatools import obo_parser\n", + "\n", + "go = obo_parser.GODag(\"../go-basic.obo\")" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "GO:0048364\tlevel-03\tdepth-04\troot development [biological_process] \n", + "GO:0044707\tlevel-02\tdepth-02\tsingle-multicellular organism process [biological_process] \n" + ] + } + ], + "source": [ + "go_id3 = 'GO:0048364'\n", + "go_id4 = 'GO:0044707'\n", + "print(go[go_id3])\n", + "print(go[go_id4])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's get all the annotations from arabidopsis." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " READ 211727 items: http://geneontology.org/gene-associations/gene_association.tair.gz\n" + ] + } + ], + "source": [ + "from goatools.associations import read_gaf\n", + "\n", + "associations = read_gaf(\"http://geneontology.org/gene-associations/gene_association.tair.gz\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we can calculate the semantic distance and semantic similarity, as so:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The semantic similarity between terms GO:0048364 and GO:0044707 is 0.25.\n" + ] + } + ], + "source": [ + "from goatools.semantic import semantic_similarity\n", + "\n", + "sim = semantic_similarity(go_id3, go_id4, go)\n", + "print('The semantic similarity between terms {} and {} is {}.'.format(go_id3, go_id4, sim))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Then we can calculate the information content of the single term, GO:0048364." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Information content (GO:0048364) = 7.75481392334\n" + ] + } + ], + "source": [ + "from goatools.semantic import TermCounts, ic\n", + "\n", + "# First get the counts of each GO term.\n", + "termcounts = TermCounts(go, associations)\n", + "\n", + "# Calculate the information content\n", + "go_id = \"GO:0048364\"\n", + "infocontent = ic(go_id, termcounts)\n", + "print('Information content ({}) = {}'.format(go_id, infocontent))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Resnik's similarity measure is defined as the information content of the most informative common ancestor. That is, the most specific common parent-term in the GO. 
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Resnik's similarity measure is defined as the information content of the most informative common ancestor, i.e., the most specific common parent-term in the GO (computed here as the deepest common ancestor). We can calculate it as follows:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Resnik similarity score (GO:0048364, GO:0044707) = 4.0540784252\n"
+     ]
+    }
+   ],
+   "source": [
+    "from goatools.semantic import resnik_sim\n",
+    "\n",
+    "sim_r = resnik_sim(go_id3, go_id4, go, termcounts)\n",
+    "print('Resnik similarity score ({}, {}) = {}'.format(go_id3, go_id4, sim_r))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Lin's similarity measure is defined as:\n",
+    "$$ \\textrm{sim}_{\\textrm{Lin}}(t_{1}, t_{2}) = \\frac{2*\\textrm{sim}_{\\textrm{Resnik}}(t_1, t_2)}{IC(t_1) + IC(t_2)} $$"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Then we can calculate this as follows:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Lin similarity score (GO:0048364, GO:0044707) = 0.607721957763\n"
+     ]
+    }
+   ],
+   "source": [
+    "from goatools.semantic import lin_sim\n",
+    "\n",
+    "sim_l = lin_sim(go_id3, go_id4, go, termcounts)\n",
+    "print('Lin similarity score ({}, {}) = {}'.format(go_id3, go_id4, sim_l))"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 2",
+   "language": "python",
+   "name": "python2"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 2
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython2",
+   "version": "2.7.11"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}