{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Calculating Wang's Semantic Similarity between two GO Terms\n", "\n", "1. Setup\n", "2. Calculate Wang's semantic similarity using optional *part_of* relationship\n", "3. Calculate Wang's semantic similarity using researcher-set edge weights. \n", "4. Calculate Wang's semantic similarity using *is_a* relationship only\n", "\n", "## 1) Setup\n", "\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Create a list of all GO IDs that will be compared" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# Researcher-provided GO terms related to smell\n", "go_a = 'GO:0007608'\n", "go_b = 'GO:0050911'\n", "go_c = 'GO:0042221'\n", "\n", "# Optional relationships. (Relationship, is_a, is required and always used)\n", "relationships = {'part_of'}" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "goids = {go_a, go_b, go_c}\n", "\n", "# Annotations for plotting\n", "go2txt = {\n", " go_a:'GO TERM A',\n", " go_b:'GO TERM B',\n", " go_c:'GO TERM C'}" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Read the GO DAG" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " EXISTS: go-basic.obo\n", "go-basic.obo: fmt(1.2) rel(2020-11-18) 47,326 GO Terms; optional_attrs(relationship)\n" ] } ], "source": [ "from goatools.base import get_godag\n", "\n", "godag = get_godag(\"go-basic.obo\", optional_attrs={'relationship'})" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Create a printing function for this notebook" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "def print_details(go_a, go_b, val):\n", " \"\"\"Print concise and informative report: GO terms and their semantic similarity\"\"\"\n", " pattern = ('go_a: {GOa} {GOa_name}\\n'\n", " 'go_b: {GOb} 
{GOb_name}\\n'\n", " 'wang: {VAL:.8f}\\n')\n", " print(pattern.format(\n", " GOa=go_a, GOa_name=godag[go_a].name,\n", " GOb=go_b, GOb_name=godag[go_b].name,\n", " VAL=val))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 2) Calculate Wang's semantic similarity using *part_of* relationship" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Instantiate a Wang's Semantic Similarity object\n", "We choose to use the optional relationship, *part_of*, in addition to the required *is_a* relationships for this example. \n", "Load all GO IDs that you will be comparing." ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "from goatools.semsim.termwise.wang import SsWang\n", "\n", "# goids: researcher-provided GO terms\n", "wang_r1 = SsWang(goids, godag, relationships)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Visualize researcher GO terms" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " GoSubDag: 3 sources in 14 GOs rcnt(True). 
0 alt GO IDs\n", " GoSubDag: namedtuple fields: NS level depth reldepth GO alt GO_name dcnt D1 childcnt REL REL_short rel id\n", " GoSubDag: relationships: {'part_of'}\n", " 3 usr 14 GOs WROTE: smell_r1.png\n" ] } ], "source": [ "from goatools.gosubdag.gosubdag import GoSubDag\n", "from goatools.gosubdag.plot.gosubdag_plot import GoSubDagPlot\n", "\n", "r1_png = 'smell_r1.png'\n", "r1_gosubdag = GoSubDag(goids, godag, relationships)\n", "GoSubDagPlot(r1_gosubdag, go2txt=go2txt).plt_dag(r1_png)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "![smell_r1](images/smell_r1.png)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "go_a: GO:0007608 sensory perception of smell\n", "go_b: GO:0050911 detection of chemical stimulus involved in sensory perception of smell\n", "wang: 0.59684773\n", "\n" ] } ], "source": [ "val = wang_r1.get_sim(go_a, go_b)\n", "print_details(go_a, go_b, val)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "go_a: GO:0007608 sensory perception of smell\n", "go_b: GO:0042221 response to chemical\n", "wang: 0.14114914\n", "\n" ] } ], "source": [ "val = wang_r1.get_sim(go_a, go_c)\n", "print_details(go_a, go_c, val)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "go_a: GO:0050911 detection of chemical stimulus involved in sensory perception of smell\n", "go_b: GO:0042221 response to chemical\n", "wang: 0.38020047\n", "\n" ] } ], "source": [ "val = wang_r1.get_sim(go_b, go_c)\n", "print_details(go_b, go_c, val)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 3) Calculate Wang's semantic similarity using researcher-set edge weights\n", "\n", "### Print the default edge weights in the Wang configuration" ] }, { "cell_type": "code", "execution_count": 10, "metadata": 
{}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Wang Semantic Similarity Configuration:\n", " Optional relationships: part_of\n", " Edge weights:\n", " 0.80000000 is_a\n", " 0.60000000 part_of\n", "\n" ] } ], "source": [ "wang_r1.prt_cfg()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Use researcher-specified edge weights" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "# Researcher-chosen weight for each relationship edge (replaces the defaults printed above)\n", "relationship2weight = {\n", " 'is_a': 0.9,\n", " 'part_of': 0.9\n", "}\n", "# Bind the re-weighted object to its own name so that wang_r1 (default weights)\n", "# still reproduces the Section 2 results if those cells are re-run\n", "wang_r1_weighted = SsWang(goids, godag, relationships, relationship2weight)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "go_a: GO:0007608 sensory perception of smell\n", "go_b: GO:0050911 detection of chemical stimulus involved in sensory perception of smell\n", "wang: 0.64019253\n", "\n" ] } ], "source": [ "val = wang_r1_weighted.get_sim(go_a, go_b)\n", "print_details(go_a, go_b, val)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "go_a: GO:0007608 sensory perception of smell\n", "go_b: GO:0042221 response to chemical\n", "wang: 0.16922363\n", "\n" ] } ], "source": [ "val = wang_r1_weighted.get_sim(go_a, go_c)\n", "print_details(go_a, go_c, val)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "go_a: GO:0050911 detection of chemical stimulus involved in sensory perception of smell\n", "go_b: GO:0042221 response to chemical\n", "wang: 0.35627471\n", "\n" ] } ], "source": [ "val = wang_r1_weighted.get_sim(go_b, go_c)\n", "print_details(go_b, go_c, val)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 4) Calculate Wang's semantic similarity with *is_a* relationships only" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "wang_r0 = SsWang(goids, 
godag)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Calculate Wang's semantic similarity using *is_a* relationship only" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " GoSubDag: 3 sources in 14 GOs rcnt(True). 0 alt GO IDs\n", " GoSubDag: namedtuple fields: NS level depth GO alt GO_name dcnt D1 id\n", " GoSubDag: relationships: set()\n", " 3 usr 14 GOs WROTE: smell_r0.png\n" ] } ], "source": [ "r0_png = 'smell_r0.png'\n", "r0_gosubdag = GoSubDag(goids, godag)\n", "GoSubDagPlot(r0_gosubdag, go2txt=go2txt).plt_dag(r0_png)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "![smell_r0](images/smell_r0.png)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Instantiate a Wang's Semantic Similarity object\n", "We use the required *is_a* relationships only. \n", "\n", "Load all GO IDs that you will be comparing." ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "go_a: GO:0007608 sensory perception of smell\n", "go_b: GO:0050911 detection of chemical stimulus involved in sensory perception of smell\n", "wang: 0.07122873\n", "\n" ] } ], "source": [ "val = wang_r0.get_sim(go_a, go_b)\n", "print_details(go_a, go_b, val)" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "go_a: GO:0007608 sensory perception of smell\n", "go_b: GO:0042221 response to chemical\n", "wang: 0.14114914\n", "\n" ] } ], "source": [ "val = wang_r0.get_sim(go_a, val = wang_r0.get_sim(go_b, go_c)
print_details(go_b, go_c, val)