Skip to content

Commit

Permalink
Now using GOEnrichmentStudyNS, rather than GOEnrichmentStudy, to run …
Browse files Browse the repository at this point in the history
…separate GOEAs for BP, MF, CC

#127
  • Loading branch information
dvklopfenstein committed May 6, 2019
1 parent 28bc913 commit 040b4d9
Show file tree
Hide file tree
Showing 10 changed files with 544 additions and 614 deletions.
29 changes: 17 additions & 12 deletions notebooks/annotation_coverage.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 1,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -47,15 +47,19 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 20,385 items READ: gene2go\n",
" 11,563 items READ: gene2go\n"
"HMS:0:00:05.295065 269,133 annotations READ: gene2go \n",
"1 taxids stored: 9606\n",
"17617 IDs in association branch, BP\n",
"HMS:0:00:03.443019 102,863 annotations READ: gene2go \n",
"1 taxids stored: 7227\n",
"10440 IDs in association branch, BP\n"
]
}
],
Expand All @@ -75,14 +79,15 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 31,948 items READ: gene2go\n"
"HMS:0:00:05.829100 371,996 annotations READ: gene2go \n",
"2 taxids stored: 7227 9606\n"
]
}
],
Expand All @@ -105,7 +110,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -127,7 +132,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -163,7 +168,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 6,
"metadata": {},
"outputs": [
{
Expand All @@ -172,8 +177,8 @@
"text": [
" taxid GOs GeneIDs Coverage\n",
"------ ------ ------- ----------------------\n",
" 9606 17,975 18,475 88% GO coverage of 20,913 protein-coding genes\n",
" 7227 8,352 10,635 76% GO coverage of 13,919 protein-coding genes\n"
" 9606 0 0 0% GO coverage of 20,913 protein-coding genes\n",
" 7227 0 0 0% GO coverage of 13,919 protein-coding genes\n"
]
}
],
Expand Down Expand Up @@ -216,7 +221,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
"version": "3.6.7"
}
},
"nbformat": 4,
Expand Down
98 changes: 39 additions & 59 deletions notebooks/annotations_gaf_file.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -29,39 +29,14 @@
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"--2019-04-08 09:43:16-- http://current.geneontology.org/annotations/goa_human.gaf.gz\n",
"Resolving current.geneontology.org (current.geneontology.org)... 143.204.145.94, 143.204.145.35, 143.204.145.220, ...\n",
"Connecting to current.geneontology.org (current.geneontology.org)|143.204.145.94|:80... connected.\n",
"HTTP request sent, awaiting response... 200 OK\n",
"Length: 8524279 (8.1M) [application/gzip]\n",
"Saving to: ‘goa_human.gaf.gz’\n",
"\n",
"goa_human.gaf.gz 100%[===================>] 8.13M 2.05MB/s in 3.9s \n",
"\n",
"utime(goa_human.gaf.gz): Operation not permitted\n",
"2019-04-08 09:43:20 (2.07 MB/s) - ‘goa_human.gaf.gz’ saved [8524279/8524279]\n",
"\n"
]
}
],
"source": [
"# Fetch the human GO annotation file (GAF, gzip-compressed) into the working directory\n",
"!wget http://current.geneontology.org/annotations/goa_human.gaf.gz"
]
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"!gunzip goa_human.gaf.gz"
"import os\n",
"# Fetch and decompress the human GAF only when the decompressed file is missing.\n",
"# gunzip removes the .gz archive, so the guard must test 'goa_human.gaf' (the file read below),\n",
"# otherwise every re-run would download the archive again.\n",
"if not os.path.exists('goa_human.gaf'):\n",
"    # Reuse an archive left over from an earlier, interrupted run instead of downloading twice\n",
"    if not os.path.exists('goa_human.gaf.gz'):\n",
"        !wget http://current.geneontology.org/annotations/goa_human.gaf.gz\n",
"    !gunzip goa_human.gaf.gz"
]
},
{
Expand All @@ -73,14 +48,14 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" READ 476,348 associations: goa_human.gaf\n"
"HMS:0:00:13.490551 424,966 annotations READ: goa_human.gaf \n"
]
}
],
Expand All @@ -95,43 +70,48 @@
"metadata": {},
"source": [
"## 3) Get Annotations\n",
"The annotations will be stored in a dict where:\n",
"The annotations will be stored in three dicts, one for each GODAG branch, where:\n",
" * the key is the protein ID and \n",
" * the value is a list of GO IDs associated with the protein."
]
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"id2gos = ogaf.read_gaf()"
"ns2assc = ogaf.get_ns2assc()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"A0A024R161 : GO:0003924 GO:0005515 GO:0005834 GO:0007186\n",
"\n",
"A0A024RBG1 : GO:0003723 GO:0005829 GO:0008486 GO:0046872 GO:0052840 GO:0052842\n",
"\n",
"A0A075B6H7 : GO:0002377 GO:0005615 GO:0006955\n",
"\n"
"BP A0A075B6H9 : GO:0002250\n",
"BP A0A075B6I0 : GO:0002250\n",
"BP A0A075B6I1 : GO:0002250\n",
"MF A0A024RBG1 : GO:0003723 GO:0008486 GO:0046872 GO:0052840 GO:0052842\n",
"MF A0A075B6H9 : GO:0003823\n",
"MF A0A075B6I0 : GO:0003823\n",
"CC A0A024RBG1 : GO:0005829\n",
"CC A0A075B6H9 : GO:0005886\n",
"CC A0A075B6I0 : GO:0005886\n"
]
}
],
"source": [
"for protein_id, go_ids in sorted(id2gos.items())[:3]:\n",
" print(\"{PROT:7} : {GOs}\\n\".format(\n",
" PROT=protein_id,\n",
" GOs=' '.join(sorted(go_ids))))"
"# For each namespace, show the first three proteins (sorted by ID) with their sorted GO IDs\n",
"for namespace, associations in ns2assc.items():\n",
"    first_three = sorted(associations.items())[:3]\n",
"    for protein_id, go_ids in first_three:\n",
"        go_str = ' '.join(sorted(go_ids))\n",
"        print(\"{NS} {PROT:7} : {GOs}\".format(NS=namespace, PROT=protein_id, GOs=go_str))"
]
},
{
Expand All @@ -146,14 +126,14 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"ntgafobj(DB='UniProtKB', DB_ID='A0A024R161', DB_Symbol='DNAJC25-GNG10', Qualifier=[], GO_ID='GO:0005834', DB_Reference={'PMID:21873635'}, Evidence_Code='IBA', With_From={'UniProtKB:P63212', 'FB:FBgn0004921', 'UniProtKB:O14610', 'RGD:621514', 'PANTHER:PTN001418483', 'RGD:69268', 'RGD:1595475'}, Aspect='C', DB_Name={'Guanine nucleotide-binding protein subunit gamma'}, DB_Synonym={'hCG_1994888', 'DNAJC25-GNG10'}, DB_Type='protein', Taxon=[9606], Date=datetime.date(2018, 4, 25), Assigned_By='GO_Central', Annotation_Extension=set(), Gene_Product_Form_ID=set())\n"
"ntgafobj(DB='UniProtKB', DB_ID='A0A024RBG1', DB_Symbol='NUDT4B', Qualifier=set(), GO_ID='GO:0003723', DB_Reference={'GO_REF:0000037'}, Evidence_Code='IEA', With_From={'UniProtKB-KW:KW-0694'}, NS='MF', DB_Name={'Diphosphoinositol polyphosphate phosphohydrolase NUDT4B'}, DB_Synonym={'NUDT4B'}, DB_Type='protein', Taxon=[9606], Date=datetime.date(2019, 4, 6), Assigned_By='UniProt', Extension=None, Gene_Product_Form_ID=set())\n"
]
}
],
Expand Down Expand Up @@ -200,23 +180,23 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"A0A024R161 DNAJC25-GNG10 GO:0005834 IBA 2018-04-25 GO_Central\n",
"A0A024R161 DNAJC25-GNG10 GO:0005515 IBA 2018-04-25 GO_Central\n",
"A0A024R161 DNAJC25-GNG10 GO:0003924 IEA 2019-02-11 InterPro\n",
"A0A024R161 DNAJC25-GNG10 GO:0007186 IEA 2019-02-11 InterPro\n",
"A0A024RBG1 NUDT4B GO:0003723 IEA 2019-04-06 UniProt\n",
"A0A024RBG1 NUDT4B GO:0005829 IDA 2016-12-04 HPA\n",
"A0A024RBG1 NUDT4B GO:0003723 IEA 2019-02-12 UniProt\n",
"A0A024RBG1 NUDT4B GO:0008486 IEA 2019-02-11 UniProt\n",
"A0A024RBG1 NUDT4B GO:0046872 IEA 2019-02-12 UniProt\n",
"A0A024RBG1 NUDT4B GO:0052840 IEA 2019-02-11 UniProt\n",
"A0A024RBG1 NUDT4B GO:0052842 IEA 2019-02-11 UniProt\n"
"A0A024RBG1 NUDT4B GO:0008486 IEA 2019-04-06 UniProt\n",
"A0A024RBG1 NUDT4B GO:0046872 IEA 2019-04-06 UniProt\n",
"A0A024RBG1 NUDT4B GO:0052840 IEA 2019-04-06 UniProt\n",
"A0A024RBG1 NUDT4B GO:0052842 IEA 2019-04-06 UniProt\n",
"A0A075B6H9 IGLV4-69 GO:0002250 IEA 2019-04-06 UniProt\n",
"A0A075B6H9 IGLV4-69 GO:0003823 IEA 2019-04-06 UniProt\n",
"A0A075B6H9 IGLV4-69 GO:0005886 IEA 2019-04-06 UniProt\n",
"A0A075B6I0 IGLV8-61 GO:0002250 IEA 2019-04-06 UniProt\n"
]
}
],
Expand Down Expand Up @@ -250,7 +230,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.1"
"version": "3.6.7"
}
},
"nbformat": 4,
Expand Down
Loading

0 comments on commit 040b4d9

Please sign in to comment.