make tutorial 10 testable (deepset-ai#16)
* make tutorial 10 testable

* add tutorial 10 to the nightly
masci committed Sep 16, 2022
1 parent 823cd74 commit 3bee7bb
Showing 3 changed files with 65 additions and 95 deletions.
1 change: 1 addition & 0 deletions .github/workflows/nightly.yml
@@ -28,6 +28,7 @@ jobs:
- 05_Evaluation
- 06_Better_Retrieval_via_Embedding_Retrieval
- 07_RAG_Generator
- 10_Knowledge_Graph

env:
ELASTICSEARCH_HOST: "elasticsearch"
42 changes: 21 additions & 21 deletions markdowns/10.md
@@ -15,14 +15,14 @@ Haystack allows storing and querying knowledge graphs with the help of pre-train
This tutorial demonstrates how to load an existing knowledge graph into Haystack, load a pre-trained retriever, and execute text queries on the knowledge graph.
The training of models that translate text queries into SPARQL queries is currently not supported.

To start, install the latest release of Haystack with `pip`:

```python
# Install the latest release of Haystack in your own environment
#! pip install farm-haystack

# Install the latest main of Haystack
!pip install --upgrade pip
!pip install git+https://github.com/deepset-ai/haystack.git#egg=farm-haystack[colab,inmemorygraph]
```

```bash
%%bash

pip install --upgrade pip
pip install git+https://github.com/deepset-ai/haystack.git#egg=farm-haystack[colab,inmemorygraph]
```

## Logging
@@ -40,23 +40,13 @@ logging.basicConfig(format="%(levelname)s - %(name)s - %(message)s", level=logg
logging.getLogger("haystack").setLevel(logging.INFO)
```
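As a quick sanity check of that format string, here is a small, self-contained standard-library snippet (an editor's illustration, not part of the original tutorial) showing how the example message is produced; the `pathname` and `lineno` values are placeholders, since only the three fields in the format string matter:

```python
import logging

# Render a record with the tutorial's format string, the same way the
# basicConfig call in the tutorial formats log output.
fmt = logging.Formatter("%(levelname)s - %(name)s - %(message)s")
record = logging.LogRecord(
    name="haystack.utils.preprocessing",
    level=logging.INFO,
    pathname="preprocessing.py",  # placeholder; not used by this format string
    lineno=1,                     # placeholder; not used by this format string
    msg="Converting data/tutorial1/218_Olenna_Tyrell.txt",
    args=None,
    exc_info=None,
)
print(fmt.format(record))
# INFO - haystack.utils.preprocessing - Converting data/tutorial1/218_Olenna_Tyrell.txt
```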

## Downloading Knowledge Graph and Model

```python
# Here are some imports that we'll need

import subprocess
import time
from pathlib import Path

from haystack.nodes import Text2SparqlRetriever
from haystack.document_stores import InMemoryKnowledgeGraph
```

```python
from haystack.utils import fetch_archive_from_http
```

## Downloading Knowledge Graph and Model


```python
# Let's first fetch some triples that we want to store in our knowledge graph
# Here: exemplary triples from the wizarding world
graph_dir = "data/tutorial10"
@@ -79,6 +69,11 @@ Currently, Haystack supports two alternative implementations for knowledge graph


```python
from pathlib import Path

from haystack.document_stores import InMemoryKnowledgeGraph


# Initialize an in-memory knowledge graph and use "tutorial_10_index" as the name of the index
kg = InMemoryKnowledgeGraph(index="tutorial_10_index")

@@ -98,12 +93,14 @@ print(f"There are {len(kg.get_all_triples())} triples stored in the knowledge gr

#### Launching a GraphDB instance

Unfortunately, there seems to be no good way to run GraphDB in Colab environments.
In your local environment, you could start a GraphDB server with Docker; see GraphDB's website for the free version: https://www.ontotext.com/products/graphdb/graphdb-free/


```python
# # Unfortunately, there seems to be no good way to run GraphDB in colab environments
# # In your local environment, you could start a GraphDB server with docker
# # Feel free to check GraphDB's website for the free version https://www.ontotext.com/products/graphdb/graphdb-free/
# import os
# import subprocess
# import time

# LAUNCH_GRAPHDB = os.environ.get("LAUNCH_GRAPHDB", False)

@@ -157,6 +154,9 @@


```python
from haystack.nodes import Text2SparqlRetriever


# Load a pre-trained model that translates text queries to SPARQL queries
kgqa_retriever = Text2SparqlRetriever(knowledge_graph=kg, model_name_or_path=Path(model_dir) / "hp_v3.4")
```
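To make the text-to-SPARQL idea concrete without downloading the pre-trained model, here is a toy, self-contained sketch (an editor's illustration, not Haystack's implementation): a question is mapped to a triple pattern, which is then matched against stored triples. The triples mirror this tutorial's wizarding-world examples; the helper names are invented for illustration:

```python
# Toy sketch of the text -> SPARQL -> answer flow; NOT Haystack's API.
# Triples mirror the tutorial's wizarding-world examples.
triples = [
    ("hp:Harry_potter", "hp:house", "hp:Gryffindor"),
    ("hp:Hermione_granger", "hp:patronus", "hp:Otter"),
    ("hp:Rubeus_hagrid", "hp:job", "hp:Keeper_of_keys_and_grounds"),
]

def run_pattern(s=None, p=None, o=None):
    """Return every triple matching the bound slots, like a tiny SPARQL SELECT."""
    return [
        t for t in triples
        if (s is None or t[0] == s) and (p is None or t[1] == p) and (o is None or t[2] == o)
    ]

def toy_text2sparql(query):
    """A real Text2SparqlRetriever uses a seq2seq model for this step;
    here one mapping is hard-coded to show the shape of the flow."""
    if query == "In which house is Harry Potter?":
        # i.e. SELECT ?o WHERE { hp:Harry_potter hp:house ?o }
        return {"s": "hp:Harry_potter", "p": "hp:house"}
    raise ValueError("toy model only knows one question")

pattern = toy_text2sparql("In which house is Harry Potter?")
answers = [o for (_, _, o) in run_pattern(**pattern)]
print(answers)  # ['hp:Gryffindor']
```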
117 changes: 43 additions & 74 deletions tutorials/10_Knowledge_Graph.ipynb
@@ -14,7 +14,9 @@
"\n",
"Haystack allows storing and querying knowledge graphs with the help of pre-trained models that translate text queries to SPARQL queries.\n",
"This tutorial demonstrates how to load an existing knowledge graph into Haystack, load a pre-trained retriever, and execute text queries on the knowledge graph.\n",
"The training of models that translate text queries into SPARQL queries is currently not supported."
"The training of models that translate text queries into SPARQL queries is currently not supported.\n",
"\n",
"To start, install the latest release of Haystack with `pip`:"
]
},
{
@@ -27,75 +29,51 @@
},
"pycharm": {
"name": "#%%\n"
},
"vscode": {
"languageId": "shellscript"
}
},
"outputs": [],
"source": [
"# Install the latest release of Haystack in your own environment\n",
"#! pip install farm-haystack\n",
"%%bash\n",
"\n",
"# Install the latest main of Haystack\n",
"!pip install --upgrade pip\n",
"!pip install git+https://github.com/deepset-ai/haystack.git#egg=farm-haystack[colab,inmemorygraph]"
"pip install --upgrade pip\n",
"pip install git+https://github.com/deepset-ai/haystack.git#egg=farm-haystack[colab,inmemorygraph]"
]
},
{
"cell_type": "markdown",
"source": [
"## Logging\n",
"\n",
"We configure how logging messages should be displayed and which log level should be used before importing Haystack.\n",
"Example log message:\n",
"INFO - haystack.utils.preprocessing - Converting data/tutorial1/218_Olenna_Tyrell.txt\n",
"The default log level in basicConfig is WARNING, so the explicit parameter is not necessary, but it can easily be changed:"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
},
"source": [
"import logging\n",
"## Logging\n",
"\n",
"logging.basicConfig(format=\"%(levelname)s - %(name)s - %(message)s\", level=logging.WARNING)\n",
"logging.getLogger(\"haystack\").setLevel(logging.INFO)"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
"We configure how logging messages should be displayed and which log level should be used before importing Haystack.\n",
"Example log message:\n",
"INFO - haystack.utils.preprocessing - Converting data/tutorial1/218_Olenna_Tyrell.txt\n",
"The default log level in basicConfig is WARNING, so the explicit parameter is not necessary, but it can easily be changed:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
},
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"# Here are some imports that we'll need\n",
"\n",
"import subprocess\n",
"import time\n",
"from pathlib import Path\n",
"import logging\n",
"\n",
"from haystack.nodes import Text2SparqlRetriever\n",
"from haystack.document_stores import InMemoryKnowledgeGraph\n",
"from haystack.utils import fetch_archive_from_http"
"logging.basicConfig(format=\"%(levelname)s - %(name)s - %(message)s\", level=logging.WARNING)\n",
"logging.getLogger(\"haystack\").setLevel(logging.INFO)"
]
},
{
@@ -123,6 +101,9 @@
},
"outputs": [],
"source": [
"from haystack.utils import fetch_archive_from_http\n",
"\n",
"\n",
"# Let's first fetch some triples that we want to store in our knowledge graph\n",
"# Here: exemplary triples from the wizarding world\n",
"graph_dir = \"data/tutorial10\"\n",
@@ -160,19 +141,15 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The last triple stored in the knowledge graph is: {'s': {'type': 'uri', 'value': 'https://deepset.ai/harry_potter/Harry_potter'}, 'p': {'type': 'uri', 'value': 'https://deepset.ai/harry_potter/family'}, 'o': {'type': 'uri', 'value': 'https://deepset.ai/harry_potter/Dudley_dursleys_children'}}\n",
"There are 118543 triples stored in the knowledge graph.\n"
]
}
],
"outputs": [],
"source": [
"from pathlib import Path\n",
"\n",
"from haystack.document_stores import InMemoryKnowledgeGraph\n",
"\n",
"\n",
"# Initialize an in-memory knowledge graph and use \"tutorial_10_index\" as the name of the index\n",
"kg = InMemoryKnowledgeGraph(index=\"tutorial_10_index\")\n",
"\n",
@@ -206,7 +183,10 @@
}
},
"source": [
"#### Launching a GraphDB instance"
"#### Launching a GraphDB instance\n",
"\n",
"Unfortunately, there seems to be no good way to run GraphDB in Colab environments.\n",
"In your local environment, you could start a GraphDB server with Docker; see GraphDB's website for the free version: https://www.ontotext.com/products/graphdb/graphdb-free/"
]
},
{
@@ -223,10 +203,9 @@
},
"outputs": [],
"source": [
"# # Unfortunately, there seems to be no good way to run GraphDB in colab environments\n",
"# # In your local environment, you could start a GraphDB server with docker\n",
"# # Feel free to check GraphDB's website for the free version https://www.ontotext.com/products/graphdb/graphdb-free/\n",
"# import os\n",
"# import subprocess\n",
"# import time\n",
"\n",
"# LAUNCH_GRAPHDB = os.environ.get(\"LAUNCH_GRAPHDB\", False)\n",
"\n",
@@ -322,6 +301,9 @@
"metadata": {},
"outputs": [],
"source": [
"from haystack.nodes import Text2SparqlRetriever\n",
"\n",
"\n",
"# Load a pre-trained model that translates text queries to SPARQL queries\n",
"kgqa_retriever = Text2SparqlRetriever(knowledge_graph=kg, model_name_or_path=Path(model_dir) / \"hp_v3.4\")"
]
@@ -344,7 +326,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": null,
"metadata": {
"collapsed": false,
"jupyter": {
@@ -354,20 +336,7 @@
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Translating the text query \"In which house is Harry Potter?\" to a SPARQL query and executing it on the knowledge graph...\n",
"[{'answer': ['https://deepset.ai/harry_potter/Gryffindor'], 'prediction_meta': {'model': 'Text2SparqlRetriever', 'sparql_query': 'select ?a { hp:Harry_potter hp:house ?a . }'}}]\n",
"Executing a SPARQL query with prefixed names of resources...\n",
"(['https://deepset.ai/harry_potter/Rubeus_hagrid', 'https://deepset.ai/harry_potter/Ogg'], 'select distinct ?sbj where { ?sbj hp:job hp:Keeper_of_keys_and_grounds . }')\n",
"Executing a SPARQL query with full names of resources...\n",
"(['https://deepset.ai/harry_potter/Otter'], 'select distinct ?obj where { <https://deepset.ai/harry_potter/Hermione_granger> <https://deepset.ai/harry_potter/patronus> ?obj . }')\n"
]
}
],
"outputs": [],
"source": [
"query = \"In which house is Harry Potter?\"\n",
"print(f'Translating the text query \"{query}\" to a SPARQL query and executing it on the knowledge graph...')\n",
@@ -418,7 +387,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "Python 3.10.6 64-bit",
"language": "python",
"name": "python3"
},
@@ -432,11 +401,11 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.4"
"version": "3.10.6"
},
"vscode": {
"interpreter": {
"hash": "d6fc774dec8e6d4d8b6a5562b41269a570ea5456d1c03f28da35966a9134f033"
"hash": "bda33b16be7e844498c7c2d368d72665b4f1d165582b9547ed22a0249a29ca2e"
}
}
},
