From b206470f9ecd71b006a37dd1298dd3d9e3dd46dd Mon Sep 17 00:00:00 2001 From: martinRenou Date: Thu, 28 Jul 2022 16:24:52 +0200 Subject: [PATCH] GHSL-2021-1017, GHSL-2021-1020, GHSL-2021-1021 --- nbconvert/exporters/html.py | 10 +++ nbconvert/exporters/templateexporter.py | 5 +- .../tests/files/notebook_inject.ipynb | 63 +++++++++++++++++++ nbconvert/exporters/tests/test_html.py | 29 ++++++++- nbconvert/nbconvertapp.py | 9 +++ .../nbconvert/templates/classic/base.html.j2 | 31 +++++++-- .../nbconvert/templates/lab/base.html.j2 | 20 +++++- 7 files changed, 155 insertions(+), 12 deletions(-) diff --git a/nbconvert/exporters/html.py b/nbconvert/exporters/html.py index bf15d3337..b56a2c24f 100644 --- a/nbconvert/exporters/html.py +++ b/nbconvert/exporters/html.py @@ -5,6 +5,7 @@ import base64 import json +from lxml.html.clean import clean_html import mimetypes import os from pathlib import Path @@ -149,6 +150,14 @@ def _template_name_default(self): help="Template specific theme(e.g. the name of a JupyterLab CSS theme distributed as prebuilt extension for the lab template)", ).tag(config=True) + sanitize_html = Bool( + False, + help=( + "Whether the HTML in Markdown cells and cell outputs should be sanitized." + "This should be set to True by nbviewer or similar tools." + ), + ).tag(config=True) + embed_images = Bool( False, help="Whether or not to embed images as base64 in markdown cells." ).tag(config=True) @@ -287,4 +296,5 @@ def resources_include_url(name): resources["jupyter_widgets_base_url"] = self.jupyter_widgets_base_url resources["widget_renderer_url"] = self.widget_renderer_url resources["html_manager_semver_range"] = self.html_manager_semver_range + resources["should_sanitize_html"] = self.sanitize_html return resources diff --git a/nbconvert/exporters/templateexporter.py b/nbconvert/exporters/templateexporter.py index 036cfb648..472549920 100644 --- a/nbconvert/exporters/templateexporter.py +++ b/nbconvert/exporters/templateexporter.py @@ -70,10 +70,9 @@ "get_metadata": filters.get_metadata, "convert_pandoc": filters.convert_pandoc, "json_dumps": json.dumps, - # browsers will parse , closing a script tag early - # Since JSON allows escaping forward slash, this will still be parsed by JSON - "escape_html_script": lambda x: x.replace("", "<\\/script>"), + # For removing any HTML "escape_html": html.escape, + # For sanitizing HTML for any XSS "clean_html": clean_html, "strip_trailing_newline": filters.strip_trailing_newline, "text_base64": filters.text_base64, diff --git a/nbconvert/exporters/tests/files/notebook_inject.ipynb b/nbconvert/exporters/tests/files/notebook_inject.ipynb index fd5e94bba..d88562564 100644 --- a/nbconvert/exporters/tests/files/notebook_inject.ipynb +++ b/nbconvert/exporters/tests/files/notebook_inject.ipynb @@ -191,6 +191,69 @@ } ], "source": [""] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "2616e107", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import os; os.system('touch /tmp/pwned')" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "3616e107", + "metadata": {}, + "outputs": [ + { + "data": { + "text/markdown": [ + "" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import os; os.system('touch /tmp/pwned')" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "4616e107", + "metadata": {}, + "outputs": [ + { + "data": { + "application/javascript": [ + "alert('application/javascript output')" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import os; os.system('touch /tmp/pwned')" + ] } ], "metadata": { diff --git a/nbconvert/exporters/tests/test_html.py b/nbconvert/exporters/tests/test_html.py index 6c44813d6..6a43c709d 100644 --- a/nbconvert/exporters/tests/test_html.py +++ b/nbconvert/exporters/tests/test_html.py @@ -137,7 +137,9 @@ def test_basic_name(self): def test_javascript_injection(self): for template in ["lab", "classic", "reveal"]: - (output, resources) = HTMLExporter(template_name=template).from_filename(self._get_notebook('notebook_inject.ipynb')) + (output, resources) = HTMLExporter( + template_name=template + ).from_filename(self._get_notebook('notebook_inject.ipynb')) # Check injection in the metadata.title of the Notebook assert "" not in output @@ -150,7 +152,6 @@ def test_javascript_injection(self): # Check injection in the cell.source of the Notebook assert "" not in output - assert "" not in output # Check injection in svg output assert "" not in output @@ -170,3 +171,27 @@ def test_javascript_injection(self): # Check injection in widget view assert "" in output + assert "" in output + assert "" in output + assert "alert('application/javascript output')" in output + + # But it's an opt-out + for template in ["lab", "classic", "reveal"]: + (output, resources) = HTMLExporter( + template_name=template, + sanitize_html=True + ).from_filename(self._get_notebook('notebook_inject.ipynb')) + + assert "" not in output + assert "" not in output + assert "" not in output + assert "alert('application/javascript output')" not in output diff --git a/nbconvert/nbconvertapp.py b/nbconvert/nbconvertapp.py index 16ff275ec..65638e7ba 100755 --- a/nbconvert/nbconvertapp.py +++ b/nbconvert/nbconvertapp.py @@ -65,6 +65,7 @@ def validate(self, obj, value): "template": "TemplateExporter.template_name", "template-file": "TemplateExporter.template_file", "theme": "HTMLExporter.theme", + "sanitize_html": "HTMLExporter.sanitize_html", "writer": "NbConvertApp.writer_class", "post": "NbConvertApp.postprocessor_class", "output": "NbConvertApp.output_base", @@ -178,6 +179,14 @@ def validate(self, obj, value): }, """Embed the images as base64 dataurls in the output. This flag is only useful for the HTML/WebPDF/Slides exports.""", ), + "sanitize-html": ( + { + "HTMLExporter": { + "sanitize_html": True, + } + }, + """Whether the HTML in Markdown cells and cell outputs should be sanitized..""", + ), } ) diff --git a/share/jupyter/nbconvert/templates/classic/base.html.j2 b/share/jupyter/nbconvert/templates/classic/base.html.j2 index 432f28ace..1e0bba0f9 100644 --- a/share/jupyter/nbconvert/templates/classic/base.html.j2 +++ b/share/jupyter/nbconvert/templates/classic/base.html.j2 @@ -81,7 +81,12 @@ {%- endif -%}
-{{ cell.source | markdown2html | strip_files_prefix | clean_html }} +{%- if resources.should_sanitize_html %} +{%- set html_value=cell.source | markdown2html | strip_files_prefix | clean_html -%} +{%- else %} +{%- set html_value=cell.source | markdown2html | strip_files_prefix -%} +{%- endif %} +{{ html_value }}
@@ -133,23 +138,33 @@ unknown type {{ cell.type }} {% block data_html scoped -%}
+{%- if resources.should_sanitize_html %} +{%- set html_value=output.data['text/html'] | clean_html -%} +{%- else %} +{%- set html_value=output.data['text/html'] -%} +{%- endif %} {%- if output.get('metadata', {}).get('text/html', {}).get('isolated') -%} {%- else -%} -{{ output.data['text/html'] }} +{{ html_value }} {%- endif -%}
{%- endblock data_html %} {% block data_markdown scoped -%} +{%- if resources.should_sanitize_html %} +{%- set html_value=output.data['text/markdown'] | markdown2html | clean_html -%} +{%- else %} +{%- set html_value=output.data['text/markdown'] | markdown2html -%} +{%- endif %}
-{{ output.data['text/markdown'] | markdown2html }} +{{ html_value }}
{%- endblock data_markdown %} @@ -234,14 +249,17 @@ alt="{{ alttext | escape_html }}" {%- block data_javascript scoped %} {% set div_id = uuid4() %}
+{%- if not resources.should_sanitize_html %} +{%- endif %}
{%- endblock -%} {%- block data_widget_view scoped %} +{%- if not resources.should_sanitize_html %} {% set div_id = uuid4() %} {% set datatype_list = output.data | filter_data_type %} {% set datatype = datatype_list[0]%} @@ -253,14 +271,17 @@ var element = $('#{{ div_id }}'); {{ output.data[datatype] | json_dumps | escape_html }} +{%- endif %} {%- endblock data_widget_view -%} {%- block footer %} +{%- if not resources.should_sanitize_html %} {% set mimetype = 'application/vnd.jupyter.widget-state+json'%} {% if mimetype in nb.metadata.get("widgets",{})%} {% endif %} +{%- endif %} {{ super() }} {%- endblock footer-%} diff --git a/share/jupyter/nbconvert/templates/lab/base.html.j2 b/share/jupyter/nbconvert/templates/lab/base.html.j2 index ea56cda9e..82a538bf7 100644 --- a/share/jupyter/nbconvert/templates/lab/base.html.j2 +++ b/share/jupyter/nbconvert/templates/lab/base.html.j2 @@ -98,7 +98,12 @@ {{ self.empty_in_prompt() }} {%- endif -%}
-{{ cell.source | markdown2html | strip_files_prefix | clean_html }} +{%- if resources.should_sanitize_html %} +{%- set html_value=cell.source | markdown2html | strip_files_prefix | clean_html -%} +{%- else %} +{%- set html_value=cell.source | markdown2html | strip_files_prefix -%} +{%- endif %} +{{ html_value }}
@@ -161,13 +166,22 @@ unknown type {{ cell.type }} {% block data_html scoped -%}
+{%- if resources.should_sanitize_html %} +{{ output.data['text/html'] | clean_html }} +{%- else %} {{ output.data['text/html'] }} +{%- endif %}
{%- endblock data_html %} {% block data_markdown scoped -%} +{%- if resources.should_sanitize_html %} +{%- set html_value=output.data['text/markdown'] | markdown2html | clean_html -%} +{%- else %} +{%- set html_value=output.data['text/markdown'] | markdown2html -%} +{%- endif %}
-{{ output.data['text/markdown'] | markdown2html }} +{{ html_value }}
{%- endblock data_markdown %} @@ -266,10 +280,12 @@ jp-needs-dark-background {% set div_id = uuid4() %} {%- block data_javascript scoped %}
+{%- if not resources.should_sanitize_html %} +{%- endif %}
{%- endblock -%}