From 29526c56ec93e70b7e062a874d59afa1f9bc9078 Mon Sep 17 00:00:00 2001
From: Donald Stufft
Date: Mon, 7 Sep 2015 12:11:23 -0400
Subject: [PATCH] When URLs are invalid IPv6 URLs drop the attr rather than error

---
 html5lib/sanitizer.py            | 6 +++++-
 html5lib/tests/test_sanitizer.py | 5 +++++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/html5lib/sanitizer.py b/html5lib/sanitizer.py
index 2cef2655..b714e8c9 100644
--- a/html5lib/sanitizer.py
+++ b/html5lib/sanitizer.py
@@ -207,7 +207,11 @@ def allowed_token(self, token, token_type):
                                        unescape(attrs[attr])).lower()
                 # remove replacement characters from unescaped characters
                 val_unescaped = val_unescaped.replace("\ufffd", "")
-                uri = urlparse.urlparse(val_unescaped)
+                try:
+                    uri = urlparse.urlparse(val_unescaped)
+                except ValueError:
+                    uri = None
+                    del attrs[attr]
                 if uri and uri.scheme:
                     if uri.scheme not in self.allowed_protocols:
                         del attrs[attr]
diff --git a/html5lib/tests/test_sanitizer.py b/html5lib/tests/test_sanitizer.py
index 0507d86b..e98c8c85 100644
--- a/html5lib/tests/test_sanitizer.py
+++ b/html5lib/tests/test_sanitizer.py
@@ -113,6 +113,11 @@ def test_sanitizer():
            "",
            toxml)
 
+    yield (runSanitizerTest, "test_invalid_ipv6_url",
+           "",
+           "",
+           toxml)
+
     yield (runSanitizerTest, "test_data_uri_disallowed_type",
            "",
            "",