diff --git a/html5lib/sanitizer.py b/html5lib/sanitizer.py index 2cef2655..b714e8c9 100644 --- a/html5lib/sanitizer.py +++ b/html5lib/sanitizer.py @@ -207,7 +207,11 @@ def allowed_token(self, token, token_type): unescape(attrs[attr])).lower() # remove replacement characters from unescaped characters val_unescaped = val_unescaped.replace("\ufffd", "") - uri = urlparse.urlparse(val_unescaped) + try: + uri = urlparse.urlparse(val_unescaped) + except ValueError: + uri = None + del attrs[attr] if uri and uri.scheme: if uri.scheme not in self.allowed_protocols: del attrs[attr] diff --git a/html5lib/tests/test_sanitizer.py b/html5lib/tests/test_sanitizer.py index 0507d86b..e98c8c85 100644 --- a/html5lib/tests/test_sanitizer.py +++ b/html5lib/tests/test_sanitizer.py @@ -113,6 +113,11 @@ def test_sanitizer(): "", toxml) + yield (runSanitizerTest, "test_invalid_ipv6_url", + "", + "", + toxml) + yield (runSanitizerTest, "test_data_uri_disallowed_type", "", "",