diff --git a/html5lib/sanitizer.py b/html5lib/sanitizer.py
index 2cef2655..b714e8c9 100644
--- a/html5lib/sanitizer.py
+++ b/html5lib/sanitizer.py
@@ -207,7 +207,11 @@ def allowed_token(self, token, token_type):
unescape(attrs[attr])).lower()
# remove replacement characters from unescaped characters
val_unescaped = val_unescaped.replace("\ufffd", "")
- uri = urlparse.urlparse(val_unescaped)
+ try:
+ uri = urlparse.urlparse(val_unescaped)
+ except ValueError:
+ uri = None
+ del attrs[attr]
if uri and uri.scheme:
if uri.scheme not in self.allowed_protocols:
del attrs[attr]
diff --git a/html5lib/tests/test_sanitizer.py b/html5lib/tests/test_sanitizer.py
index 0507d86b..e98c8c85 100644
--- a/html5lib/tests/test_sanitizer.py
+++ b/html5lib/tests/test_sanitizer.py
@@ -113,6 +113,11 @@ def test_sanitizer():
"",
toxml)
+ yield (runSanitizerTest, "test_invalid_ipv6_url",
+ "",
+ "",
+ toxml)
+
yield (runSanitizerTest, "test_data_uri_disallowed_type",
"",
"",