Skip to content

Commit

Permalink
When a URL is an invalid IPv6 URL, drop the attribute rather than raising an error
Browse files: browse the repository at this point in the history
  • Loading branch information
dstufft committed Sep 7, 2015
1 parent 01b1ebb commit 29526c5
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 1 deletion.
6 changes: 5 additions & 1 deletion html5lib/sanitizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,11 @@ def allowed_token(self, token, token_type):
unescape(attrs[attr])).lower()
# remove replacement characters from unescaped characters
val_unescaped = val_unescaped.replace("\ufffd", "")
uri = urlparse.urlparse(val_unescaped)
try:
uri = urlparse.urlparse(val_unescaped)
except ValueError:
uri = None
del attrs[attr]
if uri and uri.scheme:
if uri.scheme not in self.allowed_protocols:
del attrs[attr]
Expand Down
5 changes: 5 additions & 0 deletions html5lib/tests/test_sanitizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,11 @@ def test_sanitizer():
"<audio controls=\"\" src=\"data:foobar\"></audio>",
toxml)

yield (runSanitizerTest, "test_invalid_ipv6_url",
"<a>",
"<a href=\"h:https://]\">",
toxml)

yield (runSanitizerTest, "test_data_uri_disallowed_type",
"<audio controls=\"\"></audio>",
"<audio controls=\"\" src=\"data:text/html,<html>\"></audio>",
Expand Down

0 comments on commit 29526c5

Please sign in to comment.