From c8e3d2b80afd63decc4ad6d889b8cda6fc3b6cc4 Mon Sep 17 00:00:00 2001
From: Jon Dufresne <jon.dufresne@gmail.com>
Date: Sat, 6 Apr 2019 14:41:27 -0700
Subject: [PATCH] Use modern Python syntax: set literals and dict comprehensions

Available since Python 2.7. Instances discovered using pyupgrade.
---
 html5lib/_inputstream.py          | 14 +++++++-------
 html5lib/constants.py             |  9 ++++-----
 html5lib/html5parser.py           |  9 ++++-----
 html5lib/tests/test_serializer.py |  2 +-
 html5lib/tests/tokenizer.py       |  2 +-
 html5lib/treebuilders/base.py     |  6 +++---
 utils/entities.py                 |  8 ++++----
 7 files changed, 24 insertions(+), 26 deletions(-)

diff --git a/html5lib/_inputstream.py b/html5lib/_inputstream.py
index 37d749ca..b8021291 100644
--- a/html5lib/_inputstream.py
+++ b/html5lib/_inputstream.py
@@ -34,13 +34,13 @@
 else:
     invalid_unicode_re = re.compile(invalid_unicode_no_surrogate)
 
-non_bmp_invalid_codepoints = set([0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE,
-                                  0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF,
-                                  0x6FFFE, 0x6FFFF, 0x7FFFE, 0x7FFFF, 0x8FFFE,
-                                  0x8FFFF, 0x9FFFE, 0x9FFFF, 0xAFFFE, 0xAFFFF,
-                                  0xBFFFE, 0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE,
-                                  0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF,
-                                  0x10FFFE, 0x10FFFF])
+non_bmp_invalid_codepoints = {0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE,
+                              0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF,
+                              0x6FFFE, 0x6FFFF, 0x7FFFE, 0x7FFFF, 0x8FFFE,
+                              0x8FFFF, 0x9FFFE, 0x9FFFF, 0xAFFFE, 0xAFFFF,
+                              0xBFFFE, 0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE,
+                              0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF,
+                              0x10FFFE, 0x10FFFF}
 
 ascii_punctuation_re = re.compile("[\u0009-\u000D\u0020-\u002F\u003A-\u0040\u005C\u005B-\u0060\u007B-\u007E]")
 
diff --git a/html5lib/constants.py b/html5lib/constants.py
index 1ff80419..fe3e237c 100644
--- a/html5lib/constants.py
+++ b/html5lib/constants.py
@@ -519,8 +519,8 @@
     "xmlns:xlink": ("xmlns", "xlink", namespaces["xmlns"])
 }
 
-unadjustForeignAttributes = dict([((ns, local), qname) for qname, (prefix, local, ns) in
-                                  adjustForeignAttributes.items()])
+unadjustForeignAttributes = {(ns, local): qname for qname, (prefix, local, ns) in
+                             adjustForeignAttributes.items()}
 
 spaceCharacters = frozenset([
     "\t",
@@ -544,8 +544,7 @@
 digits = frozenset(string.digits)
 hexDigits = frozenset(string.hexdigits)
 
-asciiUpper2Lower = dict([(ord(c), ord(c.lower()))
-                         for c in string.ascii_uppercase])
+asciiUpper2Lower = {ord(c): ord(c.lower()) for c in string.ascii_uppercase}
 
 # Heading elements need to be ordered
 headingElements = (
@@ -2934,7 +2933,7 @@
                            tokenTypes["EmptyTag"]])
 
 
-prefixes = dict([(v, k) for k, v in namespaces.items()])
+prefixes = {v: k for k, v in namespaces.items()}
 prefixes["http://www.w3.org/1998/Math/MathML"] = "math"
 
 
diff --git a/html5lib/html5parser.py b/html5lib/html5parser.py
index 9d39b9d4..4d12d9de 100644
--- a/html5lib/html5parser.py
+++ b/html5lib/html5parser.py
@@ -119,8 +119,8 @@ def __init__(self, tree=None, strict=False, namespaceHTMLElements=True, debug=Fa
         self.tree = tree(namespaceHTMLElements)
         self.errors = []
 
-        self.phases = dict([(name, cls(self, self.tree)) for name, cls in
-                            getPhases(debug).items()])
+        self.phases = {name: cls(self, self.tree) for name, cls in
+                       getPhases(debug).items()}
 
     def _parse(self, stream, innerHTML=False, container="div", scripting=False, **kwargs):
 
@@ -413,8 +413,7 @@ def parseRCDataRawtext(self, token, contentType):
 def getPhases(debug):
     def log(function):
         """Logger that records which phase processes each token"""
-        type_names = dict((value, key) for key, value in
-                          tokenTypes.items())
+        type_names = {value: key for key, value in tokenTypes.items()}
 
         def wrapped(self, *args, **kwargs):
             if function.__name__.startswith("process") and len(args) > 0:
@@ -2478,7 +2477,7 @@ def processStartTag(self, token):
             currentNode = self.tree.openElements[-1]
             if (token["name"] in self.breakoutElements or
                 (token["name"] == "font" and
-                 set(token["data"].keys()) & set(["color", "face", "size"]))):
+                 set(token["data"].keys()) & {"color", "face", "size"})):
                 self.parser.parseError("unexpected-html-element-in-foreign-content",
                                        {"name": token["name"]})
                 while (self.tree.openElements[-1].namespace !=
diff --git a/html5lib/tests/test_serializer.py b/html5lib/tests/test_serializer.py
index 9333286e..c23592af 100644
--- a/html5lib/tests/test_serializer.py
+++ b/html5lib/tests/test_serializer.py
@@ -80,7 +80,7 @@ def _convertAttrib(self, attribs):
 
 
 def serialize_html(input, options):
-    options = dict([(str(k), v) for k, v in options.items()])
+    options = {str(k): v for k, v in options.items()}
     encoding = options.get("encoding", None)
     if "encoding" in options:
         del options["encoding"]
diff --git a/html5lib/tests/tokenizer.py b/html5lib/tests/tokenizer.py
index 1440a722..f93ae030 100644
--- a/html5lib/tests/tokenizer.py
+++ b/html5lib/tests/tokenizer.py
@@ -28,7 +28,7 @@ def parse(self, stream, encoding=None, innerHTML=False):
             tokenizer.currentToken = {"type": "startTag",
                                       "name": self._lastStartTag}
 
-        types = dict((v, k) for k, v in constants.tokenTypes.items())
+        types = {v: k for k, v in constants.tokenTypes.items()}
         for token in tokenizer:
             getattr(self, 'process%s' % types[token["type"]])(token)
 
diff --git a/html5lib/treebuilders/base.py b/html5lib/treebuilders/base.py
index 15ba609e..e4a3d710 100644
--- a/html5lib/treebuilders/base.py
+++ b/html5lib/treebuilders/base.py
@@ -10,9 +10,9 @@
 
 listElementsMap = {
     None: (frozenset(scopingElements), False),
-    "button": (frozenset(scopingElements | set([(namespaces["html"], "button")])), False),
-    "list": (frozenset(scopingElements | set([(namespaces["html"], "ol"),
-                                              (namespaces["html"], "ul")])), False),
+    "button": (frozenset(scopingElements | {(namespaces["html"], "button")}), False),
+    "list": (frozenset(scopingElements | {(namespaces["html"], "ol"),
+                                          (namespaces["html"], "ul")}), False),
     "table": (frozenset([(namespaces["html"], "html"),
                          (namespaces["html"], "table")]), False),
     "select": (frozenset([(namespaces["html"], "optgroup"),
diff --git a/utils/entities.py b/utils/entities.py
index 6dccf5f0..c8f268d0 100644
--- a/utils/entities.py
+++ b/utils/entities.py
@@ -8,10 +8,10 @@ def parse(path="html5ents.xml"):
 
 
 def entity_table(tree):
-    return dict((entity_name("".join(tr[0].xpath(".//text()"))),
-                 entity_characters(tr[1].text))
-                for tr in tree.xpath("//h:tbody/h:tr",
-                                     namespaces={"h": "http://www.w3.org/1999/xhtml"}))
+    return {entity_name("".join(tr[0].xpath(".//text()"))):
+            entity_characters(tr[1].text)
+            for tr in tree.xpath("//h:tbody/h:tr",
+                                 namespaces={"h": "http://www.w3.org/1999/xhtml"})}
 
 
 def entity_name(inp):