From c8e3d2b80afd63decc4ad6d889b8cda6fc3b6cc4 Mon Sep 17 00:00:00 2001 From: Jon Dufresne Date: Sat, 6 Apr 2019 14:41:27 -0700 Subject: [PATCH] Use modern Python syntax set literals and dict comprehension Available since Python 2.7. Instances discovered using pyupgrade. --- html5lib/_inputstream.py | 14 +++++++------- html5lib/constants.py | 9 ++++----- html5lib/html5parser.py | 9 ++++----- html5lib/tests/test_serializer.py | 2 +- html5lib/tests/tokenizer.py | 2 +- html5lib/treebuilders/base.py | 6 +++--- utils/entities.py | 8 ++++---- 7 files changed, 24 insertions(+), 26 deletions(-) diff --git a/html5lib/_inputstream.py b/html5lib/_inputstream.py index 37d749ca..b8021291 100644 --- a/html5lib/_inputstream.py +++ b/html5lib/_inputstream.py @@ -34,13 +34,13 @@ else: invalid_unicode_re = re.compile(invalid_unicode_no_surrogate) -non_bmp_invalid_codepoints = set([0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE, - 0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF, - 0x6FFFE, 0x6FFFF, 0x7FFFE, 0x7FFFF, 0x8FFFE, - 0x8FFFF, 0x9FFFE, 0x9FFFF, 0xAFFFE, 0xAFFFF, - 0xBFFFE, 0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE, - 0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF, - 0x10FFFE, 0x10FFFF]) +non_bmp_invalid_codepoints = {0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE, + 0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF, + 0x6FFFE, 0x6FFFF, 0x7FFFE, 0x7FFFF, 0x8FFFE, + 0x8FFFF, 0x9FFFE, 0x9FFFF, 0xAFFFE, 0xAFFFF, + 0xBFFFE, 0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE, + 0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF, + 0x10FFFE, 0x10FFFF} ascii_punctuation_re = re.compile("[\u0009-\u000D\u0020-\u002F\u003A-\u0040\u005C\u005B-\u0060\u007B-\u007E]") diff --git a/html5lib/constants.py b/html5lib/constants.py index 1ff80419..fe3e237c 100644 --- a/html5lib/constants.py +++ b/html5lib/constants.py @@ -519,8 +519,8 @@ "xmlns:xlink": ("xmlns", "xlink", namespaces["xmlns"]) } -unadjustForeignAttributes = dict([((ns, local), qname) for qname, (prefix, local, ns) in - adjustForeignAttributes.items()]) +unadjustForeignAttributes = {(ns, local): qname for qname, (prefix, local, ns) in + adjustForeignAttributes.items()} spaceCharacters = frozenset([ "\t", @@ -544,8 +544,7 @@ digits = frozenset(string.digits) hexDigits = frozenset(string.hexdigits) -asciiUpper2Lower = dict([(ord(c), ord(c.lower())) - for c in string.ascii_uppercase]) +asciiUpper2Lower = {ord(c): ord(c.lower()) for c in string.ascii_uppercase} # Heading elements need to be ordered headingElements = ( @@ -2934,7 +2933,7 @@ tokenTypes["EmptyTag"]]) -prefixes = dict([(v, k) for k, v in namespaces.items()]) +prefixes = {v: k for k, v in namespaces.items()} prefixes["http://www.w3.org/1998/Math/MathML"] = "math" diff --git a/html5lib/html5parser.py b/html5lib/html5parser.py index 9d39b9d4..4d12d9de 100644 --- a/html5lib/html5parser.py +++ b/html5lib/html5parser.py @@ -119,8 +119,8 @@ def __init__(self, tree=None, strict=False, namespaceHTMLElements=True, debug=Fa self.tree = tree(namespaceHTMLElements) self.errors = [] - self.phases = dict([(name, cls(self, self.tree)) for name, cls in - getPhases(debug).items()]) + self.phases = {name: cls(self, self.tree) for name, cls in + getPhases(debug).items()} def _parse(self, stream, innerHTML=False, container="div", scripting=False, **kwargs): @@ -413,8 +413,7 @@ def parseRCDataRawtext(self, token, contentType): def getPhases(debug): def log(function): """Logger that records which phase processes each token""" - type_names = dict((value, key) for key, value in - tokenTypes.items()) + type_names = {value: key for key, value in tokenTypes.items()} def wrapped(self, *args, **kwargs): if function.__name__.startswith("process") and len(args) > 0: @@ -2478,7 +2477,7 @@ def processStartTag(self, token): currentNode = self.tree.openElements[-1] if (token["name"] in self.breakoutElements or (token["name"] == "font" and - set(token["data"].keys()) & set(["color", "face", "size"]))): + set(token["data"].keys()) & {"color", "face", "size"})): self.parser.parseError("unexpected-html-element-in-foreign-content", {"name": token["name"]}) while (self.tree.openElements[-1].namespace != diff --git a/html5lib/tests/test_serializer.py b/html5lib/tests/test_serializer.py index 9333286e..c23592af 100644 --- a/html5lib/tests/test_serializer.py +++ b/html5lib/tests/test_serializer.py @@ -80,7 +80,7 @@ def _convertAttrib(self, attribs): def serialize_html(input, options): - options = dict([(str(k), v) for k, v in options.items()]) + options = {str(k): v for k, v in options.items()} encoding = options.get("encoding", None) if "encoding" in options: del options["encoding"] diff --git a/html5lib/tests/tokenizer.py b/html5lib/tests/tokenizer.py index 1440a722..f93ae030 100644 --- a/html5lib/tests/tokenizer.py +++ b/html5lib/tests/tokenizer.py @@ -28,7 +28,7 @@ def parse(self, stream, encoding=None, innerHTML=False): tokenizer.currentToken = {"type": "startTag", "name": self._lastStartTag} - types = dict((v, k) for k, v in constants.tokenTypes.items()) + types = {v: k for k, v in constants.tokenTypes.items()} for token in tokenizer: getattr(self, 'process%s' % types[token["type"]])(token) diff --git a/html5lib/treebuilders/base.py b/html5lib/treebuilders/base.py index 15ba609e..e4a3d710 100644 --- a/html5lib/treebuilders/base.py +++ b/html5lib/treebuilders/base.py @@ -10,9 +10,9 @@ listElementsMap = { None: (frozenset(scopingElements), False), - "button": (frozenset(scopingElements | set([(namespaces["html"], "button")])), False), - "list": (frozenset(scopingElements | set([(namespaces["html"], "ol"), - (namespaces["html"], "ul")])), False), + "button": (frozenset(scopingElements | {(namespaces["html"], "button")}), False), + "list": (frozenset(scopingElements | {(namespaces["html"], "ol"), + (namespaces["html"], "ul")}), False), "table": (frozenset([(namespaces["html"], "html"), (namespaces["html"], "table")]), False), "select": (frozenset([(namespaces["html"], "optgroup"), diff --git a/utils/entities.py b/utils/entities.py index 6dccf5f0..c8f268d0 100644 --- a/utils/entities.py +++ b/utils/entities.py @@ -8,10 +8,10 @@ def parse(path="html5ents.xml"): def entity_table(tree): - return dict((entity_name("".join(tr[0].xpath(".//text()"))), - entity_characters(tr[1].text)) - for tr in tree.xpath("//h:tbody/h:tr", - namespaces={"h": "http://www.w3.org/1999/xhtml"})) + return {entity_name("".join(tr[0].xpath(".//text()"))): + entity_characters(tr[1].text) + for tr in tree.xpath("//h:tbody/h:tr", + namespaces={"h": "http://www.w3.org/1999/xhtml"})} def entity_name(inp):