Move a whole bunch of private modules to be underscore prefixed

This moves: html5lib.ihatexml -> html5lib._ihatexml; html5lib.inputstream -> html5lib._inputstream; html5lib.tokenizer -> html5lib._tokenizer; html5lib.trie -> html5lib._trie; html5lib.utils -> html5lib._utils.
html5lib · Jul 14, 2016 · c4dd677
1 parent 8db5828
commit c4dd677
Show file tree

Hide file tree

Showing 21 changed files with 82 additions and 82 deletions.
diff --git a/html5lib/ihatexml.py → html5lib/_ihatexml.py b/html5lib/ihatexml.py → html5lib/_ihatexml.py
diff --git a/html5lib/inputstream.py → html5lib/_inputstream.py b/html5lib/inputstream.py → html5lib/_inputstream.py
@@ -10,7 +10,7 @@
 
 from .constants import EOF, spaceCharacters, asciiLetters, asciiUppercase
 from .constants import ReparseException
-from . import utils
+from . import _utils
 
 from io import StringIO
 
@@ -28,7 +28,7 @@
 
 invalid_unicode_no_surrogate = "[\u0001-\u0008\u000B\u000E-\u001F\u007F-\u009F\uFDD0-\uFDEF\uFFFE\uFFFF\U0001FFFE\U0001FFFF\U0002FFFE\U0002FFFF\U0003FFFE\U0003FFFF\U0004FFFE\U0004FFFF\U0005FFFE\U0005FFFF\U0006FFFE\U0006FFFF\U0007FFFE\U0007FFFF\U0008FFFE\U0008FFFF\U0009FFFE\U0009FFFF\U000AFFFE\U000AFFFF\U000BFFFE\U000BFFFF\U000CFFFE\U000CFFFF\U000DFFFE\U000DFFFF\U000EFFFE\U000EFFFF\U000FFFFE\U000FFFFF\U0010FFFE\U0010FFFF]" # noqa
 
-if utils.supports_lone_surrogates:
+if _utils.supports_lone_surrogates:
  # Use one extra step of indirection and create surrogates with
  # eval. Not using this indirection would introduce an illegal
  # unicode literal on platforms not supporting such lone
@@ -176,7 +176,7 @@ def __init__(self, source):
 
  """
 
- if not utils.supports_lone_surrogates:
+ if not _utils.supports_lone_surrogates:
  # Such platforms will have already checked for such
  # surrogate errors, so no need to do this checking.
  self.reportCharacterErrors = None
@@ -304,9 +304,9 @@ def characterErrorsUCS2(self, data):
  codepoint = ord(match.group())
  pos = match.start()
  # Pretty sure there should be endianness issues here
- if utils.isSurrogatePair(data[pos:pos + 2]):
+ if _utils.isSurrogatePair(data[pos:pos + 2]):
  # We have a surrogate pair!
- char_val = utils.surrogatePairToCodepoint(data[pos:pos + 2])
+ char_val = _utils.surrogatePairToCodepoint(data[pos:pos + 2])
  if char_val in non_bmp_invalid_codepoints:
  self.errors.append("invalid-codepoint")
  skip = True

diff --git a/html5lib/tokenizer.py → html5lib/_tokenizer.py b/html5lib/tokenizer.py → html5lib/_tokenizer.py
@@ -11,9 +11,9 @@
 from .constants import tokenTypes, tagTokenTypes
 from .constants import replacementCharacters
 
-from .inputstream import HTMLInputStream
+from ._inputstream import HTMLInputStream
 
-from .trie import Trie
+from ._trie import Trie
 
 entitiesTrie = Trie(entities)
 

diff --git a/html5lib/trie/__init__.py → html5lib/_trie/__init__.py b/html5lib/trie/__init__.py → html5lib/_trie/__init__.py
diff --git a/html5lib/trie/_base.py → html5lib/_trie/_base.py b/html5lib/trie/_base.py → html5lib/_trie/_base.py
diff --git a/html5lib/trie/datrie.py → html5lib/_trie/datrie.py b/html5lib/trie/datrie.py → html5lib/_trie/datrie.py
diff --git a/html5lib/trie/py.py → html5lib/_trie/py.py b/html5lib/trie/py.py → html5lib/_trie/py.py
diff --git a/html5lib/utils.py → html5lib/_utils.py b/html5lib/utils.py → html5lib/_utils.py
diff --git a/html5lib/html5parser.py b/html5lib/html5parser.py
diff --git a/html5lib/serializer.py b/html5lib/serializer.py
@@ -7,7 +7,7 @@
 
 from .constants import voidElements, booleanAttributes, spaceCharacters
 from .constants import rcdataElements, entities, xmlEntities
-from . import treewalkers, utils
+from . import treewalkers, _utils
 from xml.sax.saxutils import escape
 
 spaceCharacters = "".join(spaceCharacters)
@@ -33,7 +33,7 @@
  continue
  if v != "&":
  if len(v) == 2:
- v = utils.surrogatePairToCodepoint(v)
+ v = _utils.surrogatePairToCodepoint(v)
  else:
  v = ord(v)
  if v not in encode_entity_map or k.islower():
@@ -51,8 +51,8 @@ def htmlentityreplace_errors(exc):
  skip = False
  continue
  index = i + exc.start
- if utils.isSurrogatePair(exc.object[index:min([exc.end, index + 2])]):
- codepoint = utils.surrogatePairToCodepoint(exc.object[index:index + 2])
+ if _utils.isSurrogatePair(exc.object[index:min([exc.end, index + 2])]):
+ codepoint = _utils.surrogatePairToCodepoint(exc.object[index:index + 2])
  skip = True
  else:
  codepoint = ord(c)

diff --git a/html5lib/tests/test_encoding.py b/html5lib/tests/test_encoding.py
@@ -5,15 +5,15 @@
 import pytest
 
 from .support import get_data_files, test_dir, errorMessage, TestData as _TestData
-from html5lib import HTMLParser, inputstream
+from html5lib import HTMLParser, _inputstream
 
 
 def test_basic_prescan_length():
  data = "<title>Caf\u00E9</title><!--a--><meta charset='utf-8'>".encode('utf-8')
  pad = 1024 - len(data) + 1
  data = data.replace(b"-a-", b"-" + (b"a" * pad) + b"-")
  assert len(data) == 1024 # Sanity
- stream = inputstream.HTMLBinaryInputStream(data, useChardet=False)
+ stream = _inputstream.HTMLBinaryInputStream(data, useChardet=False)
  assert 'utf-8' == stream.charEncoding[0].name
 
 
@@ -22,7 +22,7 @@ def test_parser_reparse():
  pad = 10240 - len(data) + 1
  data = data.replace(b"-a-", b"-" + (b"a" * pad) + b"-")
  assert len(data) == 10240 # Sanity
- stream = inputstream.HTMLBinaryInputStream(data, useChardet=False)
+ stream = _inputstream.HTMLBinaryInputStream(data, useChardet=False)
  assert 'windows-1252' == stream.charEncoding[0].name
  p = HTMLParser(namespaceHTMLElements=False)
  doc = p.parse(data, useChardet=False)
@@ -47,7 +47,7 @@ def test_parser_reparse():
  ("windows-1252", b"", {}),
 ])
 def test_parser_args(expected, data, kwargs):
- stream = inputstream.HTMLBinaryInputStream(data, useChardet=False, **kwargs)
+ stream = _inputstream.HTMLBinaryInputStream(data, useChardet=False, **kwargs)
  assert expected == stream.charEncoding[0].name
  p = HTMLParser()
  p.parse(data, useChardet=False, **kwargs)
@@ -85,7 +85,7 @@ def runParserEncodingTest(data, encoding):
 
 
 def runPreScanEncodingTest(data, encoding):
- stream = inputstream.HTMLBinaryInputStream(data, useChardet=False)
+ stream = _inputstream.HTMLBinaryInputStream(data, useChardet=False)
  encoding = encoding.lower().decode("ascii")
 
  # Very crude way to ignore irrelevant tests
@@ -111,6 +111,6 @@ def test_encoding():
 else:
  def test_chardet():
  with open(os.path.join(test_dir, "encoding", "chardet", "test_big5.txt"), "rb") as fp:
- encoding = inputstream.HTMLInputStream(fp.read()).charEncoding
+ encoding = _inputstream.HTMLInputStream(fp.read()).charEncoding
  assert encoding[0].name == "big5"
 # pylint:enable=wrong-import-position
diff --git a/html5lib/tests/test_stream.py b/html5lib/tests/test_stream.py
@@ -11,9 +11,9 @@
 import six
 from six.moves import http_client, urllib
 
-from html5lib.inputstream import (BufferedStream, HTMLInputStream,
- HTMLUnicodeInputStream, HTMLBinaryInputStream)
-from html5lib.utils import supports_lone_surrogates
+from html5lib._inputstream import (BufferedStream, HTMLInputStream,
+  HTMLUnicodeInputStream, HTMLBinaryInputStream)
+from html5lib._utils import supports_lone_surrogates
 
 
 def test_basic():

diff --git a/html5lib/tests/tokenizer.py b/html5lib/tests/tokenizer.py
@@ -8,8 +8,8 @@
 import pytest
 from six import unichr
 
-from html5lib.tokenizer import HTMLTokenizer
-from html5lib import constants, utils
+from html5lib._tokenizer import HTMLTokenizer
+from html5lib import constants, _utils
 
 
 class TokenizerTestParser(object):
@@ -156,7 +156,7 @@ def repl(m):
  except ValueError:
  # This occurs when unichr throws ValueError, which should
  # only be for a lone-surrogate.
- if utils.supports_lone_surrogates:
+ if _utils.supports_lone_surrogates:
  raise
  return None
 

diff --git a/html5lib/treebuilders/__init__.py b/html5lib/treebuilders/__init__.py
@@ -28,7 +28,7 @@
 
 from __future__ import absolute_import, division, unicode_literals
 
-from ..utils import default_etree
+from .._utils import default_etree
 
 treeBuilderCache = {}
 

diff --git a/html5lib/treebuilders/dom.py b/html5lib/treebuilders/dom.py
@@ -8,7 +8,7 @@
 from . import base
 from .. import constants
 from ..constants import namespaces
-from ..utils import moduleFactoryFactory
+from .._utils import moduleFactoryFactory
 
 
 def getDomBuilder(DomImplementation):

diff --git a/html5lib/treebuilders/etree.py b/html5lib/treebuilders/etree.py
@@ -6,10 +6,10 @@
 import re
 
 from . import base
-from .. import ihatexml
+from .. import _ihatexml
 from .. import constants
 from ..constants import namespaces
-from ..utils import moduleFactoryFactory
+from .._utils import moduleFactoryFactory
 
 tag_regexp = re.compile("{([^}]*)}(.*)")
 
@@ -259,7 +259,7 @@ def serializeElement(element, indent=0):
  def tostring(element): # pylint:disable=unused-variable
  """Serialize an element and its child nodes to a string"""
  rv = []
- filter = ihatexml.InfosetFilter()
+ filter = _ihatexml.InfosetFilter()
 
  def serializeElement(element):
  if isinstance(element, ElementTree.ElementTree):

diff --git a/html5lib/treebuilders/etree_lxml.py b/html5lib/treebuilders/etree_lxml.py
@@ -20,7 +20,7 @@
 from ..constants import DataLossWarning
 from .. import constants
 from . import etree as etree_builders
-from .. import ihatexml
+from .. import _ihatexml
 
 import lxml.etree as etree
 
@@ -54,7 +54,7 @@ def _getChildNodes(self):
 
 def testSerializer(element):
  rv = []
- infosetFilter = ihatexml.InfosetFilter(preventDoubleDashComments=True)
+ infosetFilter = _ihatexml.InfosetFilter(preventDoubleDashComments=True)
 
  def serializeElement(element, indent=0):
  if not hasattr(element, "tag"):
@@ -182,7 +182,7 @@ class TreeBuilder(base.TreeBuilder):
 
  def __init__(self, namespaceHTMLElements, fullTree=False):
  builder = etree_builders.getETreeModule(etree, fullTree=fullTree)
- infosetFilter = self.infosetFilter = ihatexml.InfosetFilter(preventDoubleDashComments=True)
+ infosetFilter = self.infosetFilter = _ihatexml.InfosetFilter(preventDoubleDashComments=True)
  self.namespaceHTMLElements = namespaceHTMLElements
 
  class Attributes(dict):

diff --git a/html5lib/treewalkers/__init__.py b/html5lib/treewalkers/__init__.py
@@ -11,7 +11,7 @@
 from __future__ import absolute_import, division, unicode_literals
 
 from .. import constants
-from ..utils import default_etree
+from .._utils import default_etree
 
 __all__ = ["getTreeWalker", "pprint", "dom", "etree", "genshi", "etree_lxml"]
 

diff --git a/html5lib/treewalkers/etree.py b/html5lib/treewalkers/etree.py
@@ -13,7 +13,7 @@
 from six import string_types
 
 from . import base
-from ..utils import moduleFactoryFactory
+from .._utils import moduleFactoryFactory
 
 tag_regexp = re.compile("{([^}]*)}(.*)")
 

diff --git a/html5lib/treewalkers/etree_lxml.py b/html5lib/treewalkers/etree_lxml.py
@@ -6,7 +6,7 @@
 
 from . import base
 
-from .. import ihatexml
+from .. import _ihatexml
 
 
 def ensure_str(s):
@@ -132,7 +132,7 @@ def __init__(self, tree):
  self.fragmentChildren = set()
  tree = Root(tree)
  base.NonRecursiveTreeWalker.__init__(self, tree)
- self.filter = ihatexml.InfosetFilter()
+ self.filter = _ihatexml.InfosetFilter()
 
  def getNodeDetails(self, node):
  if isinstance(node, tuple): # Text node

diff --git a/parse.py b/parse.py
@@ -11,7 +11,7 @@
 from html5lib import html5parser
 from html5lib import treebuilders, serializer, treewalkers
 from html5lib import constants
-from html5lib import utils
+from html5lib import _utils
 
 
 def parse():
@@ -116,7 +116,7 @@ def printOutput(parser, document, opts):
  import lxml.etree
  sys.stdout.write(lxml.etree.tostring(document, encoding="unicode"))
  elif tb == "etree":
- sys.stdout.write(utils.default_etree.tostring(document, encoding="unicode"))
+ sys.stdout.write(_utils.default_etree.tostring(document, encoding="unicode"))
  elif opts.tree:
  if not hasattr(document, '__getitem__'):
  document = [document]