Skip to content

Commit

Permalink
Move a whole bunch of private modules to be underscore prefixed
Browse files: browse the repository at this point in the history
This moves: html5lib.ihatexml -> html5lib._ihatexml
            html5lib.inputstream -> html5lib._inputstream
            html5lib.tokenizer -> html5lib._tokenizer
            html5lib.trie -> html5lib._trie
            html5lib.utils -> html5lib._utils
  • Loading branch information
gsnedders committed Jul 14, 2016
1 parent 8db5828 commit c4dd677
Show file tree
Hide file tree
Showing 21 changed files with 82 additions and 82 deletions.
File renamed without changes.
10 changes: 5 additions & 5 deletions html5lib/inputstream.py → html5lib/_inputstream.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

from .constants import EOF, spaceCharacters, asciiLetters, asciiUppercase
from .constants import ReparseException
from . import utils
from . import _utils

from io import StringIO

Expand All @@ -28,7 +28,7 @@

invalid_unicode_no_surrogate = "[\u0001-\u0008\u000B\u000E-\u001F\u007F-\u009F\uFDD0-\uFDEF\uFFFE\uFFFF\U0001FFFE\U0001FFFF\U0002FFFE\U0002FFFF\U0003FFFE\U0003FFFF\U0004FFFE\U0004FFFF\U0005FFFE\U0005FFFF\U0006FFFE\U0006FFFF\U0007FFFE\U0007FFFF\U0008FFFE\U0008FFFF\U0009FFFE\U0009FFFF\U000AFFFE\U000AFFFF\U000BFFFE\U000BFFFF\U000CFFFE\U000CFFFF\U000DFFFE\U000DFFFF\U000EFFFE\U000EFFFF\U000FFFFE\U000FFFFF\U0010FFFE\U0010FFFF]" # noqa

if utils.supports_lone_surrogates:
if _utils.supports_lone_surrogates:
# Use one extra step of indirection and create surrogates with
# eval. Not using this indirection would introduce an illegal
# unicode literal on platforms not supporting such lone
Expand Down Expand Up @@ -176,7 +176,7 @@ def __init__(self, source):
"""

if not utils.supports_lone_surrogates:
if not _utils.supports_lone_surrogates:
# Such platforms will have already checked for such
# surrogate errors, so no need to do this checking.
self.reportCharacterErrors = None
Expand Down Expand Up @@ -304,9 +304,9 @@ def characterErrorsUCS2(self, data):
codepoint = ord(match.group())
pos = match.start()
# Pretty sure there should be endianness issues here
if utils.isSurrogatePair(data[pos:pos + 2]):
if _utils.isSurrogatePair(data[pos:pos + 2]):
# We have a surrogate pair!
char_val = utils.surrogatePairToCodepoint(data[pos:pos + 2])
char_val = _utils.surrogatePairToCodepoint(data[pos:pos + 2])
if char_val in non_bmp_invalid_codepoints:
self.errors.append("invalid-codepoint")
skip = True
Expand Down
4 changes: 2 additions & 2 deletions html5lib/tokenizer.py → html5lib/_tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@
from .constants import tokenTypes, tagTokenTypes
from .constants import replacementCharacters

from .inputstream import HTMLInputStream
from ._inputstream import HTMLInputStream

from .trie import Trie
from ._trie import Trie

entitiesTrie = Trie(entities)

Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
90 changes: 45 additions & 45 deletions html5lib/html5parser.py

Large diffs are not rendered by default.

8 changes: 4 additions & 4 deletions html5lib/serializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

from .constants import voidElements, booleanAttributes, spaceCharacters
from .constants import rcdataElements, entities, xmlEntities
from . import treewalkers, utils
from . import treewalkers, _utils
from xml.sax.saxutils import escape

spaceCharacters = "".join(spaceCharacters)
Expand All @@ -33,7 +33,7 @@
continue
if v != "&":
if len(v) == 2:
v = utils.surrogatePairToCodepoint(v)
v = _utils.surrogatePairToCodepoint(v)
else:
v = ord(v)
if v not in encode_entity_map or k.islower():
Expand All @@ -51,8 +51,8 @@ def htmlentityreplace_errors(exc):
skip = False
continue
index = i + exc.start
if utils.isSurrogatePair(exc.object[index:min([exc.end, index + 2])]):
codepoint = utils.surrogatePairToCodepoint(exc.object[index:index + 2])
if _utils.isSurrogatePair(exc.object[index:min([exc.end, index + 2])]):
codepoint = _utils.surrogatePairToCodepoint(exc.object[index:index + 2])
skip = True
else:
codepoint = ord(c)
Expand Down
12 changes: 6 additions & 6 deletions html5lib/tests/test_encoding.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,15 @@
import pytest

from .support import get_data_files, test_dir, errorMessage, TestData as _TestData
from html5lib import HTMLParser, inputstream
from html5lib import HTMLParser, _inputstream


def test_basic_prescan_length():
data = "<title>Caf\u00E9</title><!--a--><meta charset='utf-8'>".encode('utf-8')
pad = 1024 - len(data) + 1
data = data.replace(b"-a-", b"-" + (b"a" * pad) + b"-")
assert len(data) == 1024 # Sanity
stream = inputstream.HTMLBinaryInputStream(data, useChardet=False)
stream = _inputstream.HTMLBinaryInputStream(data, useChardet=False)
assert 'utf-8' == stream.charEncoding[0].name


Expand All @@ -22,7 +22,7 @@ def test_parser_reparse():
pad = 10240 - len(data) + 1
data = data.replace(b"-a-", b"-" + (b"a" * pad) + b"-")
assert len(data) == 10240 # Sanity
stream = inputstream.HTMLBinaryInputStream(data, useChardet=False)
stream = _inputstream.HTMLBinaryInputStream(data, useChardet=False)
assert 'windows-1252' == stream.charEncoding[0].name
p = HTMLParser(namespaceHTMLElements=False)
doc = p.parse(data, useChardet=False)
Expand All @@ -47,7 +47,7 @@ def test_parser_reparse():
("windows-1252", b"", {}),
])
def test_parser_args(expected, data, kwargs):
stream = inputstream.HTMLBinaryInputStream(data, useChardet=False, **kwargs)
stream = _inputstream.HTMLBinaryInputStream(data, useChardet=False, **kwargs)
assert expected == stream.charEncoding[0].name
p = HTMLParser()
p.parse(data, useChardet=False, **kwargs)
Expand Down Expand Up @@ -85,7 +85,7 @@ def runParserEncodingTest(data, encoding):


def runPreScanEncodingTest(data, encoding):
stream = inputstream.HTMLBinaryInputStream(data, useChardet=False)
stream = _inputstream.HTMLBinaryInputStream(data, useChardet=False)
encoding = encoding.lower().decode("ascii")

# Very crude way to ignore irrelevant tests
Expand All @@ -111,6 +111,6 @@ def test_encoding():
else:
def test_chardet():
with open(os.path.join(test_dir, "encoding", "chardet", "test_big5.txt"), "rb") as fp:
encoding = inputstream.HTMLInputStream(fp.read()).charEncoding
encoding = _inputstream.HTMLInputStream(fp.read()).charEncoding
assert encoding[0].name == "big5"
# pylint:enable=wrong-import-position
6 changes: 3 additions & 3 deletions html5lib/tests/test_stream.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@
import six
from six.moves import http_client, urllib

from html5lib.inputstream import (BufferedStream, HTMLInputStream,
HTMLUnicodeInputStream, HTMLBinaryInputStream)
from html5lib.utils import supports_lone_surrogates
from html5lib._inputstream import (BufferedStream, HTMLInputStream,
HTMLUnicodeInputStream, HTMLBinaryInputStream)
from html5lib._utils import supports_lone_surrogates


def test_basic():
Expand Down
6 changes: 3 additions & 3 deletions html5lib/tests/tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@
import pytest
from six import unichr

from html5lib.tokenizer import HTMLTokenizer
from html5lib import constants, utils
from html5lib._tokenizer import HTMLTokenizer
from html5lib import constants, _utils


class TokenizerTestParser(object):
Expand Down Expand Up @@ -156,7 +156,7 @@ def repl(m):
except ValueError:
# This occurs when unichr throws ValueError, which should
# only be for a lone-surrogate.
if utils.supports_lone_surrogates:
if _utils.supports_lone_surrogates:
raise
return None

Expand Down
2 changes: 1 addition & 1 deletion html5lib/treebuilders/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@

from __future__ import absolute_import, division, unicode_literals

from ..utils import default_etree
from .._utils import default_etree

treeBuilderCache = {}

Expand Down
2 changes: 1 addition & 1 deletion html5lib/treebuilders/dom.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from . import base
from .. import constants
from ..constants import namespaces
from ..utils import moduleFactoryFactory
from .._utils import moduleFactoryFactory


def getDomBuilder(DomImplementation):
Expand Down
6 changes: 3 additions & 3 deletions html5lib/treebuilders/etree.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@
import re

from . import base
from .. import ihatexml
from .. import _ihatexml
from .. import constants
from ..constants import namespaces
from ..utils import moduleFactoryFactory
from .._utils import moduleFactoryFactory

tag_regexp = re.compile("{([^}]*)}(.*)")

Expand Down Expand Up @@ -259,7 +259,7 @@ def serializeElement(element, indent=0):
def tostring(element): # pylint:disable=unused-variable
"""Serialize an element and its child nodes to a string"""
rv = []
filter = ihatexml.InfosetFilter()
filter = _ihatexml.InfosetFilter()

def serializeElement(element):
if isinstance(element, ElementTree.ElementTree):
Expand Down
6 changes: 3 additions & 3 deletions html5lib/treebuilders/etree_lxml.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from ..constants import DataLossWarning
from .. import constants
from . import etree as etree_builders
from .. import ihatexml
from .. import _ihatexml

import lxml.etree as etree

Expand Down Expand Up @@ -54,7 +54,7 @@ def _getChildNodes(self):

def testSerializer(element):
rv = []
infosetFilter = ihatexml.InfosetFilter(preventDoubleDashComments=True)
infosetFilter = _ihatexml.InfosetFilter(preventDoubleDashComments=True)

def serializeElement(element, indent=0):
if not hasattr(element, "tag"):
Expand Down Expand Up @@ -182,7 +182,7 @@ class TreeBuilder(base.TreeBuilder):

def __init__(self, namespaceHTMLElements, fullTree=False):
builder = etree_builders.getETreeModule(etree, fullTree=fullTree)
infosetFilter = self.infosetFilter = ihatexml.InfosetFilter(preventDoubleDashComments=True)
infosetFilter = self.infosetFilter = _ihatexml.InfosetFilter(preventDoubleDashComments=True)
self.namespaceHTMLElements = namespaceHTMLElements

class Attributes(dict):
Expand Down
2 changes: 1 addition & 1 deletion html5lib/treewalkers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from __future__ import absolute_import, division, unicode_literals

from .. import constants
from ..utils import default_etree
from .._utils import default_etree

__all__ = ["getTreeWalker", "pprint", "dom", "etree", "genshi", "etree_lxml"]

Expand Down
2 changes: 1 addition & 1 deletion html5lib/treewalkers/etree.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from six import string_types

from . import base
from ..utils import moduleFactoryFactory
from .._utils import moduleFactoryFactory

tag_regexp = re.compile("{([^}]*)}(.*)")

Expand Down
4 changes: 2 additions & 2 deletions html5lib/treewalkers/etree_lxml.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from . import base

from .. import ihatexml
from .. import _ihatexml


def ensure_str(s):
Expand Down Expand Up @@ -132,7 +132,7 @@ def __init__(self, tree):
self.fragmentChildren = set()
tree = Root(tree)
base.NonRecursiveTreeWalker.__init__(self, tree)
self.filter = ihatexml.InfosetFilter()
self.filter = _ihatexml.InfosetFilter()

def getNodeDetails(self, node):
if isinstance(node, tuple): # Text node
Expand Down
4 changes: 2 additions & 2 deletions parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from html5lib import html5parser
from html5lib import treebuilders, serializer, treewalkers
from html5lib import constants
from html5lib import utils
from html5lib import _utils


def parse():
Expand Down Expand Up @@ -116,7 +116,7 @@ def printOutput(parser, document, opts):
import lxml.etree
sys.stdout.write(lxml.etree.tostring(document, encoding="unicode"))
elif tb == "etree":
sys.stdout.write(utils.default_etree.tostring(document, encoding="unicode"))
sys.stdout.write(_utils.default_etree.tostring(document, encoding="unicode"))
elif opts.tree:
if not hasattr(document, '__getitem__'):
document = [document]
Expand Down

0 comments on commit c4dd677

Please sign in to comment.