Skip to content

Commit

Permalink
Fix html5lib#37: Preserve order of attributes on serialization.
Browse files Browse the repository at this point in the history
This doesn't do anything about the fact that none of our
treebuilders preserve attribute order: it merely avoids the
serializer reordering them from the order it receives them in.

This also provides a filter to obtain the previous behaviour of
lexicographical ordering, used by the serializer tests to ensure
determinism of the output.
  • Loading branch information
gsnedders committed May 16, 2013
1 parent 72e5d8d commit 0c99d3a
Show file tree
Hide file tree
Showing 8 changed files with 52 additions and 5 deletions.
1 change: 1 addition & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ before_install:
install:
- pip install -r requirements.txt -r requirements-test.txt --use-mirrors
- if [[ $USE_OPTIONAL == "true" ]]; then pip install -r requirements-optional.txt --use-mirrors; fi
- if [[ $TRAVIS_PYTHON_VERSION == "2.6" && $USE_OPTIONAL == "true" ]]; then pip install -r requirements-optional-2.6.txt --use-mirrors; fi
- if [[ $TRAVIS_PYTHON_VERSION != "pypy" && $USE_OPTIONAL == "true" ]]; then pip install -r requirements-optional-cpython.txt --use-mirrors; fi
- if [[ $FLAKE == "true" ]]; then pip install --use-mirrors flake8; fi

Expand Down
9 changes: 6 additions & 3 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,10 @@ format:
More documentation is available at http://html5lib.readthedocs.org/.

- ``ordereddict`` can be used under Python 2.6
(``collections.OrderedDict`` is used instead on later versions) to
serialize attributes in alphabetical order.


Installation
------------
Expand Down Expand Up @@ -106,7 +110,8 @@ Tests
-----

Unit tests require the ``nose`` library and can be run using the
``nosetests`` command in the root directory. All should pass.
``nosetests`` command in the root directory; ``ordereddict`` is
required under Python 2.6. All should pass.

Test data are contained in a separate `html5lib-tests
<https://github.com/html5lib/html5lib-tests>`_ repository and included
Expand All @@ -115,8 +120,6 @@ as a submodule, thus for git checkouts they must be initialized::
$ git submodule init
$ git submodule update

This is unneeded for release tarballs.

If you have all compatible Python implementations available on your
system, you can run tests on all of them using the ``tox`` utility,
which can be found on PyPI.
Expand Down
20 changes: 20 additions & 0 deletions html5lib/filters/alphabeticalattributes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
from __future__ import absolute_import, division, unicode_literals

from . import _base

try:
from collections import OrderedDict
except ImportError:
from ordereddict import OrderedDict


def _attribute_sort_key(item):
    """Return a sort key for one ``(name, value)`` attribute pair.

    Only the attribute name takes part in the ordering; the value is
    ignored.  When the name is a tuple (the serializer unpacks names as
    ``(namespace, name)``), any ``None`` component is replaced by an empty
    string.  Python 3 refuses to order ``None`` against a string, so
    without this a mix of namespaced and un-namespaced attributes would
    raise ``TypeError``.  The empty string sorts before every other string,
    which keeps ``None`` entries first, as Python 2 ordered them.

    NOTE(review): assumes a missing namespace is represented as ``None``;
    confirm against the treewalkers feeding this filter.
    """
    name = item[0]
    if isinstance(name, tuple):
        return tuple("" if part is None else part for part in name)
    return name


class Filter(_base.Filter):
    """Filter that puts the attributes of each tag in alphabetical order.

    The serializer emits attributes in the order it receives them; wrap a
    token stream in this filter to get deterministic, lexicographically
    sorted attributes.
    """

    def __iter__(self):
        """Yield every token from the source, sorting tag attributes.

        For ``StartTag`` and ``EmptyTag`` tokens, ``token["data"]`` is
        replaced (on the token itself) by an ``OrderedDict`` whose entries
        are sorted by attribute name.  All other tokens pass through
        untouched.
        """
        for token in _base.Filter.__iter__(self):
            if token["type"] in ("StartTag", "EmptyTag"):
                token["data"] = OrderedDict(
                    sorted(token["data"].items(), key=_attribute_sort_key))
            yield token
2 changes: 1 addition & 1 deletion html5lib/serializer/htmlserializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ def serialize(self, treewalker, encoding=None):
in_cdata = True
elif in_cdata:
self.serializeError(_("Unexpected child element of a CDATA element"))
for (attr_namespace, attr_name), attr_value in sorted(token["data"].items()):
for (attr_namespace, attr_name), attr_value in token["data"].items():
# TODO: Add namespace support here
k = attr_name
v = attr_value
Expand Down
10 changes: 9 additions & 1 deletion html5lib/tests/test_serializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,20 @@

from .support import get_data_files

try:
from collections import OrderedDict
except ImportError:
# Python 2.6 support
from ordereddict import OrderedDict

try:
unittest.TestCase.assertEqual
except AttributeError:
unittest.TestCase.assertEqual = unittest.TestCase.assertEquals

import html5lib
from html5lib import serializer, constants
from html5lib.filters.alphabeticalattributes import Filter as AlphabeticalAttributesFilter
from html5lib.treewalkers._base import TreeWalker

optionals_loaded = []
Expand Down Expand Up @@ -81,7 +88,8 @@ def _convertAttrib(self, attribs):

def serialize_html(input, options):
options = dict([(str(k), v) for k, v in options.items()])
return serializer.HTMLSerializer(**options).render(JsonWalker(input), options.get("encoding", None))
stream = AlphabeticalAttributesFilter(JsonWalker(input))
return serializer.HTMLSerializer(**options).render(stream, options.get("encoding", None))


def runSerializerTest(input, expected, options):
Expand Down
3 changes: 3 additions & 0 deletions requirements-optional-2.6.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Can be used to force attributes to be serialized in alphabetical
# order.
ordereddict
1 change: 1 addition & 0 deletions requirements-test.txt
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
nose
ordereddict # Python 2.6
11 changes: 11 additions & 0 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,14 @@ deps =
Genshi
nose
six

[testenv:py26]
basepython = python2.6
deps =
charade
datrie
Genshi
lxml
nose
six
ordereddict

0 comments on commit 0c99d3a

Please sign in to comment.