diff --git a/.pytest.expect b/.pytest.expect
index c88e99b9..5f3b6194 100644
Binary files a/.pytest.expect and b/.pytest.expect differ
diff --git a/CHANGES.rst b/CHANGES.rst
index 4d0a1996..64162ccf 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -22,6 +22,10 @@ Released on XXX
 
 * Move testsuite to ``py.test``.
 
+* Fix #124: move to webencodings for decoding the input byte stream;
+  this makes html5lib compliant with the Encoding Standard, and
+  introduces a required dependency on webencodings.
+
 
 0.9999999/1.0b8
 ~~~~~~~~~~~~~~~
diff --git a/html5lib/constants.py b/html5lib/constants.py
index d938e0ae..f6e38cbf 100644
--- a/html5lib/constants.py
+++ b/html5lib/constants.py
@@ -2846,235 +2846,6 @@
     0x9F: "\u0178",
 }
 
-encodings = {
-    '437': 'cp437',
-    '850': 'cp850',
-    '852': 'cp852',
-    '855': 'cp855',
-    '857': 'cp857',
-    '860': 'cp860',
-    '861': 'cp861',
-    '862': 'cp862',
-    '863': 'cp863',
-    '865': 'cp865',
-    '866': 'cp866',
-    '869': 'cp869',
-    'ansix341968': 'ascii',
-    'ansix341986': 'ascii',
-    'arabic': 'iso8859-6',
-    'ascii': 'ascii',
-    'asmo708': 'iso8859-6',
-    'big5': 'big5',
-    'big5hkscs': 'big5hkscs',
-    'chinese': 'gbk',
-    'cp037': 'cp037',
-    'cp1026': 'cp1026',
-    'cp154': 'ptcp154',
-    'cp367': 'ascii',
-    'cp424': 'cp424',
-    'cp437': 'cp437',
-    'cp500': 'cp500',
-    'cp775': 'cp775',
-    'cp819': 'windows-1252',
-    'cp850': 'cp850',
-    'cp852': 'cp852',
-    'cp855': 'cp855',
-    'cp857': 'cp857',
-    'cp860': 'cp860',
-    'cp861': 'cp861',
-    'cp862': 'cp862',
-    'cp863': 'cp863',
-    'cp864': 'cp864',
-    'cp865': 'cp865',
-    'cp866': 'cp866',
-    'cp869': 'cp869',
-    'cp936': 'gbk',
-    'cpgr': 'cp869',
-    'cpis': 'cp861',
-    'csascii': 'ascii',
-    'csbig5': 'big5',
-    'cseuckr': 'cp949',
-    'cseucpkdfmtjapanese': 'euc_jp',
-    'csgb2312': 'gbk',
-    'cshproman8': 'hp-roman8',
-    'csibm037': 'cp037',
-    'csibm1026': 'cp1026',
-    'csibm424': 'cp424',
-    'csibm500': 'cp500',
-    'csibm855': 'cp855',
-    'csibm857': 'cp857',
-    'csibm860': 'cp860',
-    'csibm861': 'cp861',
-    'csibm863': 'cp863',
-    'csibm864': 'cp864',
-    'csibm865': 'cp865',
-    'csibm866': 'cp866',
-    'csibm869': 'cp869',
-    'csiso2022jp': 'iso2022_jp',
-    'csiso2022jp2': 'iso2022_jp_2',
-    'csiso2022kr': 'iso2022_kr',
-    'csiso58gb231280': 'gbk',
-    'csisolatin1': 'windows-1252',
-    'csisolatin2': 'iso8859-2',
-    'csisolatin3': 'iso8859-3',
-    'csisolatin4': 'iso8859-4',
-    'csisolatin5': 'windows-1254',
-    'csisolatin6': 'iso8859-10',
-    'csisolatinarabic': 'iso8859-6',
-    'csisolatincyrillic': 'iso8859-5',
-    'csisolatingreek': 'iso8859-7',
-    'csisolatinhebrew': 'iso8859-8',
-    'cskoi8r': 'koi8-r',
-    'csksc56011987': 'cp949',
-    'cspc775baltic': 'cp775',
-    'cspc850multilingual': 'cp850',
-    'cspc862latinhebrew': 'cp862',
-    'cspc8codepage437': 'cp437',
-    'cspcp852': 'cp852',
-    'csptcp154': 'ptcp154',
-    'csshiftjis': 'shift_jis',
-    'csunicode11utf7': 'utf-7',
-    'cyrillic': 'iso8859-5',
-    'cyrillicasian': 'ptcp154',
-    'ebcdiccpbe': 'cp500',
-    'ebcdiccpca': 'cp037',
-    'ebcdiccpch': 'cp500',
-    'ebcdiccphe': 'cp424',
-    'ebcdiccpnl': 'cp037',
-    'ebcdiccpus': 'cp037',
-    'ebcdiccpwt': 'cp037',
-    'ecma114': 'iso8859-6',
-    'ecma118': 'iso8859-7',
-    'elot928': 'iso8859-7',
-    'eucjp': 'euc_jp',
-    'euckr': 'cp949',
-    'extendedunixcodepackedformatforjapanese': 'euc_jp',
-    'gb18030': 'gb18030',
-    'gb2312': 'gbk',
-    'gb231280': 'gbk',
-    'gbk': 'gbk',
-    'greek': 'iso8859-7',
-    'greek8': 'iso8859-7',
-    'hebrew': 'iso8859-8',
-    'hproman8': 'hp-roman8',
-    'hzgb2312': 'hz',
-    'ibm037': 'cp037',
-    'ibm1026': 'cp1026',
-    'ibm367': 'ascii',
-    'ibm424': 'cp424',
-    'ibm437': 'cp437',
-    'ibm500': 'cp500',
-    'ibm775': 'cp775',
-    'ibm819': 'windows-1252',
-    'ibm850': 'cp850',
-    'ibm852': 'cp852',
-    'ibm855': 'cp855',
-    'ibm857': 'cp857',
-    'ibm860': 'cp860',
-    'ibm861': 'cp861',
-    'ibm862': 'cp862',
-    'ibm863': 'cp863',
-    'ibm864': 'cp864',
-    'ibm865': 'cp865',
-    'ibm866': 'cp866',
-    'ibm869': 'cp869',
-    'iso2022jp': 'iso2022_jp',
-    'iso2022jp2': 'iso2022_jp_2',
-    'iso2022kr': 'iso2022_kr',
-    'iso646irv1991': 'ascii',
-    'iso646us': 'ascii',
-    'iso88591': 'windows-1252',
-    'iso885910': 'iso8859-10',
-    'iso8859101992': 'iso8859-10',
-    'iso885911987': 'windows-1252',
-    'iso885913': 'iso8859-13',
-    'iso885914': 'iso8859-14',
-    'iso8859141998': 'iso8859-14',
-    'iso885915': 'iso8859-15',
-    'iso885916': 'iso8859-16',
-    'iso8859162001': 'iso8859-16',
-    'iso88592': 'iso8859-2',
-    'iso885921987': 'iso8859-2',
-    'iso88593': 'iso8859-3',
-    'iso885931988': 'iso8859-3',
-    'iso88594': 'iso8859-4',
-    'iso885941988': 'iso8859-4',
-    'iso88595': 'iso8859-5',
-    'iso885951988': 'iso8859-5',
-    'iso88596': 'iso8859-6',
-    'iso885961987': 'iso8859-6',
-    'iso88597': 'iso8859-7',
-    'iso885971987': 'iso8859-7',
-    'iso88598': 'iso8859-8',
-    'iso885981988': 'iso8859-8',
-    'iso88599': 'windows-1254',
-    'iso885991989': 'windows-1254',
-    'isoceltic': 'iso8859-14',
-    'isoir100': 'windows-1252',
-    'isoir101': 'iso8859-2',
-    'isoir109': 'iso8859-3',
-    'isoir110': 'iso8859-4',
-    'isoir126': 'iso8859-7',
-    'isoir127': 'iso8859-6',
-    'isoir138': 'iso8859-8',
-    'isoir144': 'iso8859-5',
-    'isoir148': 'windows-1254',
-    'isoir149': 'cp949',
-    'isoir157': 'iso8859-10',
-    'isoir199': 'iso8859-14',
-    'isoir226': 'iso8859-16',
-    'isoir58': 'gbk',
-    'isoir6': 'ascii',
-    'koi8r': 'koi8-r',
-    'koi8u': 'koi8-u',
-    'korean': 'cp949',
-    'ksc5601': 'cp949',
-    'ksc56011987': 'cp949',
-    'ksc56011989': 'cp949',
-    'l1': 'windows-1252',
-    'l10': 'iso8859-16',
-    'l2': 'iso8859-2',
-    'l3': 'iso8859-3',
-    'l4': 'iso8859-4',
-    'l5': 'windows-1254',
-    'l6': 'iso8859-10',
-    'l8': 'iso8859-14',
-    'latin1': 'windows-1252',
-    'latin10': 'iso8859-16',
-    'latin2': 'iso8859-2',
-    'latin3': 'iso8859-3',
-    'latin4': 'iso8859-4',
-    'latin5': 'windows-1254',
-    'latin6': 'iso8859-10',
-    'latin8': 'iso8859-14',
-    'latin9': 'iso8859-15',
-    'ms936': 'gbk',
-    'mskanji': 'shift_jis',
-    'pt154': 'ptcp154',
-    'ptcp154': 'ptcp154',
-    'r8': 'hp-roman8',
-    'roman8': 'hp-roman8',
-    'shiftjis': 'shift_jis',
-    'tis620': 'cp874',
-    'unicode11utf7': 'utf-7',
-    'us': 'ascii',
-    'usascii': 'ascii',
-    'utf16': 'utf-16',
-    'utf16be': 'utf-16-be',
-    'utf16le': 'utf-16-le',
-    'utf8': 'utf-8',
-    'windows1250': 'cp1250',
-    'windows1251': 'cp1251',
-    'windows1252': 'cp1252',
-    'windows1253': 'cp1253',
-    'windows1254': 'cp1254',
-    'windows1255': 'cp1255',
-    'windows1256': 'cp1256',
-    'windows1257': 'cp1257',
-    'windows1258': 'cp1258',
-    'windows936': 'gbk',
-    'x-x-big5': 'big5'}
-
 tokenTypes = {
     "Doctype": 0,
     "Characters": 1,
diff --git a/html5lib/html5parser.py b/html5lib/html5parser.py
index ae980c55..ed44a552 100644
--- a/html5lib/html5parser.py
+++ b/html5lib/html5parser.py
@@ -139,7 +139,7 @@ def documentEncoding(self):
         """
         if not hasattr(self, 'tokenizer'):
             return None
-        return self.tokenizer.stream.charEncoding[0]
+        return self.tokenizer.stream.charEncoding[0].name
 
     def isHTMLIntegrationPoint(self, element):
         if (element.name == "annotation-xml" and
diff --git a/html5lib/inputstream.py b/html5lib/inputstream.py
index 63373db9..20f6c95a 100644
--- a/html5lib/inputstream.py
+++ b/html5lib/inputstream.py
@@ -1,13 +1,15 @@
 from __future__ import absolute_import, division, unicode_literals
 
-from six import text_type
+from six import text_type, binary_type
 from six.moves import http_client, urllib
 
 import codecs
 import re
 
+import webencodings
+
 from .constants import EOF, spaceCharacters, asciiLetters, asciiUppercase
-from .constants import encodings, ReparseException
+from .constants import ReparseException
 from . import utils
 
 from io import StringIO
@@ -195,7 +197,7 @@ def __init__(self, source):
         # List of where new lines occur
         self.newLines = [0]
 
-        self.charEncoding = ("utf-8", "certain")
+        self.charEncoding = (lookupEncoding("utf-8"), "certain")
         self.dataStream = self.openStream(source)
 
         self.reset()
@@ -421,7 +423,7 @@ def __init__(self, source, encoding=None, parseMeta=True, chardet=True):
 
         HTMLUnicodeInputStream.__init__(self, self.rawStream)
 
-        self.charEncoding = (codecName(encoding), "certain")
+        self.charEncoding = (lookupEncoding(encoding), "certain")
 
         # Encoding Information
         # Number of bytes to use when looking for a meta element with
@@ -440,8 +442,7 @@ def __init__(self, source, encoding=None, parseMeta=True, chardet=True):
         self.reset()
 
     def reset(self):
-        self.dataStream = codecs.getreader(self.charEncoding[0])(self.rawStream,
-                                                                 'replace')
+        self.dataStream = self.charEncoding[0].codec_info.streamreader(self.rawStream, 'replace')
         HTMLUnicodeInputStream.reset(self)
 
     def openStream(self, source):
@@ -491,30 +492,25 @@ def detectEncoding(self, parseMeta=True, chardet=True):
                     buffers.append(buffer)
                     detector.feed(buffer)
                 detector.close()
-                encoding = detector.result['encoding']
+                encoding = lookupEncoding(detector.result['encoding'])
                 self.rawStream.seek(0)
             except ImportError:
                 pass
         # If all else fails use the default encoding
         if encoding is None:
             confidence = "tentative"
-            encoding = self.defaultEncoding
-
-        # Substitute for equivalent encodings:
-        encodingSub = {"iso-8859-1": "windows-1252"}
-
-        if encoding.lower() in encodingSub:
-            encoding = encodingSub[encoding.lower()]
+            encoding = lookupEncoding(self.defaultEncoding)
 
         return encoding, confidence
 
     def changeEncoding(self, newEncoding):
         assert self.charEncoding[1] != "certain"
-        newEncoding = codecName(newEncoding)
-        if newEncoding in ("utf-16", "utf-16-be", "utf-16-le"):
-            newEncoding = "utf-8"
+        newEncoding = lookupEncoding(newEncoding)
         if newEncoding is None:
             return
-        elif newEncoding == self.charEncoding[0]:
+        if newEncoding.name in ("utf-16be", "utf-16le"):
+            newEncoding = lookupEncoding("utf-8")  # applied by the check below
+            assert newEncoding is not None
+        if newEncoding == self.charEncoding[0]:
             self.charEncoding = (self.charEncoding[0], "certain")
         else:
@@ -529,8 +525,8 @@ def detectBOM(self):
         encoding otherwise return None"""
         bomDict = {
             codecs.BOM_UTF8: 'utf-8',
-            codecs.BOM_UTF16_LE: 'utf-16-le', codecs.BOM_UTF16_BE: 'utf-16-be',
-            codecs.BOM_UTF32_LE: 'utf-32-le', codecs.BOM_UTF32_BE: 'utf-32-be'
+            codecs.BOM_UTF16_LE: 'utf-16le', codecs.BOM_UTF16_BE: 'utf-16be',
+            codecs.BOM_UTF32_LE: 'utf-32le', codecs.BOM_UTF32_BE: 'utf-32be'
         }
 
         # Go to beginning of file and read in 4 bytes
@@ -550,9 +546,12 @@ def detectBOM(self):
 
         # Set the read position past the BOM if one was found, otherwise
         # set it to the start of the stream
-        self.rawStream.seek(encoding and seek or 0)
-
-        return encoding
+        if encoding:
+            self.rawStream.seek(seek)
+            return lookupEncoding(encoding)
+        else:
+            self.rawStream.seek(0)
+            return None
 
     def detectEncodingMeta(self):
         """Report the encoding declared by the meta element
@@ -563,8 +562,8 @@ def detectEncodingMeta(self):
         self.rawStream.seek(0)
         encoding = parser.getEncoding()
 
-        if encoding in ("utf-16", "utf-16-be", "utf-16-le"):
-            encoding = "utf-8"
+        if encoding is not None and encoding.name in ("utf-16be", "utf-16le"):
+            encoding = lookupEncoding("utf-8")
 
         return encoding
 
@@ -727,7 +726,7 @@ def handleMeta(self):
                         return False
                 elif attr[0] == b"charset":
                     tentativeEncoding = attr[1]
-                    codec = codecName(tentativeEncoding)
+                    codec = lookupEncoding(tentativeEncoding)
                     if codec is not None:
                         self.encoding = codec
                         return False
@@ -735,7 +734,7 @@ def handleMeta(self):
                     contentParser = ContentAttrParser(EncodingBytes(attr[1]))
                     tentativeEncoding = contentParser.parse()
                     if tentativeEncoding is not None:
-                        codec = codecName(tentativeEncoding)
+                        codec = lookupEncoding(tentativeEncoding)
                         if codec is not None:
                             if hasPragma:
                                 self.encoding = codec
@@ -892,16 +891,24 @@ def parse(self):
             return None
 
 
-def codecName(encoding):
-    """Return the python codec name corresponding to an encoding or None if the
-    string doesn't correspond to a valid encoding."""
-    if isinstance(encoding, bytes):
+def lookupEncoding(encoding):
+    """Look up an encoding label with ``webencodings.lookup``.
+
+    Returns the encoding object produced by that lookup (callers read its
+    ``.name`` and ``.codec_info``), not a Python codec name. Returns None
+    when ``encoding`` is None, is a byte string that is not valid ASCII, or
+    is not accepted by ``webencodings.lookup``.
+    """
+    if isinstance(encoding, binary_type):
         try:
             encoding = encoding.decode("ascii")
         except UnicodeDecodeError:
             return None
-    if encoding:
-        canonicalName = ascii_punctuation_re.sub("", encoding).lower()
-        return encodings.get(canonicalName, None)
+
+    if encoding is not None:
+        try:
+            return webencodings.lookup(encoding)
+        except AttributeError:
+            return None
     else:
         return None
diff --git a/html5lib/tests/test_encoding.py b/html5lib/tests/test_encoding.py
index d774ce0f..837e989f 100644
--- a/html5lib/tests/test_encoding.py
+++ b/html5lib/tests/test_encoding.py
@@ -12,20 +12,6 @@
 from html5lib import HTMLParser, inputstream
 
 
-class Html5EncodingTestCase(unittest.TestCase):
-    def test_codec_name_a(self):
-        self.assertEqual(inputstream.codecName("utf-8"), "utf-8")
-
-    def test_codec_name_b(self):
-        self.assertEqual(inputstream.codecName("utf8"), "utf-8")
-
-    def test_codec_name_c(self):
-        self.assertEqual(inputstream.codecName("  utf8  "), "utf-8")
-
-    def test_codec_name_d(self):
-        self.assertEqual(inputstream.codecName("ISO_8859--1"), "windows-1252")
-
-
 def runParserEncodingTest(data, encoding):
     p = HTMLParser()
     assert p.documentEncoding is None
@@ -43,7 +29,7 @@ def runPreScanEncodingTest(data, encoding):
     if len(data) > stream.numBytesMeta:
         return
 
-    assert encoding == stream.charEncoding[0], errorMessage(data, encoding, stream.charEncoding[0])
+    assert encoding == stream.charEncoding[0].name, errorMessage(data, encoding, stream.charEncoding[0].name)
 
 
 def test_encoding():
@@ -64,4 +50,4 @@ def test_encoding():
     def test_chardet():
         with open(os.path.join(test_dir, "encoding" , "chardet", "test_big5.txt"), "rb") as fp:
             encoding = inputstream.HTMLInputStream(fp.read()).charEncoding
-            assert encoding[0].lower() == "big5"
+            assert encoding[0].name == "big5"
diff --git a/html5lib/tests/test_stream.py b/html5lib/tests/test_stream.py
index 4436ef8a..ed203766 100644
--- a/html5lib/tests/test_stream.py
+++ b/html5lib/tests/test_stream.py
@@ -86,29 +86,29 @@ class HTMLInputStreamTest(unittest.TestCase):
 
     def test_char_ascii(self):
         stream = HTMLInputStream(b"'", encoding='ascii')
-        self.assertEqual(stream.charEncoding[0], 'ascii')
+        self.assertEqual(stream.charEncoding[0].name, 'windows-1252')
         self.assertEqual(stream.char(), "'")
 
     def test_char_utf8(self):
         stream = HTMLInputStream('\u2018'.encode('utf-8'), encoding='utf-8')
-        self.assertEqual(stream.charEncoding[0], 'utf-8')
+        self.assertEqual(stream.charEncoding[0].name, 'utf-8')
         self.assertEqual(stream.char(), '\u2018')
 
     def test_char_win1252(self):
         stream = HTMLInputStream("\xa9\xf1\u2019".encode('windows-1252'))
-        self.assertEqual(stream.charEncoding[0], 'windows-1252')
+        self.assertEqual(stream.charEncoding[0].name, 'windows-1252')
         self.assertEqual(stream.char(), "\xa9")
         self.assertEqual(stream.char(), "\xf1")
         self.assertEqual(stream.char(), "\u2019")
 
     def test_bom(self):
         stream = HTMLInputStream(codecs.BOM_UTF8 + b"'")
-        self.assertEqual(stream.charEncoding[0], 'utf-8')
+        self.assertEqual(stream.charEncoding[0].name, 'utf-8')
         self.assertEqual(stream.char(), "'")
 
     def test_utf_16(self):
         stream = HTMLInputStream((' ' * 1025).encode('utf-16'))
-        self.assertTrue(stream.charEncoding[0] in ['utf-16-le', 'utf-16-be'], stream.charEncoding)
+        self.assertTrue(stream.charEncoding[0].name in ['utf-16le', 'utf-16be'], stream.charEncoding)
         self.assertEqual(len(stream.charsUntil(' ', True)), 1025)
 
     def test_newlines(self):
diff --git a/requirements.txt b/requirements.txt
index ffe2fce4..15cae9dc 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1 +1,2 @@
 six
+webencodings
diff --git a/setup.py b/setup.py
index 7b06b45e..187a4169 100644
--- a/setup.py
+++ b/setup.py
@@ -57,6 +57,7 @@
       packages=packages,
       install_requires=[
           'six',
+          'webencodings',
       ],
       extras_require={
           # A empty extra that only has a conditional marker will be
diff --git a/tox.ini b/tox.ini
index e66298d5..4a29b553 100644
--- a/tox.ini
+++ b/tox.ini
@@ -7,6 +7,8 @@ deps =
   pytest
   pytest-expect>=1.0,<2.0
   mock
+  base: six
+  base: webencodings
   py26-base: ordereddict
   optional: -r{toxinidir}/requirements-optional.txt
 
diff --git a/utils/iana_parse.py b/utils/iana_parse.py
deleted file mode 100644
index 6dde94c2..00000000
--- a/utils/iana_parse.py
+++ /dev/null
@@ -1,24 +0,0 @@
-#!/usr/bin/env python
-import sys
-import urllib.request, urllib.error, urllib.parse
-import codecs
-
-def main():
-    encodings = []
-    f = urllib.request.urlopen(sys.argv[1])
-    for line in f:
-        if line.startswith("Name: ") or line.startswith("Alias: "):
-            enc = line.split()[1]
-            try:
-                codecs.lookup(enc)
-                if enc.lower not in encodings:
-                    encodings.append(enc.lower())
-            except LookupError:
-                pass
-    sys.stdout.write("encodings = frozenset((\n")
-    for enc in encodings:
-        sys.stdout.write('    "%s",\n'%enc)
-    sys.stdout.write('    ))')
-
-if __name__ == "__main__":
-    main()
\ No newline at end of file