Optimised PCDATA Data State a bit (saves maybe 3%)
--HG--
extra : convert_revision : svn%3Aacbfec75-9323-0410-a652-858a13e371e0/trunk%401240
philiptaylor committed Dec 18, 2008
1 parent cfb1e85 commit b7c7de7
Showing 1 changed file, src/html5lib/tokenizer.py, with 13 additions and 6 deletions.
@@ -354,15 +354,22 @@ def dataState(self):
             self.tokenQueue.append({"type": "SpaceCharacters", "data":
               data + self.stream.charsUntil(spaceCharacters, True)})
             # No need to update lastFourChars here, since the first space will
-            # have already broken any <!-- or --> sequences
+            # have already been appended to lastFourChars and will have broken
+            # any <!-- or --> sequences
         else:
-            chars = self.stream.charsUntil(("&", "<", ">", "-"))
-            self.tokenQueue.append({"type": "Characters", "data":
-              data + chars})
-            self.lastFourChars += chars[-4:]
-            self.lastFourChars = self.lastFourChars[-4:]
+            if self.contentModelFlag in\
+              (contentModelFlags["CDATA"], contentModelFlags["RCDATA"]):
+                chars = self.stream.charsUntil((u"&", u"<", u">", u"-"))
+                self.lastFourChars += chars[-4:]
+                self.lastFourChars = self.lastFourChars[-4:]
+            else:
+                chars = self.stream.charsUntil((u"&", u"<"))
+                # lastFourChars only needs to be kept up-to-date if we're
+                # in CDATA or RCDATA, so ignore it here
+            self.tokenQueue.append({"type": "Characters", "data":
+              data + chars})
         return True
 
     def entityDataState(self):
         entity = self.consumeEntity()
         if entity:
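For context on why this helps: lastFourChars exists only so the tokenizer can spot "<!--" and "-->" sequences, which matter in the CDATA and RCDATA content models (script/style text) but not in PCDATA. After this change the PCDATA path stops only at "&" and "<", so it consumes longer runs per call and skips the per-run string slicing, which is presumably where the quoted ~3% comes from. A rough sketch of the idea, using hypothetical helper names (chars_until, consume_data_run) rather than html5lib's real code:

    CDATA, RCDATA, PCDATA = "CDATA", "RCDATA", "PCDATA"

    def chars_until(text, pos, stop_chars):
        """Consume characters from text[pos:] until one of stop_chars is hit."""
        start = pos
        while pos < len(text) and text[pos] not in stop_chars:
            pos += 1
        return text[start:pos], pos

    def consume_data_run(text, pos, content_model, last_four=""):
        # Hypothetical sketch of the optimisation, not html5lib's API.
        if content_model in (CDATA, RCDATA):
            # Must also stop at ">" and "-", and keep the last four characters
            # around so "<!--" / "-->" sequences can still be detected later.
            chars, pos = chars_until(text, pos, ("&", "<", ">", "-"))
            last_four = (last_four + chars)[-4:]
        else:
            # PCDATA: only "&" and "<" matter, so a longer run comes back from
            # one call and the lastFourChars bookkeeping is skipped entirely.
            chars, pos = chars_until(text, pos, ("&", "<"))
        return chars, pos, last_four

    # In PCDATA the whole run "a -- b > c" is consumed in one call; the same
    # input in CDATA/RCDATA stops at the first "-".
    print(consume_data_run("a -- b > c<b>", 0, PCDATA))  # ('a -- b > c', 10, '')
    print(consume_data_run("a -- b > c<b>", 0, CDATA))   # ('a ', 2, 'a ')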

0 comments on commit b7c7de7
