|
|
|
@ -22,7 +22,7 @@ charref = re.compile('&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]') |
|
|
|
starttagopen = re.compile('<[a-zA-Z]') |
|
|
|
piclose = re.compile('>') |
|
|
|
commentclose = re.compile(r'--\s*>') |
|
|
|
tagfind = re.compile('[a-zA-Z][-.a-zA-Z0-9:_]*') |
|
|
|
tagfind = re.compile('([a-zA-Z][-.a-zA-Z0-9:_]*)(?:\s|/(?!>))*') |
|
|
|
# see http://www.w3.org/TR/html5/tokenization.html#tag-open-state |
|
|
|
# and http://www.w3.org/TR/html5/tokenization.html#tag-name-state |
|
|
|
tagfind_tolerant = re.compile('[a-zA-Z][^\t\n\r\f />\x00]*') |
|
|
|
@ -36,7 +36,7 @@ attrfind = re.compile( |
|
|
|
r'\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*' |
|
|
|
r'(\'[^\']*\'|"[^"]*"|[^\s"\'=<>`]*))?') |
|
|
|
attrfind_tolerant = re.compile( |
|
|
|
r'[\s/]*((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*' |
|
|
|
r'((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*' |
|
|
|
r'(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*') |
|
|
|
locatestarttagend = re.compile(r""" |
|
|
|
<[a-zA-Z][-.a-zA-Z0-9:_]* # tag name |
|
|
|
@ -327,7 +327,7 @@ class HTMLParser(_markupbase.ParserBase): |
|
|
|
match = tagfind.match(rawdata, i+1) |
|
|
|
assert match, 'unexpected call to parse_starttag()' |
|
|
|
k = match.end() |
|
|
|
self.lasttag = tag = rawdata[i+1:k].lower() |
|
|
|
self.lasttag = tag = match.group(1).lower() |
|
|
|
while k < endpos: |
|
|
|
if self.strict: |
|
|
|
m = attrfind.match(rawdata, k) |
|
|
|
|