1
1
mirror of https://git.launchpad.net/beautifulsoup synced 2025-10-06 00:12:49 +02:00

Fixed PyRight problems in html.parser builder.

This commit is contained in:
Leonard Richardson
2025-08-10 14:14:24 -04:00
parent e6603981f4
commit 9e4707d639

View File

@@ -125,7 +125,7 @@ class BeautifulSoupHTMLParser(HTMLParser, DetectsXMLParsedAsHTML):
raise ParserRejectedMarkup(message)
def handle_startendtag(
self, name: str, attrs: List[Tuple[str, Optional[str]]]
self, tag: str, attrs: List[Tuple[str, Optional[str]]]
) -> None:
"""Handle an incoming empty-element tag.
@@ -136,12 +136,12 @@ class BeautifulSoupHTMLParser(HTMLParser, DetectsXMLParsedAsHTML):
# just because its name matches a known empty-element tag. We
# know that this is an empty-element tag, and we want to call
# handle_endtag ourselves.
self.handle_starttag(name, attrs, handle_empty_element=False)
self.handle_endtag(name)
self.handle_starttag(tag, attrs, handle_empty_element=False)
self.handle_endtag(tag)
def handle_starttag(
self,
name: str,
tag: str,
attrs: List[Tuple[str, Optional[str]]],
handle_empty_element: bool = True,
) -> None:
@@ -172,17 +172,17 @@ class BeautifulSoupHTMLParser(HTMLParser, DetectsXMLParsedAsHTML):
on_dupe(attr_dict, key, value)
else:
attr_dict[key] = value
# print("START", name)
# print("START", tag)
sourceline: Optional[int]
sourcepos: Optional[int]
if self.soup.builder.store_line_numbers:
sourceline, sourcepos = self.getpos()
else:
sourceline = sourcepos = None
tag = self.soup.handle_starttag(
name, None, None, attr_dict, sourceline=sourceline, sourcepos=sourcepos
tagObj = self.soup.handle_starttag(
tag, None, None, attr_dict, sourceline=sourceline, sourcepos=sourcepos
)
if tag and tag.is_empty_element and handle_empty_element:
if tagObj is not None and tagObj.is_empty_element and handle_empty_element:
# Unlike other parsers, html.parser doesn't send separate end tag
# events for empty-element tags. (It's handled in
# handle_startendtag, but only if the original markup looked like
@@ -192,32 +192,32 @@ class BeautifulSoupHTMLParser(HTMLParser, DetectsXMLParsedAsHTML):
# know the start event is identical to the end event, we
# don't want handle_endtag() to cross off any previous end
# events for tags of this name.
self.handle_endtag(name, check_already_closed=False)
self.handle_endtag(tag, check_already_closed=False)
# But we might encounter an explicit closing tag for this tag
# later on. If so, we want to ignore it.
self.already_closed_empty_element.append(name)
self.already_closed_empty_element.append(tag)
if self._root_tag_name is None:
self._root_tag_encountered(name)
self._root_tag_encountered(tag)
def handle_endtag(self, name: str, check_already_closed: bool = True) -> None:
def handle_endtag(self, tag: str, check_already_closed: bool = True) -> None:
"""Handle a closing tag, e.g. '</tag>'
:param name: A tag name.
:param tag: A tag name.
:param check_already_closed: True if this tag is expected to
be the closing portion of an empty-element tag,
e.g. '<tag></tag>'.
"""
# print("END", name)
if check_already_closed and name in self.already_closed_empty_element:
# print("END", tag)
if check_already_closed and tag in self.already_closed_empty_element:
# This is a redundant end tag for an empty-element tag.
# We've already called handle_endtag() for it, so just
# check it off the list.
# print("ALREADY CLOSED", name)
self.already_closed_empty_element.remove(name)
# print("ALREADY CLOSED", tag)
self.already_closed_empty_element.remove(tag)
else:
self.soup.handle_endtag(name)
self.soup.handle_endtag(tag)
def handle_data(self, data: str) -> None:
"""Handle some textual data that shows up between tags."""
@@ -291,14 +291,14 @@ class BeautifulSoupHTMLParser(HTMLParser, DetectsXMLParsedAsHTML):
self.soup.handle_data(data)
self.soup.endData(Comment)
def handle_decl(self, data: str) -> None:
def handle_decl(self, decl: str) -> None:
"""Handle a DOCTYPE declaration.
:param decl: The text of the declaration.
"""
self.soup.endData()
data = data[len("DOCTYPE ") :]
self.soup.handle_data(data)
decl = decl[len("DOCTYPE ") :]
self.soup.handle_data(decl)
self.soup.endData(Doctype)
def unknown_decl(self, data: str) -> None: