mirror of
https://git.launchpad.net/beautifulsoup
synced 2025-10-06 00:12:49 +02:00
Compare commits
26 Commits
c909c29d12
...
bf17d951d2
Author | SHA1 | Date | |
---|---|---|---|
|
bf17d951d2 | ||
|
cd548aaed4 | ||
|
f58228ff0b | ||
|
222984beae | ||
|
da76f83d8a | ||
|
a7cb7745f7 | ||
|
f723296e22 | ||
|
f320fa4fce | ||
|
23c15c2fb1 | ||
|
9bae5a1201 | ||
|
4c5cefec13 | ||
|
0e3199db71 | ||
|
d225e021b9 | ||
|
94408c845d | ||
|
7c06afd400 | ||
|
c1c6162581 | ||
|
4f44b052a1 | ||
|
70c7473e6c | ||
|
85f31a23af | ||
|
9e4707d639 | ||
|
e6603981f4 | ||
|
2b490a9495 | ||
|
cd24e0084b | ||
|
7f4b643e98 | ||
|
62566d8d48 | ||
|
6ba889aa56 |
41
CHANGELOG
41
CHANGELOG
@@ -1,4 +1,35 @@
|
||||
= 4.13.5 (Unreleased)
|
||||
= Unreleased
|
||||
|
||||
* This version adds function overloading to the find_* methods to make
|
||||
it easier to write type-safe Python.
|
||||
|
||||
In most cases you can just assign the result of a find() or
|
||||
find_all() call to the type of object you're expecting to get back:
|
||||
a Tag, a NavigableString, a Sequence[Tag], or a
|
||||
Sequence[NavigableString]. It's very rare that you'll have to do a
|
||||
cast like you did in previous versions of Beautiful Soup.
|
||||
|
||||
(In fact, the only time you should still have to use a cast is if
|
||||
you pass both 'string' and one of the other arguments into one of
|
||||
these methods, e.g. tag.find("a", string="tag contents").)
|
||||
|
||||
* The typing for find_parent() and find_parents() was improved without
|
||||
any overloading. Casts should never be necessary, since those
|
||||
methods only ever return Tag and ResultSet[Tag], respectively.
|
||||
|
||||
* ResultSet now inherits from Sequence. This should make it easier to
|
||||
incorporate ResultSets into your type system without needing to
|
||||
handle ResultSet specially.
|
||||
|
||||
* Fixed an unhandled exception when creating the string representation of
|
||||
a decomposed element. (The output is not *useful* and you still
|
||||
shouldn't do this, but it won't raise an exception anymore.) [bug=2120300]
|
||||
|
||||
* The default value for the 'attrs' argument in find* methods is now
|
||||
None, not the empty dictionary. This should have no visible effect
|
||||
on anything.
|
||||
|
||||
= 4.13.5 (20250824)
|
||||
|
||||
* Fixed an unhandled exception when parsing invalid markup that contains the { character
|
||||
when using lxml==6.0.0. [bug=2116306]
|
||||
@@ -14,6 +45,11 @@
|
||||
* Used overloading to improve type hints for prettify().
|
||||
* Updated the SoupStrainer documentation to clarify that during initial
|
||||
parsing, attribute values are always passed into the SoupStrainer as raw strings. [bug=2111651]
|
||||
* Fixed all type checking errors issued by pyright. (Previously only mypy
|
||||
was used for type checking.)
|
||||
* Improved the type hints for PageElement.replace_with. [bug=2114746]
|
||||
* Improved the type hint for the arguments of the lambda function that can
|
||||
be used to match a tag's attribute. [bug=2110401]
|
||||
* Modified some of the lxml tests to accommodate behavioral changes in libxml2
|
||||
2.14.3. Specifically:
|
||||
|
||||
@@ -24,9 +60,6 @@
|
||||
|
||||
2. Out-of-range numeric entities are replaced with REPLACEMENT
|
||||
CHARACTER rather than omitted entirely. [bug=2112242]
|
||||
* Improved the type hints for PageElement.replace_with. [bug=2114746]
|
||||
* Improved the type hint for the arguments of the lambda function that can
|
||||
be used to match a tag's attribute. [bug=2110401]
|
||||
|
||||
= 4.13.4 (20250415)
|
||||
|
||||
|
@@ -15,7 +15,7 @@ documentation: http://www.crummy.com/software/BeautifulSoup/bs4/doc/
|
||||
"""
|
||||
|
||||
__author__ = "Leonard Richardson (leonardr@segfault.org)"
|
||||
__version__ = "4.13.4"
|
||||
__version__ = "4.13.5"
|
||||
__copyright__ = "Copyright (c) 2004-2025 Leonard Richardson"
|
||||
# Use of this source code is governed by the MIT license.
|
||||
__license__ = "MIT"
|
||||
@@ -101,6 +101,7 @@ from typing import (
|
||||
Iterator,
|
||||
List,
|
||||
Sequence,
|
||||
Sized,
|
||||
Optional,
|
||||
Type,
|
||||
Union,
|
||||
@@ -444,7 +445,7 @@ class BeautifulSoup(Tag):
|
||||
raise TypeError(
|
||||
f"Incoming markup is of an invalid type: {markup!r}. Markup must be a string, a bytestring, or an open filehandle."
|
||||
)
|
||||
elif len(markup) <= 256 and (
|
||||
elif isinstance(markup, Sized) and len(markup) <= 256 and (
|
||||
(isinstance(markup, bytes) and b"<" not in markup and b"\n" not in markup)
|
||||
or (isinstance(markup, str) and "<" not in markup and "\n" not in markup)
|
||||
):
|
||||
|
@@ -266,7 +266,7 @@ class TreeBuilderForHtml5lib(treebuilder_base.TreeBuilder):
|
||||
def getDocument(self) -> "BeautifulSoup":
|
||||
return self.soup
|
||||
|
||||
def testSerializer(self, element: "Element") -> None:
|
||||
def testSerializer(self, node: "Element") -> None:
|
||||
"""This is only used by the html5lib unit tests. Since we
|
||||
don't currently hook into those tests, the implementation is
|
||||
left blank.
|
||||
@@ -321,10 +321,21 @@ class AttrList(object):
|
||||
|
||||
|
||||
class BeautifulSoupNode(treebuilder_base.Node):
|
||||
element: PageElement
|
||||
# A node can correspond to _either_ a Tag _or_ a NavigableString.
|
||||
tag: Optional[Tag]
|
||||
string: Optional[NavigableString]
|
||||
soup: "BeautifulSoup"
|
||||
namespace: Optional[_NamespaceURL]
|
||||
|
||||
@property
|
||||
def element(self) -> PageElement:
|
||||
assert self.tag is not None or self.string is not None
|
||||
if self.tag is not None:
|
||||
return self.tag
|
||||
else:
|
||||
assert self.string is not None
|
||||
return self.string
|
||||
|
||||
@property
|
||||
def nodeType(self) -> int:
|
||||
"""Return the html5lib constant corresponding to the type of
|
||||
@@ -342,22 +353,24 @@ class BeautifulSoupNode(treebuilder_base.Node):
|
||||
|
||||
|
||||
class Element(BeautifulSoupNode):
|
||||
element: Tag
|
||||
namespace: Optional[_NamespaceURL]
|
||||
|
||||
def __init__(
|
||||
self, element: Tag, soup: "BeautifulSoup", namespace: Optional[_NamespaceURL]
|
||||
):
|
||||
treebuilder_base.Node.__init__(self, element.name)
|
||||
self.element = element
|
||||
self.tag = element
|
||||
self.string = None
|
||||
self.soup = soup
|
||||
self.namespace = namespace
|
||||
treebuilder_base.Node.__init__(self, element.name)
|
||||
|
||||
def appendChild(self, node: "BeautifulSoupNode") -> None:
|
||||
string_child: Optional[NavigableString] = None
|
||||
child: PageElement
|
||||
if type(node.element) is NavigableString:
|
||||
string_child = child = node.element
|
||||
if type(node.string) is NavigableString:
|
||||
# We check for NavigableString *only* because we want to avoid
|
||||
# joining PreformattedStrings, such as Comments, with nearby strings.
|
||||
string_child = child = node.string
|
||||
else:
|
||||
child = node.element
|
||||
node.parent = self
|
||||
@@ -371,13 +384,13 @@ class Element(BeautifulSoupNode):
|
||||
|
||||
if (
|
||||
string_child is not None
|
||||
and self.element.contents
|
||||
and type(self.element.contents[-1]) is NavigableString
|
||||
and self.tag is not None and self.tag.contents
|
||||
and type(self.tag.contents[-1]) is NavigableString
|
||||
):
|
||||
# We are appending a string onto another string.
|
||||
# TODO This has O(n^2) performance, for input like
|
||||
# "a</a>a</a>a</a>..."
|
||||
old_element = self.element.contents[-1]
|
||||
old_element = self.tag.contents[-1]
|
||||
new_element = self.soup.new_string(old_element + string_child)
|
||||
old_element.replace_with(new_element)
|
||||
self.soup._most_recent_element = new_element
|
||||
@@ -389,8 +402,8 @@ class Element(BeautifulSoupNode):
|
||||
# Tell Beautiful Soup to act as if it parsed this element
|
||||
# immediately after the parent's last descendant. (Or
|
||||
# immediately after the parent, if it has no children.)
|
||||
if self.element.contents:
|
||||
most_recent_element = self.element._last_descendant(False)
|
||||
if self.tag is not None and self.tag.contents:
|
||||
most_recent_element = self.tag._last_descendant(False)
|
||||
elif self.element.next_element is not None:
|
||||
# Something from further ahead in the parse tree is
|
||||
# being inserted into this earlier element. This is
|
||||
@@ -401,13 +414,12 @@ class Element(BeautifulSoupNode):
|
||||
most_recent_element = self.element
|
||||
|
||||
self.soup.object_was_parsed(
|
||||
child, parent=self.element, most_recent_element=most_recent_element
|
||||
child, parent=self.tag, most_recent_element=most_recent_element
|
||||
)
|
||||
|
||||
def getAttributes(self) -> AttrList:
|
||||
if isinstance(self.element, Comment):
|
||||
return {}
|
||||
return AttrList(self.element)
|
||||
assert self.tag is not None
|
||||
return AttrList(self.tag)
|
||||
|
||||
# An HTML5lib attribute name may either be a single string,
|
||||
# or a tuple (namespace, name).
|
||||
@@ -417,6 +429,7 @@ class Element(BeautifulSoupNode):
|
||||
_Html5libAttributes: TypeAlias = Dict[_Html5libAttributeName, str]
|
||||
|
||||
def setAttributes(self, attributes: Optional[_Html5libAttributes]) -> None:
|
||||
assert self.tag is not None
|
||||
if attributes is not None and len(attributes) > 0:
|
||||
# Replace any namespaced attributes with
|
||||
# NamespacedAttribute objects.
|
||||
@@ -439,14 +452,14 @@ class Element(BeautifulSoupNode):
|
||||
# Then set the attributes on the Tag associated with this
|
||||
# BeautifulSoupNode.
|
||||
for name, value_or_values in list(normalized_attributes.items()):
|
||||
self.element[name] = value_or_values
|
||||
self.tag[name] = value_or_values
|
||||
|
||||
# The attributes may contain variables that need substitution.
|
||||
# Call set_up_substitutions manually.
|
||||
#
|
||||
# The Tag constructor called this method when the Tag was created,
|
||||
# but we just set/changed the attributes, so call it again.
|
||||
self.soup.builder.set_up_substitutions(self.element)
|
||||
self.soup.builder.set_up_substitutions(self.tag)
|
||||
|
||||
attributes = property(getAttributes, setAttributes)
|
||||
|
||||
@@ -462,32 +475,35 @@ class Element(BeautifulSoupNode):
|
||||
def insertBefore(
|
||||
self, node: "BeautifulSoupNode", refNode: "BeautifulSoupNode"
|
||||
) -> None:
|
||||
index = self.element.index(refNode.element)
|
||||
assert self.tag is not None
|
||||
index = self.tag.index(refNode.element)
|
||||
if (
|
||||
type(node.element) is NavigableString
|
||||
and self.element.contents
|
||||
and type(self.element.contents[index - 1]) is NavigableString
|
||||
and self.tag.contents
|
||||
and type(self.tag.contents[index - 1]) is NavigableString
|
||||
):
|
||||
# (See comments in appendChild)
|
||||
old_node = self.element.contents[index - 1]
|
||||
old_node = self.tag.contents[index - 1]
|
||||
assert type(old_node) is NavigableString
|
||||
new_str = self.soup.new_string(old_node + node.element)
|
||||
old_node.replace_with(new_str)
|
||||
else:
|
||||
self.element.insert(index, node.element)
|
||||
self.tag.insert(index, node.element)
|
||||
node.parent = self
|
||||
|
||||
def removeChild(self, node: "Element") -> None:
|
||||
node.element.extract()
|
||||
|
||||
def reparentChildren(self, new_parent: "Element") -> None:
|
||||
def reparentChildren(self, newParent: "Element") -> None:
|
||||
"""Move all of this tag's children into another tag."""
|
||||
# print("MOVE", self.element.contents)
|
||||
# print("FROM", self.element)
|
||||
# print("TO", new_parent.element)
|
||||
|
||||
element = self.element
|
||||
new_parent_element = new_parent.element
|
||||
element = self.tag
|
||||
assert element is not None
|
||||
new_parent_element = newParent.tag
|
||||
assert new_parent_element is not None
|
||||
# Determine what this tag's next_element will be once all the children
|
||||
# are removed.
|
||||
final_next_element = element.next_sibling
|
||||
@@ -565,12 +581,13 @@ class Element(BeautifulSoupNode):
|
||||
# TODO-TYPING: typeshed stubs are incorrect about this;
|
||||
# hasContent returns a boolean, not None.
|
||||
def hasContent(self) -> bool: # type:ignore
|
||||
return len(self.element.contents) > 0
|
||||
return self.tag is None or len(self.tag.contents) > 0
|
||||
|
||||
# TODO-TYPING: typeshed stubs are incorrect about this;
|
||||
# cloneNode returns a new Node, not None.
|
||||
def cloneNode(self) -> treebuilder_base.Node: # type:ignore
|
||||
tag = self.soup.new_tag(self.element.name, self.namespace)
|
||||
assert self.tag is not None
|
||||
tag = self.soup.new_tag(self.tag.name, self.namespace)
|
||||
node = Element(tag, self.soup, self.namespace)
|
||||
for key, value in self.attributes:
|
||||
node.attributes[key] = value
|
||||
@@ -586,9 +603,9 @@ class Element(BeautifulSoupNode):
|
||||
|
||||
|
||||
class TextNode(BeautifulSoupNode):
|
||||
element: NavigableString
|
||||
|
||||
def __init__(self, element: NavigableString, soup: "BeautifulSoup"):
|
||||
treebuilder_base.Node.__init__(self, None)
|
||||
self.element = element
|
||||
self.tag = None
|
||||
self.string = element
|
||||
self.soup = soup
|
||||
|
@@ -125,7 +125,7 @@ class BeautifulSoupHTMLParser(HTMLParser, DetectsXMLParsedAsHTML):
|
||||
raise ParserRejectedMarkup(message)
|
||||
|
||||
def handle_startendtag(
|
||||
self, name: str, attrs: List[Tuple[str, Optional[str]]]
|
||||
self, tag: str, attrs: List[Tuple[str, Optional[str]]]
|
||||
) -> None:
|
||||
"""Handle an incoming empty-element tag.
|
||||
|
||||
@@ -136,12 +136,12 @@ class BeautifulSoupHTMLParser(HTMLParser, DetectsXMLParsedAsHTML):
|
||||
# just because its name matches a known empty-element tag. We
|
||||
# know that this is an empty-element tag, and we want to call
|
||||
# handle_endtag ourselves.
|
||||
self.handle_starttag(name, attrs, handle_empty_element=False)
|
||||
self.handle_endtag(name)
|
||||
self.handle_starttag(tag, attrs, handle_empty_element=False)
|
||||
self.handle_endtag(tag)
|
||||
|
||||
def handle_starttag(
|
||||
self,
|
||||
name: str,
|
||||
tag: str,
|
||||
attrs: List[Tuple[str, Optional[str]]],
|
||||
handle_empty_element: bool = True,
|
||||
) -> None:
|
||||
@@ -172,17 +172,17 @@ class BeautifulSoupHTMLParser(HTMLParser, DetectsXMLParsedAsHTML):
|
||||
on_dupe(attr_dict, key, value)
|
||||
else:
|
||||
attr_dict[key] = value
|
||||
# print("START", name)
|
||||
# print("START", tag)
|
||||
sourceline: Optional[int]
|
||||
sourcepos: Optional[int]
|
||||
if self.soup.builder.store_line_numbers:
|
||||
sourceline, sourcepos = self.getpos()
|
||||
else:
|
||||
sourceline = sourcepos = None
|
||||
tag = self.soup.handle_starttag(
|
||||
name, None, None, attr_dict, sourceline=sourceline, sourcepos=sourcepos
|
||||
tagObj = self.soup.handle_starttag(
|
||||
tag, None, None, attr_dict, sourceline=sourceline, sourcepos=sourcepos
|
||||
)
|
||||
if tag and tag.is_empty_element and handle_empty_element:
|
||||
if tagObj is not None and tagObj.is_empty_element and handle_empty_element:
|
||||
# Unlike other parsers, html.parser doesn't send separate end tag
|
||||
# events for empty-element tags. (It's handled in
|
||||
# handle_startendtag, but only if the original markup looked like
|
||||
@@ -192,32 +192,32 @@ class BeautifulSoupHTMLParser(HTMLParser, DetectsXMLParsedAsHTML):
|
||||
# know the start event is identical to the end event, we
|
||||
# don't want handle_endtag() to cross off any previous end
|
||||
# events for tags of this name.
|
||||
self.handle_endtag(name, check_already_closed=False)
|
||||
self.handle_endtag(tag, check_already_closed=False)
|
||||
|
||||
# But we might encounter an explicit closing tag for this tag
|
||||
# later on. If so, we want to ignore it.
|
||||
self.already_closed_empty_element.append(name)
|
||||
self.already_closed_empty_element.append(tag)
|
||||
|
||||
if self._root_tag_name is None:
|
||||
self._root_tag_encountered(name)
|
||||
self._root_tag_encountered(tag)
|
||||
|
||||
def handle_endtag(self, name: str, check_already_closed: bool = True) -> None:
|
||||
def handle_endtag(self, tag: str, check_already_closed: bool = True) -> None:
|
||||
"""Handle a closing tag, e.g. '</tag>'
|
||||
|
||||
:param name: A tag name.
|
||||
:param tag: A tag name.
|
||||
:param check_already_closed: True if this tag is expected to
|
||||
be the closing portion of an empty-element tag,
|
||||
e.g. '<tag></tag>'.
|
||||
"""
|
||||
# print("END", name)
|
||||
if check_already_closed and name in self.already_closed_empty_element:
|
||||
# print("END", tag)
|
||||
if check_already_closed and tag in self.already_closed_empty_element:
|
||||
# This is a redundant end tag for an empty-element tag.
|
||||
# We've already called handle_endtag() for it, so just
|
||||
# check it off the list.
|
||||
# print("ALREADY CLOSED", name)
|
||||
self.already_closed_empty_element.remove(name)
|
||||
# print("ALREADY CLOSED", tag)
|
||||
self.already_closed_empty_element.remove(tag)
|
||||
else:
|
||||
self.soup.handle_endtag(name)
|
||||
self.soup.handle_endtag(tag)
|
||||
|
||||
def handle_data(self, data: str) -> None:
|
||||
"""Handle some textual data that shows up between tags."""
|
||||
@@ -291,14 +291,14 @@ class BeautifulSoupHTMLParser(HTMLParser, DetectsXMLParsedAsHTML):
|
||||
self.soup.handle_data(data)
|
||||
self.soup.endData(Comment)
|
||||
|
||||
def handle_decl(self, data: str) -> None:
|
||||
def handle_decl(self, decl: str) -> None:
|
||||
"""Handle a DOCTYPE declaration.
|
||||
|
||||
:param decl: The text of the declaration.
|
||||
"""
|
||||
self.soup.endData()
|
||||
data = data[len("DOCTYPE ") :]
|
||||
self.soup.handle_data(data)
|
||||
decl = decl[len("DOCTYPE ") :]
|
||||
self.soup.handle_data(decl)
|
||||
self.soup.endData(Doctype)
|
||||
|
||||
def unknown_decl(self, data: str) -> None:
|
||||
|
@@ -98,7 +98,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
|
||||
DEFAULT_NSMAPS_INVERTED: _InvertedNamespaceMapping = _invert(DEFAULT_NSMAPS)
|
||||
|
||||
nsmaps: List[Optional[_InvertedNamespaceMapping]]
|
||||
empty_element_tags: Set[str]
|
||||
empty_element_tags: Optional[Set[str]]
|
||||
parser: Any
|
||||
_default_parser: Optional[etree.XMLParser]
|
||||
|
||||
@@ -314,7 +314,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
|
||||
def start(
|
||||
self,
|
||||
tag: str | bytes,
|
||||
attrs: Dict[str | bytes, str | bytes],
|
||||
attrib: Dict[str | bytes, str | bytes],
|
||||
nsmap: _NamespaceMapping = {},
|
||||
) -> None:
|
||||
# This is called by lxml code as a result of calling
|
||||
@@ -329,13 +329,13 @@ class LXMLTreeBuilderForXML(TreeBuilder):
|
||||
# need a mutable dict--lxml might send us an immutable
|
||||
# dictproxy. Third, so we can handle namespaced attribute
|
||||
# names by converting the keys to NamespacedAttributes.
|
||||
new_attrs: Dict[Union[str, NamespacedAttribute], str] = (
|
||||
new_attrib: Dict[Union[str, NamespacedAttribute], str] = (
|
||||
self.attribute_dict_class()
|
||||
)
|
||||
for k, v in attrs.items():
|
||||
for k, v in attrib.items():
|
||||
assert isinstance(k, str)
|
||||
assert isinstance(v, str)
|
||||
new_attrs[k] = v
|
||||
new_attrib[k] = v
|
||||
|
||||
nsprefix: Optional[_NamespacePrefix] = None
|
||||
namespace: Optional[_NamespaceURL] = None
|
||||
@@ -375,20 +375,20 @@ class LXMLTreeBuilderForXML(TreeBuilder):
|
||||
attribute = NamespacedAttribute(
|
||||
"xmlns", prefix, "http://www.w3.org/2000/xmlns/"
|
||||
)
|
||||
new_attrs[attribute] = namespace
|
||||
new_attrib[attribute] = namespace
|
||||
|
||||
# Namespaces are in play. Find any attributes that came in
|
||||
# from lxml with namespaces attached to their names, and
|
||||
# turn them into NamespacedAttribute objects.
|
||||
final_attrs: AttributeDict = self.attribute_dict_class()
|
||||
for attr, value in list(new_attrs.items()):
|
||||
final_attrib: AttributeDict = self.attribute_dict_class()
|
||||
for attr, value in list(new_attrib.items()):
|
||||
namespace, attr = self._getNsTag(attr)
|
||||
if namespace is None:
|
||||
final_attrs[attr] = value
|
||||
final_attrib[attr] = value
|
||||
else:
|
||||
nsprefix = self._prefix_for_namespace(namespace)
|
||||
attr = NamespacedAttribute(nsprefix, attr, namespace)
|
||||
final_attrs[attr] = value
|
||||
final_attrib[attr] = value
|
||||
|
||||
namespace, tag = self._getNsTag(tag)
|
||||
nsprefix = self._prefix_for_namespace(namespace)
|
||||
@@ -396,7 +396,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
|
||||
tag,
|
||||
namespace,
|
||||
nsprefix,
|
||||
final_attrs,
|
||||
final_attrib,
|
||||
namespaces=self.active_namespace_prefixes[-1],
|
||||
)
|
||||
|
||||
@@ -411,18 +411,18 @@ class LXMLTreeBuilderForXML(TreeBuilder):
|
||||
return inverted_nsmap[namespace]
|
||||
return None
|
||||
|
||||
def end(self, name: str | bytes) -> None:
|
||||
def end(self, tag: str | bytes) -> None:
|
||||
assert self.soup is not None
|
||||
assert isinstance(name, str)
|
||||
assert isinstance(tag, str)
|
||||
self.soup.endData()
|
||||
namespace, name = self._getNsTag(name)
|
||||
namespace, tag = self._getNsTag(tag)
|
||||
nsprefix = None
|
||||
if namespace is not None:
|
||||
for inverted_nsmap in reversed(self.nsmaps):
|
||||
if inverted_nsmap is not None and namespace in inverted_nsmap:
|
||||
nsprefix = inverted_nsmap[namespace]
|
||||
break
|
||||
self.soup.handle_endtag(name, nsprefix)
|
||||
self.soup.handle_endtag(tag, nsprefix)
|
||||
if len(self.nsmaps) > 1:
|
||||
# This tag, or one of its parents, introduced a namespace
|
||||
# mapping, so pop it off the stack.
|
||||
|
260
bs4/element.py
260
bs4/element.py
@@ -656,6 +656,7 @@ class PageElement(object):
|
||||
next_up = e.next_element
|
||||
e.__dict__.clear()
|
||||
if isinstance(e, Tag):
|
||||
e.name = ""
|
||||
e.contents = []
|
||||
e._decomposed = True
|
||||
e = next_up
|
||||
@@ -750,13 +751,33 @@ class PageElement(object):
|
||||
|
||||
return results
|
||||
|
||||
@overload
|
||||
def find_next( # pyright: ignore [reportOverlappingOverload]
|
||||
self,
|
||||
name: _FindMethodName = None,
|
||||
attrs: Optional[_StrainableAttributes] = None,
|
||||
string: None=None,
|
||||
**kwargs: _StrainableAttribute,
|
||||
) -> _AtMostOneTag:
|
||||
...
|
||||
|
||||
@overload
|
||||
def find_next(
|
||||
self,
|
||||
name: None=None,
|
||||
attrs: None=None,
|
||||
string: _StrainableString="",
|
||||
**kwargs: _StrainableAttribute,
|
||||
) -> _AtMostOneNavigableString:
|
||||
...
|
||||
|
||||
def find_next(
|
||||
self,
|
||||
name: _FindMethodName = None,
|
||||
attrs: Optional[_StrainableAttributes] = None,
|
||||
string: Optional[_StrainableString] = None,
|
||||
**kwargs: _StrainableAttribute,
|
||||
) -> _AtMostOneElement:
|
||||
) -> Union[_AtMostOneTag,_AtMostOneNavigableString,_AtMostOneElement]:
|
||||
"""Find the first PageElement that matches the given criteria and
|
||||
appears later in the document than this PageElement.
|
||||
|
||||
@@ -772,15 +793,39 @@ class PageElement(object):
|
||||
|
||||
findNext = _deprecated_function_alias("findNext", "find_next", "4.0.0")
|
||||
|
||||
@overload
|
||||
def find_all_next( # pyright: ignore [reportOverlappingOverload]
|
||||
self,
|
||||
name: _FindMethodName = None,
|
||||
attrs: Optional[_StrainableAttributes] = None,
|
||||
string: None = None,
|
||||
limit: Optional[int] = None,
|
||||
_stacklevel: int = 2,
|
||||
**kwargs: _StrainableAttribute,
|
||||
) -> _SomeTags:
|
||||
...
|
||||
|
||||
@overload
|
||||
def find_all_next(
|
||||
self,
|
||||
name: None = None,
|
||||
attrs: None = None,
|
||||
string: _StrainableString = "",
|
||||
limit: Optional[int] = None,
|
||||
_stacklevel: int = 2,
|
||||
**kwargs: _StrainableAttribute,
|
||||
) -> _SomeNavigableStrings:
|
||||
...
|
||||
|
||||
def find_all_next(
|
||||
self,
|
||||
name: _FindMethodName = None,
|
||||
attrs: _StrainableAttributes = {},
|
||||
attrs: Optional[_StrainableAttributes] = None,
|
||||
string: Optional[_StrainableString] = None,
|
||||
limit: Optional[int] = None,
|
||||
_stacklevel: int = 2,
|
||||
**kwargs: _StrainableAttribute,
|
||||
) -> _QueryResults:
|
||||
) -> Union[_SomeTags,_SomeNavigableStrings,_QueryResults]:
|
||||
"""Find all `PageElement` objects that match the given criteria and
|
||||
appear later in the document than this `PageElement`.
|
||||
|
||||
@@ -806,13 +851,33 @@ class PageElement(object):
|
||||
|
||||
findAllNext = _deprecated_function_alias("findAllNext", "find_all_next", "4.0.0")
|
||||
|
||||
@overload
|
||||
def find_next_sibling( # pyright: ignore [reportOverlappingOverload]
|
||||
self,
|
||||
name: _FindMethodName = None,
|
||||
attrs: Optional[_StrainableAttributes] = None,
|
||||
string: None=None,
|
||||
**kwargs: _StrainableAttribute,
|
||||
) -> _AtMostOneTag:
|
||||
...
|
||||
|
||||
@overload
|
||||
def find_next_sibling(
|
||||
self,
|
||||
name: None=None,
|
||||
attrs: None=None,
|
||||
string: _StrainableString="",
|
||||
**kwargs: _StrainableAttribute,
|
||||
) -> _AtMostOneNavigableString:
|
||||
...
|
||||
|
||||
def find_next_sibling(
|
||||
self,
|
||||
name: _FindMethodName = None,
|
||||
attrs: _StrainableAttributes = {},
|
||||
attrs: Optional[_StrainableAttributes] = None,
|
||||
string: Optional[_StrainableString] = None,
|
||||
**kwargs: _StrainableAttribute,
|
||||
) -> _AtMostOneElement:
|
||||
) -> Union[_AtMostOneTag,_AtMostOneNavigableString,_AtMostOneElement]:
|
||||
"""Find the closest sibling to this PageElement that matches the
|
||||
given criteria and appears later in the document.
|
||||
|
||||
@@ -830,15 +895,39 @@ class PageElement(object):
|
||||
"findNextSibling", "find_next_sibling", "4.0.0"
|
||||
)
|
||||
|
||||
@overload
|
||||
def find_next_siblings( # pyright: ignore [reportOverlappingOverload]
|
||||
self,
|
||||
name: _FindMethodName = None,
|
||||
attrs: Optional[_StrainableAttributes] = None,
|
||||
string: None = None,
|
||||
limit: Optional[int] = None,
|
||||
_stacklevel: int = 2,
|
||||
**kwargs: _StrainableAttribute,
|
||||
) -> _SomeTags:
|
||||
...
|
||||
|
||||
@overload
|
||||
def find_next_siblings(
|
||||
self,
|
||||
name: None = None,
|
||||
attrs: None = None,
|
||||
string: _StrainableString = "",
|
||||
limit: Optional[int] = None,
|
||||
_stacklevel: int = 2,
|
||||
**kwargs: _StrainableAttribute,
|
||||
) -> _SomeNavigableStrings:
|
||||
...
|
||||
|
||||
def find_next_siblings(
|
||||
self,
|
||||
name: _FindMethodName = None,
|
||||
attrs: _StrainableAttributes = {},
|
||||
attrs: Optional[_StrainableAttributes] = None,
|
||||
string: Optional[_StrainableString] = None,
|
||||
limit: Optional[int] = None,
|
||||
_stacklevel: int = 2,
|
||||
**kwargs: _StrainableAttribute,
|
||||
) -> _QueryResults:
|
||||
) -> Union[_SomeTags,_SomeNavigableStrings,_QueryResults]:
|
||||
"""Find all siblings of this `PageElement` that match the given criteria
|
||||
and appear later in the document.
|
||||
|
||||
@@ -869,13 +958,33 @@ class PageElement(object):
|
||||
"fetchNextSiblings", "find_next_siblings", "3.0.0"
|
||||
)
|
||||
|
||||
@overload
|
||||
def find_previous( # pyright: ignore [reportOverlappingOverload]
|
||||
self,
|
||||
name: _FindMethodName = None,
|
||||
attrs: Optional[_StrainableAttributes] = None,
|
||||
string: None=None,
|
||||
**kwargs: _StrainableAttribute,
|
||||
) -> _AtMostOneTag:
|
||||
...
|
||||
|
||||
@overload
|
||||
def find_previous(
|
||||
self,
|
||||
name: None=None,
|
||||
attrs: None=None,
|
||||
string: _StrainableString="",
|
||||
**kwargs: _StrainableAttribute,
|
||||
) -> _AtMostOneNavigableString:
|
||||
...
|
||||
|
||||
def find_previous(
|
||||
self,
|
||||
name: _FindMethodName = None,
|
||||
attrs: _StrainableAttributes = {},
|
||||
attrs: Optional[_StrainableAttributes] = None,
|
||||
string: Optional[_StrainableString] = None,
|
||||
**kwargs: _StrainableAttribute,
|
||||
) -> _AtMostOneElement:
|
||||
) -> Union[_AtMostOneTag,_AtMostOneNavigableString,_AtMostOneElement]:
|
||||
"""Look backwards in the document from this `PageElement` and find the
|
||||
first `PageElement` that matches the given criteria.
|
||||
|
||||
@@ -891,15 +1000,39 @@ class PageElement(object):
|
||||
|
||||
findPrevious = _deprecated_function_alias("findPrevious", "find_previous", "3.0.0")
|
||||
|
||||
@overload
|
||||
def find_all_previous( # pyright: ignore [reportOverlappingOverload]
|
||||
self,
|
||||
name: _FindMethodName = None,
|
||||
attrs: Optional[_StrainableAttributes] = None,
|
||||
string: None = None,
|
||||
limit: Optional[int] = None,
|
||||
_stacklevel: int = 2,
|
||||
**kwargs: _StrainableAttribute,
|
||||
) -> _SomeTags:
|
||||
...
|
||||
|
||||
@overload
|
||||
def find_all_previous(
|
||||
self,
|
||||
name: None = None,
|
||||
attrs: None = None,
|
||||
string: _StrainableString = "",
|
||||
limit: Optional[int] = None,
|
||||
_stacklevel: int = 2,
|
||||
**kwargs: _StrainableAttribute,
|
||||
) -> _SomeNavigableStrings:
|
||||
...
|
||||
|
||||
def find_all_previous(
|
||||
self,
|
||||
name: _FindMethodName = None,
|
||||
attrs: _StrainableAttributes = {},
|
||||
attrs: Optional[_StrainableAttributes] = None,
|
||||
string: Optional[_StrainableString] = None,
|
||||
limit: Optional[int] = None,
|
||||
_stacklevel: int = 2,
|
||||
**kwargs: _StrainableAttribute,
|
||||
) -> _QueryResults:
|
||||
) -> Union[_SomeTags,_SomeNavigableStrings,_QueryResults]:
|
||||
"""Look backwards in the document from this `PageElement` and find all
|
||||
`PageElement` objects that match the given criteria.
|
||||
|
||||
@@ -930,13 +1063,33 @@ class PageElement(object):
|
||||
"fetchAllPrevious", "find_all_previous", "3.0.0"
|
||||
)
|
||||
|
||||
@overload
|
||||
def find_previous_sibling( # pyright: ignore [reportOverlappingOverload]
|
||||
self,
|
||||
name: _FindMethodName = None,
|
||||
attrs: Optional[_StrainableAttributes] = None,
|
||||
string: None=None,
|
||||
**kwargs: _StrainableAttribute,
|
||||
) -> _AtMostOneTag:
|
||||
...
|
||||
|
||||
@overload
|
||||
def find_previous_sibling(
|
||||
self,
|
||||
name: None=None,
|
||||
attrs: None=None,
|
||||
string: _StrainableString="",
|
||||
**kwargs: _StrainableAttribute,
|
||||
) -> _AtMostOneNavigableString:
|
||||
...
|
||||
|
||||
def find_previous_sibling(
|
||||
self,
|
||||
name: _FindMethodName = None,
|
||||
attrs: _StrainableAttributes = {},
|
||||
attrs: Optional[_StrainableAttributes] = None,
|
||||
string: Optional[_StrainableString] = None,
|
||||
**kwargs: _StrainableAttribute,
|
||||
) -> _AtMostOneElement:
|
||||
) -> Union[_AtMostOneTag,_AtMostOneNavigableString,_AtMostOneElement]:
|
||||
"""Returns the closest sibling to this `PageElement` that matches the
|
||||
given criteria and appears earlier in the document.
|
||||
|
||||
@@ -956,15 +1109,39 @@ class PageElement(object):
|
||||
"findPreviousSibling", "find_previous_sibling", "4.0.0"
|
||||
)
|
||||
|
||||
@overload
|
||||
def find_previous_siblings( # pyright: ignore [reportOverlappingOverload]
|
||||
self,
|
||||
name: _FindMethodName = None,
|
||||
attrs: Optional[_StrainableAttributes] = None,
|
||||
string: None = None,
|
||||
limit: Optional[int] = None,
|
||||
_stacklevel: int = 2,
|
||||
**kwargs: _StrainableAttribute,
|
||||
) -> _SomeTags:
|
||||
...
|
||||
|
||||
@overload
|
||||
def find_previous_siblings(
|
||||
self,
|
||||
name: None = None,
|
||||
attrs: None = None,
|
||||
string: _StrainableString = "",
|
||||
limit: Optional[int] = None,
|
||||
_stacklevel: int = 2,
|
||||
**kwargs: _StrainableAttribute,
|
||||
) -> _SomeNavigableStrings:
|
||||
...
|
||||
|
||||
def find_previous_siblings(
|
||||
self,
|
||||
name: _FindMethodName = None,
|
||||
attrs: _StrainableAttributes = {},
|
||||
attrs: Optional[_StrainableAttributes] = None,
|
||||
string: Optional[_StrainableString] = None,
|
||||
limit: Optional[int] = None,
|
||||
_stacklevel: int = 2,
|
||||
**kwargs: _StrainableAttribute,
|
||||
) -> _QueryResults:
|
||||
) -> Union[_SomeTags,_SomeNavigableStrings,_QueryResults]:
|
||||
"""Returns all siblings to this PageElement that match the
|
||||
given criteria and appear earlier in the document.
|
||||
|
||||
@@ -998,9 +1175,9 @@ class PageElement(object):
|
||||
def find_parent(
|
||||
self,
|
||||
name: _FindMethodName = None,
|
||||
attrs: _StrainableAttributes = {},
|
||||
attrs: Optional[_StrainableAttributes] = None,
|
||||
**kwargs: _StrainableAttribute,
|
||||
) -> _AtMostOneElement:
|
||||
) -> _AtMostOneTag:
|
||||
"""Find the closest parent of this PageElement that matches the given
|
||||
criteria.
|
||||
|
||||
@@ -1028,11 +1205,11 @@ class PageElement(object):
|
||||
def find_parents(
|
||||
self,
|
||||
name: _FindMethodName = None,
|
||||
attrs: _StrainableAttributes = {},
|
||||
attrs: Optional[_StrainableAttributes] = None,
|
||||
limit: Optional[int] = None,
|
||||
_stacklevel: int = 2,
|
||||
**kwargs: _StrainableAttribute,
|
||||
) -> _QueryResults:
|
||||
) -> _SomeTags:
|
||||
"""Find all parents of this `PageElement` that match the given criteria.
|
||||
|
||||
All find_* methods take a common set of arguments. See the online
|
||||
@@ -1045,9 +1222,11 @@ class PageElement(object):
|
||||
:kwargs: Additional filters on attribute values.
|
||||
"""
|
||||
iterator = self.parents
|
||||
return self._find_all(
|
||||
# Only Tags can have children, so this ResultSet will contain
|
||||
# nothing but Tags.
|
||||
return cast(ResultSet[Tag], self._find_all(
|
||||
name, attrs, None, limit, iterator, _stacklevel=_stacklevel + 1, **kwargs
|
||||
)
|
||||
))
|
||||
|
||||
findParents = _deprecated_function_alias("findParents", "find_parents", "4.0.0")
|
||||
fetchParents = _deprecated_function_alias("fetchParents", "find_parents", "3.0.0")
|
||||
@@ -2244,7 +2423,7 @@ class Tag(PageElement):
|
||||
self.attrs.pop(key, None)
|
||||
|
||||
@overload
|
||||
def __call__(
|
||||
def __call__( # pyright: ignore [reportOverlappingOverload]
|
||||
self,
|
||||
name: _FindMethodName = None,
|
||||
attrs: Optional[_StrainableAttributes] = None,
|
||||
@@ -2278,14 +2457,15 @@ class Tag(PageElement):
|
||||
limit: Optional[int] = None,
|
||||
_stacklevel: int = 2,
|
||||
**kwargs: _StrainableAttribute,
|
||||
) -> Union[_SomeTags|_SomeNavigableStrings|_QueryResults]:
|
||||
) -> Union[_SomeTags,_SomeNavigableStrings,_QueryResults]:
|
||||
"""Calling a Tag like a function is the same as calling its
|
||||
find_all() method. Eg. tag('a') returns a list of all the A tags
|
||||
found within this tag."""
|
||||
if string is not None and (name is not None or attrs is not None or kwargs):
|
||||
# This is the version that can't be expressed using the @overload
|
||||
# decorator--searching for a mixed list of tags and strings.
|
||||
return self.find_all(name, attrs, recursive, string, limit, _stacklevel, **kwargs) #type: ignore
|
||||
# TODO: Using the @overload decorator to express the three ways you
|
||||
# could get into this path is way too much code for a rarely(?) used
|
||||
# feature.
|
||||
return cast(ResultSet[Tag], self.find_all(name, attrs, recursive, string, limit, _stacklevel, **kwargs)) #type: ignore
|
||||
|
||||
if string is None:
|
||||
# If string is None, we're searching for tags.
|
||||
@@ -2295,7 +2475,7 @@ class Tag(PageElement):
|
||||
return tags
|
||||
|
||||
# Otherwise, we're searching for strings.
|
||||
strings = self.find_all(
|
||||
strings:ResultSet[NavigableString] = self.find_all(
|
||||
None, None, recursive, string, limit, _stacklevel, **kwargs
|
||||
)
|
||||
return strings
|
||||
@@ -2779,7 +2959,7 @@ class Tag(PageElement):
|
||||
recursive: bool = True,
|
||||
string: Optional[_StrainableString] = None,
|
||||
**kwargs: _StrainableAttribute,
|
||||
) -> Union[_AtMostOneTag|_AtMostOneNavigableString|_AtMostOneElement]:
|
||||
) -> Union[_AtMostOneTag,_AtMostOneNavigableString,_AtMostOneElement]:
|
||||
"""Look in the children of this PageElement and find the first
|
||||
PageElement that matches the given criteria.
|
||||
|
||||
@@ -2795,11 +2975,12 @@ class Tag(PageElement):
|
||||
:kwargs: Additional filters on attribute values.
|
||||
"""
|
||||
if string is not None and (name is not None or attrs is not None or kwargs):
|
||||
# This is the version that can't be expressed using the @overload
|
||||
# decorator--searching for a mixed list of tags and strings.
|
||||
# TODO: Using the @overload decorator to express the three ways you
|
||||
# could get into this path is way too much code for a rarely(?) used
|
||||
# feature.
|
||||
elements = self.find_all(name, attrs, recursive, string, 1, _stacklevel=3, **kwargs) # type:ignore
|
||||
if elements:
|
||||
return cast(PageElement, elements[0])
|
||||
return cast(Tag, elements[0])
|
||||
elif string is None:
|
||||
tags = self.find_all(name, attrs, recursive, None, 1, _stacklevel=3, **kwargs)
|
||||
if tags:
|
||||
@@ -2813,7 +2994,7 @@ class Tag(PageElement):
|
||||
findChild = _deprecated_function_alias("findChild", "find", "3.0.0")
|
||||
|
||||
@overload
|
||||
def find_all(
|
||||
def find_all( # pyright: ignore [reportOverlappingOverload]
|
||||
self,
|
||||
name: _FindMethodName = None,
|
||||
attrs: Optional[_StrainableAttributes] = None,
|
||||
@@ -2847,7 +3028,7 @@ class Tag(PageElement):
|
||||
limit: Optional[int] = None,
|
||||
_stacklevel: int = 2,
|
||||
**kwargs: _StrainableAttribute,
|
||||
) -> Union[_SomeTags|_SomeNavigableStrings|_QueryResults]:
|
||||
) -> Union[_SomeTags,_SomeNavigableStrings,_QueryResults]:
|
||||
"""Look in the children of this `PageElement` and find all
|
||||
`PageElement` objects that match the given criteria.
|
||||
|
||||
@@ -2869,10 +3050,13 @@ class Tag(PageElement):
|
||||
_stacklevel += 1
|
||||
|
||||
if string is not None and (name is not None or attrs is not None or kwargs):
|
||||
# This is the version that can't be expressed using the
|
||||
# @overload decorator--searching for a mixed list of strings and tags.
|
||||
return self._find_all(name, attrs, string, limit, generator,
|
||||
_stacklevel=_stacklevel, **kwargs)
|
||||
# TODO: Using the @overload decorator to express the three ways you
|
||||
# could get into this path is way too much code for a rarely(?) used
|
||||
# feature.
|
||||
return cast(ResultSet[Tag],
|
||||
self._find_all(name, attrs, string, limit, generator,
|
||||
_stacklevel=_stacklevel, **kwargs)
|
||||
)
|
||||
|
||||
if string is None:
|
||||
# If string is None, we're searching for tags.
|
||||
@@ -3000,7 +3184,7 @@ _PageElementT = TypeVar("_PageElementT", bound=PageElement)
|
||||
|
||||
|
||||
class ResultSet(Sequence[_PageElementT], Generic[_PageElementT]):
|
||||
"""A ResultSet is a list of `PageElement` objects, gathered as the result
|
||||
"""A ResultSet is a sequence of `PageElement` objects, gathered as the result
|
||||
of matching an :py:class:`ElementFilter` against a parse tree. Basically, a list of
|
||||
search results.
|
||||
"""
|
||||
|
@@ -1378,6 +1378,8 @@ class TestTreeModification(SoupTest):
|
||||
# p2 is unaffected.
|
||||
assert False is p2.decomposed
|
||||
|
||||
assert "<></>" == str(p1)
|
||||
|
||||
def test_decompose_string(self):
|
||||
soup = self.soup("<div><p>String 1</p><p>String 2</p></p>")
|
||||
div = soup.div
|
||||
@@ -1386,6 +1388,7 @@ class TestTreeModification(SoupTest):
|
||||
text.decompose()
|
||||
assert True is text.decomposed
|
||||
assert "<div><p></p><p>String 2</p></div>" == div.decode()
|
||||
assert "String 1" == str(text)
|
||||
|
||||
def test_string_set(self):
|
||||
"""Tag.string = 'string'"""
|
||||
|
@@ -16,7 +16,7 @@ with examples. I show you what the library is good for, how it works,
|
||||
how to use it, how to make it do what you want, and what to do when it
|
||||
violates your expectations.
|
||||
|
||||
This document covers Beautiful Soup version 4.13.4. The examples in
|
||||
This document covers Beautiful Soup version 4.13.5. The examples in
|
||||
this documentation were written for Python 3.8.
|
||||
|
||||
You might be looking for the documentation for `Beautiful Soup 3
|
||||
@@ -3228,6 +3228,42 @@ example can give different results every time you run it, thanks
|
||||
to the random element. It's very unlikely, but this function could
|
||||
wander around the parse tree forever and *never* complete.)
|
||||
|
||||
.. py:method:: ElementFilter.filter_tags()
|
||||
|
||||
The :py:meth:`ElementFilter.filter_tags()` method works exactly like
|
||||
the :py:meth:`ElementFilter.filter()` method, but it is constrained to
|
||||
only yield `Tag` objects. Anything it encounters that is not a `Tag`
|
||||
will just be ignored. This can be useful if you're writing type-safe
|
||||
Python and want to be able to process the results as `Tag` objects
|
||||
without doing a cast::
|
||||
|
||||
from typing import List
|
||||
tags:List[Tag]
|
||||
|
||||
# This will give a mypy error:
|
||||
# List comprehension has incompatible type List[PageElement | Tag | NavigableString]; expected List[Tag]
|
||||
tags = [x for x in non_whitespace_filter.filter(random_walk(soup.b))]
|
||||
|
||||
# This will not give a mypy error:
|
||||
tags = [x for x in non_whitespace_filter.filter_tags(random_walk(soup.b))]
|
||||
|
||||
.. py:method:: ElementFilter.filter_strings()
|
||||
|
||||
The :py:meth:`ElementFilter.filter_strings()` method is also designed
|
||||
for use in type-safe Python. It works like
|
||||
:py:meth:`ElementFilter.filter_tags()` but it's constrained to only
|
||||
yield `NavigableString` objects::
|
||||
|
||||
strings:List[NavigableString]
|
||||
|
||||
# This will give a mypy error:
|
||||
# List comprehension has incompatible type List[PageElement | Tag | NavigableString]; expected List[NavigableString]
|
||||
strings = [x for x in non_whitespace_filter.filter(random_walk(soup.b))]
|
||||
|
||||
# This will not give a mypy error:
|
||||
strings = [x for x in non_whitespace_filter.filter_strings(random_walk(soup.b))]
|
||||
|
||||
|
||||
Advanced parser customization
|
||||
=============================
|
||||
|
||||
|
@@ -23,12 +23,13 @@ tox run-parallel
|
||||
# Build sdist and wheel.
|
||||
hatch build
|
||||
|
||||
# Install the sdist locally and run unit tests.
|
||||
# Install the sdist locally and run unit tests. Note that we run the tests from the original source tree because
|
||||
# the tests are no longer installed (tarball) or packaged (wheel).
|
||||
pyenv virtualenv-delete -f py3-install-test-virtualenv
|
||||
pyenv virtualenv 3.13.1 py3-install-test-virtualenv
|
||||
pyenv activate py3-install-test-virtualenv
|
||||
pip install dist/beautifulsoup4-*.tar.gz pytest lxml html5lib soupsieve
|
||||
python -m pytest ~/.pyenv/versions/3.13.*/envs/py3-install-test-virtualenv/lib/python3.13/site-packages/bs4/tests
|
||||
python -m pytest bs4/tests
|
||||
echo "EXPECT HTML ON LINE BELOW"
|
||||
(cd .. && python --version && python -c "from bs4 import _s, __version__; print(__version__, _s('<a>foo', 'lxml'))")
|
||||
# That should print something like:
|
||||
@@ -36,11 +37,10 @@ echo "EXPECT HTML ON LINE BELOW"
|
||||
# [new version number] <a>foo</a>
|
||||
|
||||
|
||||
# Install the wheel locally and test basic functionality. Note that we
|
||||
# don't run the unit tests because tests are not included with the
|
||||
# wheel.
|
||||
# Install the wheel locally and test basic functionality.
|
||||
pip uninstall beautifulsoup4
|
||||
pip install dist/beautifulsoup4-*.whl
|
||||
python -m pytest bs4/tests
|
||||
echo "EXPECT HTML ON LINE BELOW"
|
||||
(cd .. && python --version && python -c "from bs4 import _s, __version__; print(__version__, _s('<a>foo', 'lxml'))")
|
||||
|
||||
@@ -58,7 +58,7 @@ pip install pytest lxml html5lib soupsieve typing-extensions hatchling
|
||||
|
||||
# First, install from source and run the tests.
|
||||
pip install -i https://test.pypi.org/simple/ beautifulsoup4 --extra-index-url=https://pypi.python.org/pypi --no-binary beautifulsoup4
|
||||
python -m pytest ~/.pyenv/versions/py3-install-test-virtualenv/lib/python3.13/site-packages/bs4/
|
||||
python -m pytest bs4/tests
|
||||
echo "EXPECT HTML ON LINE BELOW"
|
||||
(cd .. && which python && python -c "from bs4 import _s, __version__; print(__version__, _s('<a>foo', 'lxml'))")
|
||||
# That should print something like:
|
||||
@@ -68,6 +68,7 @@ echo "EXPECT HTML ON LINE BELOW"
|
||||
# Next, install the wheel and test basic functionality.
|
||||
pip uninstall beautifulsoup4
|
||||
pip install -i https://test.pypi.org/simple/ beautifulsoup4 --extra-index-url=https://pypi.python.org/pypi --no-binary beautifulsoup4
|
||||
python -m pytest bs4/tests
|
||||
echo "EXPECT HTML ON LINE BELOW"
|
||||
(cd .. && which python && python -c "from bs4 import _s, __version__; print(__version__, _s('<a>foo', 'lxml'))")
|
||||
# That should print something like:
|
||||
@@ -88,7 +89,7 @@ pyenv virtualenv py3-install-test-virtualenv
|
||||
pyenv activate py3-install-test-virtualenv
|
||||
|
||||
pip install pytest lxml html5lib beautifulsoup4 --no-binary beautifulsoup4
|
||||
python -m pytest ~/.pyenv/versions/py3-install-test-virtualenv/lib/python3.*/site-packages/bs4/
|
||||
python -m pytest bs4/tests
|
||||
echo "EXPECT HTML ON LINE BELOW"
|
||||
(cd .. && which python && python -c "from bs4 import _s, __version__; print(__version__, _s('<a>foo', 'html.parser'))")
|
||||
# That should print something like:
|
||||
@@ -98,6 +99,7 @@ echo "EXPECT HTML ON LINE BELOW"
|
||||
# Next, from the wheel
|
||||
pip uninstall beautifulsoup4
|
||||
pip install beautifulsoup4
|
||||
python -m pytest bs4/tests
|
||||
echo "EXPECT HTML ON LINE BELOW"
|
||||
(cd .. && which python && python -c "from bs4 import _s, __version__; print(__version__, _s('<a>foo', 'html.parser'))")
|
||||
# That should print something like:
|
||||
@@ -106,3 +108,12 @@ echo "EXPECT HTML ON LINE BELOW"
|
||||
|
||||
# Cleanup
|
||||
pyenv virtualenv-delete -f py3-install-test-virtualenv
|
||||
|
||||
# Copy source distro
|
||||
scp dist/beautifulsoup4-*.tar.gz crummy.com:public_html/software/BeautifulSoup/bs4/download/4.13/
|
||||
|
||||
# Refresh docs
|
||||
cd doc
|
||||
make clean; rm -rf _build build
|
||||
make html
|
||||
rsync -avp _build/html/* crummy.com:public_html/software/BeautifulSoup/bs4/doc/
|
||||
|
Reference in New Issue
Block a user