Mirror of https://git.launchpad.net/beautifulsoup, synced 2025-10-06 00:12:49 +02:00
Applied the unsafe fixes from ruff check, some of which indicated that tests weren't running properly to begin with.
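For context: nearly every change below is one of two patterns that ruff will only auto-fix with --unsafe-fixes. Comparisons against singletons are rewritten from ==/!= to is/is not, and assignments to local variables that are never read are dropped so that only the call expression survives. The second pattern is what the commit message alludes to: when a line like soup = self.soup(markup) becomes self.soup(markup), the linter has effectively pointed out that the test built an object it never inspected. The sketch below is a minimal, hypothetical illustration of both patterns, not code from this repository; the rule codes E711, E712 and F841 are my attribution, since the commit message only says "unsafe fixes".

# Hypothetical sketch of the two ruff fix patterns applied in this commit.
# Assumption: rules E711/E712 (comparison to None/True) and F841 (unused local).

class Registry:
    """Stand-in for the tree builder registry exercised by the real tests."""

    def __init__(self):
        self._builders = {"html": "HTMLParserTreeBuilder"}

    def lookup(self, feature):
        # Returns None when no builder supports the feature.
        return self._builders.get(feature)

    def supports(self, feature):
        return feature in self._builders


def test_lookup_before_ruff():
    registry = Registry()
    builder = registry.lookup("xml")          # F841: assigned but never read again
    assert registry.lookup("xml") == None     # E711: equality comparison to None
    assert registry.supports("html") == True  # E712: equality comparison to True


def test_lookup_after_ruff():
    registry = Registry()
    registry.lookup("xml")                    # unsafe fix keeps the call, drops the binding
    assert registry.lookup("xml") is None     # identity check against the None singleton
    assert registry.supports("html") is True


if __name__ == "__main__":
    test_lookup_before_ruff()
    test_lookup_after_ruff()
    print("both variants pass; the second is what ruff rewrites the first into")

The is/is not rewrites can change behavior for objects that override __eq__, which is why ruff treats them as unsafe; the dropped-assignment fixes are unsafe because they silently discard a name, which in a test file is often a hint that an assertion was missing rather than that the variable was redundant.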
@@ -528,7 +528,6 @@ class BeautifulSoup(Tag):
"""
problem: bool = False
if isinstance(markup, bytes):
cant_start_with_b: Tuple[bytes, bytes] = (b"http:", b"https:")
problem = (
any(markup.startswith(prefix) for prefix in (b"http:", b"https:"))
and b" " not in markup

@@ -1073,7 +1072,7 @@ class BeautifulSoup(Tag):
# go into an XML document because it means nothing
# outside of Python.
declared_encoding = None
if declared_encoding != None:
if declared_encoding is not None:
encoding_part = ' encoding="%s"' % declared_encoding
prefix = '<?xml version="1.0"%s?>\n' % encoding_part
else:
@@ -620,7 +620,6 @@ class HTMLTreeBuilder(TreeBuilder):
"link": {"rel", "rev"},
"td": {"headers"},
"th": {"headers"},
"td": {"headers"},
"form": {"accept-charset"},
"object": {"archive"},
# These are HTML5 specific, as are *.accesskey and *.dropzone above.

@@ -675,7 +674,6 @@ class HTMLTreeBuilder(TreeBuilder):
if charset is not None:
# HTML 5 style:
# <meta charset="utf8">
meta_encoding = charset
tag["charset"] = CharsetMetaAttributeValue(charset)
substituted = True
@@ -577,7 +577,7 @@ class Element(BeautifulSoupNode):
return node

def getNameTuple(self) -> Tuple[Optional[_NamespaceURL], str]:
if self.namespace == None:
if self.namespace is None:
return namespaces["html"], self.name
else:
return self.namespace, self.name

@@ -174,7 +174,6 @@ class BeautifulSoupHTMLParser(HTMLParser, DetectsXMLParsedAsHTML):
on_dupe(attr_dict, key, value)
else:
attr_dict[key] = value
attrvalue = '""'
# print("START", name)
if self.soup.builder.store_line_numbers:
sourceline, sourcepos = self.getpos()

@@ -421,7 +420,6 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder):
# lower-priority user encoding.
user_encodings.append(document_declared_encoding)

try_encodings = [user_specified_encoding, document_declared_encoding]
dammit = UnicodeDammit(
markup,
known_definite_encodings=known_definite_encodings,
@@ -413,7 +413,6 @@ class LXMLTreeBuilderForXML(TreeBuilder):
assert self.soup is not None
assert isinstance(name, str)
self.soup.endData()
completed_tag = self.soup.tagStack[-1]
namespace, name = self._getNsTag(name)
nsprefix = None
if namespace is not None:

@@ -280,7 +280,7 @@ class EntitySubstitution(object):
"""Used with a regular expression to substitute the
appropriate HTML entity for a special character string."""
entity = cls.CHARACTER_TO_HTML_ENTITY.get(matchobj.group(0))
if entity == None:
if entity is None:
return "&%s;" % original_entity
return "&%s;" % entity

@@ -223,7 +223,7 @@ def benchmark_parsers(num_elements: int = 100000) -> None:
success = False
try:
a = time.time()
soup = BeautifulSoup(data, parser_name)
BeautifulSoup(data, parser_name)
b = time.time()
success = True
except Exception:

@@ -324,7 +324,7 @@ class ContentMetaAttributeValue(AttributeValueWithCharsetSubstitution):
CHARSET_RE: Pattern[str] = re.compile(r"((^|;)\s*charset=)([^;]*)", re.M)

def __new__(cls, original_value: str) -> Self:
match = cls.CHARSET_RE.search(original_value)
cls.CHARSET_RE.search(original_value)
obj = str.__new__(cls, original_value)
obj.original_value = original_value
return obj

@@ -1409,7 +1409,7 @@ class PreformattedString(NavigableString):
suffix added on.
"""
if formatter is not None:
ignore = self.format_string(self, formatter)
self.format_string(self, formatter)
return self.PREFIX + self + self.SUFFIX
@@ -334,9 +334,12 @@ class TreeBuilderSmokeTest(SoupTest):
assert soup.a["class"] == ["a", "b", "c"]

def test_invalid_doctype(self):
# We don't have an official opinion on how these are parsed,
# but they shouldn't crash any of the parsers.
markup = "<![if word]>content<![endif]>"
self.soup(markup)
markup = "<!DOCTYPE html]ff>"
soup = self.soup(markup)
self.soup(markup)

def test_doctype_filtered(self):
markup = "<!DOCTYPE html>\n<html>\n</html>"

@@ -415,7 +418,7 @@ class HTMLTreeBuilderSmokeTest(TreeBuilderSmokeTest):
]:
soup = self.soup("")
new_tag = soup.new_tag(name)
assert new_tag.is_empty_element == True
assert new_tag.is_empty_element is True

self.assert_soup("<br/><br/><br/>", "<br/><br/><br/>")
self.assert_soup("<br /><br /><br />", "<br/><br/><br/>")

@@ -666,7 +669,7 @@ Hello, world!
self.assert_soup(nested_b_tag)

double_nested_b_tag = "<p>A <a>doubly <i>nested <b>tag</b></i></a></p>"
self.assert_soup(nested_b_tag)
self.assert_soup(double_nested_b_tag)

def test_nested_block_level_elements(self):
"""Block elements can be nested."""

@@ -837,7 +840,6 @@ Hello, world!
markup = b'<html xmlns="http://www.w3.org/1999/xhtml" xmlns:mathml="http://www.w3.org/1998/Math/MathML" xmlns:svg="http://www.w3.org/2000/svg"><head></head><body><mathml:msqrt>4</mathml:msqrt><b svg:fill="red"></b></body></html>'
soup = self.soup(markup)
assert markup == soup.encode()
html = soup.html
assert "http://www.w3.org/1999/xhtml" == soup.html["xmlns"]
assert "http://www.w3.org/1998/Math/MathML" == soup.html["xmlns:mathml"]
assert "http://www.w3.org/2000/svg" == soup.html["xmlns:svg"]
@@ -45,7 +45,7 @@ class TestBuiltInRegistry(object):
assert registry.lookup("html") == LXMLTreeBuilder
assert registry.lookup("xml") == LXMLTreeBuilderForXML
else:
assert registry.lookup("xml") == None
assert registry.lookup("xml") is None
if HTML5LIB_PRESENT:
assert registry.lookup("html") == HTML5TreeBuilder
else:

@@ -61,7 +61,7 @@ class TestBuiltInRegistry(object):
assert registry.lookup("html.parser") == HTMLParserTreeBuilder

def test_beautifulsoup_constructor_does_lookup(self):
with warnings.catch_warnings(record=True) as w:
with warnings.catch_warnings(record=True):
# This will create a warning about not explicitly
# specifying a parser, but we'll ignore it.

@@ -108,11 +108,10 @@ class TestRegistry(object):
assert self.registry.lookup("bar") is builder

def test_lookup_fails_when_no_builder_implements_feature(self):
builder = self.builder_for_features("foo", "bar")
assert self.registry.lookup("baz") is None

def test_lookup_gets_most_recent_registration_when_no_feature_specified(self):
builder1 = self.builder_for_features("foo")
self.builder_for_features("foo")
builder2 = self.builder_for_features("bar")
assert self.registry.lookup() == builder2

@@ -120,12 +119,12 @@ class TestRegistry(object):
assert self.registry.lookup() is None

def test_lookup_gets_most_recent_builder_supporting_all_features(self):
has_one = self.builder_for_features("foo")
has_the_other = self.builder_for_features("bar")
self.builder_for_features("foo")
self.builder_for_features("bar")
has_both_early = self.builder_for_features("foo", "bar", "baz")
has_both_late = self.builder_for_features("foo", "bar", "quux")
lacks_one = self.builder_for_features("bar")
has_the_other = self.builder_for_features("foo")
self.builder_for_features("bar")
self.builder_for_features("foo")

# There are two builders featuring 'foo' and 'bar', but
# the one that also features 'quux' was registered later.

@@ -135,6 +134,6 @@ class TestRegistry(object):
assert self.registry.lookup("foo", "bar", "baz") == has_both_early

def test_lookup_fails_when_cannot_reconcile_requested_features(self):
builder1 = self.builder_for_features("foo", "bar")
builder2 = self.builder_for_features("foo", "baz")
self.builder_for_features("foo", "bar")
self.builder_for_features("foo", "baz")
assert self.registry.lookup("bar", "baz") is None
@@ -140,10 +140,9 @@ class TestCSSSelectors(SoupTest):

def test_select_one_returns_none_if_no_match(self):
match = self._soup.select_one("nonexistenttag")
assert None == match
assert None is match

def test_tag_in_tag_one(self):
els = self._soup.select("div div")
self.assert_css_selects("div div", ["inner", "data1"])

def test_tag_in_tag_many(self):

@@ -508,8 +507,8 @@ class TestCSSSelectors(SoupTest):
def test_match(self):
inner = self._soup.find("div", id="inner")
main = self._soup.find("div", id="main")
assert inner.css.match("div[id=main]") == False
assert main.css.match("div[id=main]") == True
assert inner.css.match("div[id=main]") is False
assert main.css.match("div[id=main]") is True

def test_iselect(self):
gen = self._soup.css.iselect("h2")

@@ -79,7 +79,7 @@ class TestUnicodeDammit(object):

# And if we exclude that, there is no valid guess at all.
dammit = UnicodeDammit(utf8_data, exclude_encodings=["utf-8", "windows-1252"])
assert dammit.original_encoding == None
assert dammit.original_encoding is None


class TestEncodingDetector(object):

@@ -128,7 +128,7 @@ class TestEncodingDetector(object):

bs4.dammit._chardet_dammit = noop
dammit = UnicodeDammit(doc)
assert True == dammit.contains_replacement_characters
assert True is dammit.contains_replacement_characters
assert "\ufffd" in dammit.unicode_markup

soup = BeautifulSoup(doc, "html.parser")

@@ -191,7 +191,6 @@ class TestEncodingDetector(object):
message = warning.message
assert isinstance(message, DeprecationWarning)
assert warning.filename == __file__
msg = str(message)
assert "iso-8859-8" == dammit.original_encoding

# known_definite_encodings and override_encodings were tried
@@ -97,10 +97,10 @@ class TestAttributeDicts:
# This preserves Beautiful Soup's old behavior in the absence of
# guidance from the spec.
d["v"] = False
assert d["v"] == False
assert d["v"] is False

d["v"] = True
assert d["v"] == True
assert d["v"] is True

d["v"] = None
assert d["v"] == ""

@@ -30,14 +30,14 @@ class TestElementFilter(SoupTest):
soup = self.soup("<a>text</a>")
tag = soup.a
string = tag.string
assert True == selector.match(soup)
assert True == selector.match(tag)
assert True == selector.match(string)
assert True is selector.match(soup)
assert True is selector.match(tag)
assert True is selector.match(string)
assert soup.find(selector).name == "a"

# And allows any incoming markup to be turned into PageElements.
assert True == selector.allow_tag_creation(None, "tag", None)
assert True == selector.allow_string_creation("some string")
assert True is selector.allow_tag_creation(None, "tag", None)
assert True is selector.allow_string_creation("some string")

def test_match(self):
def m(pe):

@@ -50,10 +50,10 @@ class TestElementFilter(SoupTest):
deny_string = soup.find(string="deny")

selector = ElementFilter(match_function=m)
assert True == selector.match(allow_tag)
assert True == selector.match(allow_string)
assert False == selector.match(deny_tag)
assert False == selector.match(deny_string)
assert True is selector.match(allow_tag)
assert True is selector.match(allow_string)
assert False is selector.match(deny_tag)
assert False is selector.match(deny_string)

# Since only the match function was provided, there is
# no effect on tag or string creation.

@@ -64,10 +64,10 @@ class TestElementFilter(SoupTest):
# By default, ElementFilter.allow_tag_creation allows everything.
filter = ElementFilter()
f = filter.allow_tag_creation
assert True == f("allow", "ignore", {})
assert True == f("ignore", "allow", {})
assert True == f(None, "ignore", {"allow": "1"})
assert True == f("no", "no", {"no": "nope"})
assert True is f("allow", "ignore", {})
assert True is f("ignore", "allow", {})
assert True is f(None, "ignore", {"allow": "1"})
assert True is f("no", "no", {"no": "nope"})

# You can customize this behavior by overriding
# allow_tag_creation in a subclass.

@@ -86,10 +86,10 @@ class TestElementFilter(SoupTest):

filter = MyFilter()
f = filter.allow_tag_creation
assert True == f("allow", "ignore", {})
assert True == f("ignore", "allow", {})
assert True == f(None, "ignore", {"allow": "1"})
assert False == f("no", "no", {"no": "nope"})
assert True is f("allow", "ignore", {})
assert True is f("ignore", "allow", {})
assert True is f(None, "ignore", {"allow": "1"})
assert False is f("no", "no", {"no": "nope"})

# Test the customized ElementFilter as a value for parse_only.
soup = self.soup(

@@ -109,9 +109,9 @@ class TestElementFilter(SoupTest):
# By default, ElementFilter.allow_string_creation allows everything.
filter = ElementFilter()
f = filter.allow_string_creation
assert True == f("allow")
assert True == f("deny")
assert True == f("please allow")
assert True is f("allow")
assert True is f("deny")
assert True is f("please allow")

# You can customize this behavior by overriding allow_string_creation
# in a subclass.

@@ -121,9 +121,9 @@ class TestElementFilter(SoupTest):

filter = MyFilter()
f = filter.allow_string_creation
assert True == f("allow")
assert False == f("deny")
assert False == f("please allow")
assert True is f("allow")
assert False is f("deny")
assert False is f("please allow")

# Test the customized ElementFilter as a value for parse_only.
soup = self.soup(
@@ -285,7 +285,7 @@ class TestSoupStrainer(SoupTest):
def test_search_tag_deprecated(self):
strainer = SoupStrainer(name="a")
with warnings.catch_warnings(record=True) as w:
assert False == strainer.search_tag("b", {})
assert False is strainer.search_tag("b", {})
[w1] = w
msg = str(w1.message)
assert w1.filename == __file__

@@ -299,7 +299,7 @@ class TestSoupStrainer(SoupTest):
soup = self.soup("<a></a><b></b>")
with warnings.catch_warnings(record=True) as w:
assert soup.a == strainer.search(soup.a)
assert None == strainer.search(soup.b)
assert None is strainer.search(soup.b)
[w1, w2] = w
msg = str(w1.message)
assert msg == str(w2.message)

@@ -609,7 +609,7 @@ class TestSoupStrainer(SoupTest):
assert True, soupstrainer.excludes_everything
assert "" == self.soup(markup, parse_only=soupstrainer).decode()
[warning] = w
msg = str(warning.message)
str(warning.message)
assert warning.filename == __file__
assert str(warning.message).startswith(
"The given value for parse_only will exclude everything:"

@@ -55,17 +55,17 @@ class TestFormatter(SoupTest):

for name in ("html", "minimal", None):
formatter = HTMLFormatter.REGISTRY[name]
assert False == formatter.empty_attributes_are_booleans
assert False is formatter.empty_attributes_are_booleans

formatter = XMLFormatter.REGISTRY[None]
assert False == formatter.empty_attributes_are_booleans
assert False is formatter.empty_attributes_are_booleans

formatter = HTMLFormatter.REGISTRY["html5"]
assert True == formatter.empty_attributes_are_booleans
assert True is formatter.empty_attributes_are_booleans

# Verify that the constructor sets the value.
formatter = Formatter(empty_attributes_are_booleans=True)
assert True == formatter.empty_attributes_are_booleans
assert True is formatter.empty_attributes_are_booleans

# Now demonstrate what it does to markup.
for markup in ("<option selected></option>", '<option selected=""></option>'):
@@ -195,8 +195,8 @@ class TestHTML5LibBuilder(HTML5TreeBuilderSmokeTest):

# You can deactivate this behavior.
soup = self.soup(markup, store_line_numbers=False)
assert None == soup.p.sourceline
assert None == soup.p.sourcepos
assert None is soup.p.sourceline
assert None is soup.p.sourcepos

def test_special_string_containers(self):
# The html5lib tree builder doesn't support this standard feature,

@@ -35,7 +35,7 @@ class TestHTMLParserTreeBuilder(HTMLTreeBuilderSmokeTest):
]
for markup in bad_markup:
with pytest.raises(ParserRejectedMarkup):
soup = self.soup(markup)
self.soup(markup)

def test_namespaced_system_doctype(self):
# html.parser can't handle namespaced doctypes, so skip this one.

@@ -74,8 +74,8 @@ class TestHTMLParserTreeBuilder(HTMLTreeBuilderSmokeTest):

# You can deactivate this behavior.
soup = self.soup(markup, store_line_numbers=False)
assert None == soup.p.sourceline
assert None == soup.p.sourcepos
assert None is soup.p.sourceline
assert None is soup.p.sourcepos

def test_on_duplicate_attribute(self):
# The html.parser tree builder has a variety of ways of

@@ -74,8 +74,8 @@ class TestLXMLTreeBuilder(HTMLTreeBuilderSmokeTest):
"\n <p>\n\n<sourceline>\n<b>text</b></sourceline><sourcepos></p>",
store_line_numbers=True,
)
assert None == soup.p.sourceline
assert None == soup.p.sourcepos
assert None is soup.p.sourceline
assert None is soup.p.sourcepos


@pytest.mark.skipif(

@@ -53,7 +53,7 @@ class TestNavigableString(SoupTest):
def test_string_has_immutable_name_property(self):
# string.name is defined as None and can't be modified
string = self.soup("s").string
assert None == string.name
assert None is string.name
with pytest.raises(AttributeError):
string.name = "foo"
@@ -313,8 +313,8 @@ class TestPersistence(SoupTest):

soup = self.soup(markup)

copied = copy.copy(soup)
copied = copy.deepcopy(soup)
copy.copy(soup)
copy.deepcopy(soup)

def test_copy_preserves_encoding(self):
soup = BeautifulSoup(b"<p> </p>", "html.parser")

@@ -355,11 +355,11 @@ class TestPersistence(SoupTest):
s1 = soup.find(string="Foo")
s2 = copy.copy(s1)
assert s1 == s2
assert None == s2.parent
assert None == s2.next_element
assert None != s1.next_sibling
assert None == s2.next_sibling
assert None == s2.previous_element
assert None is s2.parent
assert None is s2.next_element
assert None is not s1.next_sibling
assert None is s2.next_sibling
assert None is s2.previous_element

def test_copy_navigablestring_subclass_has_same_type(self):
html = "<b><!--Foo--></b>"

@@ -390,10 +390,10 @@ class TestPersistence(SoupTest):

# And they don't have the same relation to the parse tree. The
# copy is not associated with a parse tree at all.
assert None == div_copy.parent
assert None == div_copy.previous_element
assert None == div_copy.find(string="Bar").next_element
assert None != div.find(string="Bar").next_element
assert None is div_copy.parent
assert None is div_copy.previous_element
assert None is div_copy.find(string="Bar").next_element
assert None is not div.find(string="Bar").next_element

# Modifying one of the tag's multi-valued attributes
# doesn't modify the other.
@@ -163,7 +163,7 @@ class TestConstructor(SoupTest):
# Here are two ways of saying that `id` is a multi-valued
# attribute in this context, but 'class' is not.
for switcheroo in ({"*": "id"}, {"a": "id"}):
with warnings.catch_warnings(record=True) as w:
with warnings.catch_warnings(record=True):
# This will create a warning about not explicitly
# specifying a parser, but we'll ignore it.
soup = self.soup(

@@ -313,23 +313,23 @@ class TestWarnings(SoupTest):

def test_warning_if_no_parser_specified(self):
with warnings.catch_warnings(record=True) as w:
soup = BeautifulSoup("<a><b></b></a>")
BeautifulSoup("<a><b></b></a>")
self._assert_no_parser_specified(w)

def test_warning_if_parser_specified_too_vague(self):
with warnings.catch_warnings(record=True) as w:
soup = BeautifulSoup("<a><b></b></a>", "html")
BeautifulSoup("<a><b></b></a>", "html")
self._assert_no_parser_specified(w)

def test_no_warning_if_explicit_parser_specified(self):
with warnings.catch_warnings(record=True) as w:
soup = self.soup("<a><b></b></a>")
self.soup("<a><b></b></a>")
assert [] == w

def test_warning_if_strainer_filters_everything(self):
strainer = SoupStrainer(name="a", string="b")
with warnings.catch_warnings(record=True) as w:
soup = self.soup("<a><b></b></a>", parse_only=strainer)
self.soup("<a><b></b></a>", parse_only=strainer)
warning = self._assert_warning(w, UserWarning)
msg = str(warning.message)
assert msg.startswith("The given value for parse_only will exclude everything:")

@@ -378,7 +378,7 @@ class TestWarnings(SoupTest):
# A warning is issued if the "markup" looks like the name of
# an HTML or text file, or a full path to a file on disk.
with warnings.catch_warnings(record=True) as w:
soup = BeautifulSoup(markup, "html.parser")
BeautifulSoup(markup, "html.parser")
warning = self._assert_warning(w, MarkupResemblesLocatorWarning)
assert "looks more like a filename" in str(warning.message)

@@ -423,13 +423,13 @@ class TestWarnings(SoupTest):
# the markup looks like a bare string, a domain name, or a
# file that's not an HTML file.
with warnings.catch_warnings(record=True) as w:
soup = self.soup(markup)
self.soup(markup)
assert [] == w

def test_url_warning_with_bytes_url(self):
url = b"http://www.crummybytes.com/"
with warnings.catch_warnings(record=True) as warning_list:
soup = BeautifulSoup(url, "html.parser")
BeautifulSoup(url, "html.parser")
warning = self._assert_warning(warning_list, MarkupResemblesLocatorWarning)
assert "looks more like a URL" in str(warning.message)
assert url not in str(warning.message).encode("utf8")

@@ -439,7 +439,7 @@ class TestWarnings(SoupTest):
with warnings.catch_warnings(record=True) as warning_list:
# note - this url must differ from the bytes one otherwise
# python's warnings system swallows the second warning
soup = BeautifulSoup(url, "html.parser")
BeautifulSoup(url, "html.parser")
warning = self._assert_warning(warning_list, MarkupResemblesLocatorWarning)
assert "looks more like a URL" in str(warning.message)
assert url not in str(warning.message)

@@ -448,12 +448,12 @@ class TestWarnings(SoupTest):
# Here the markup contains something besides a URL, so no warning
# is issued.
with warnings.catch_warnings(record=True) as warning_list:
soup = self.soup(b"http://www.crummybytes.com/ is great")
self.soup(b"http://www.crummybytes.com/ is great")
assert not any("looks more like a URL" in str(w.message) for w in warning_list)

def test_url_warning_with_unicode_and_space(self):
with warnings.catch_warnings(record=True) as warning_list:
soup = self.soup("http://www.crummyunicode.com/ is great")
self.soup("http://www.crummyunicode.com/ is great")
assert not any("looks more like a URL" in str(w.message) for w in warning_list)
@@ -475,13 +475,13 @@ class TestNewTag(SoupTest):
assert "foo" == new_tag.name
assert new_tag.string == "txt"
assert dict(bar="baz", name="a name") == new_tag.attrs
assert None == new_tag.parent
assert None is new_tag.parent

# string can be null
new_tag = soup.new_tag("foo")
assert None == new_tag.string
assert None is new_tag.string
new_tag = soup.new_tag("foo", string=None)
assert None == new_tag.string
assert None is new_tag.string

# Or the empty string
new_tag = soup.new_tag("foo", string="")

@@ -587,7 +587,7 @@ class TestEncodingConversion(SoupTest):
soup_from_unicode = self.soup(self.unicode_data)
assert soup_from_unicode.decode() == self.unicode_data
assert soup_from_unicode.foo.string == "Sacr\xe9 bleu!"
assert soup_from_unicode.original_encoding == None
assert soup_from_unicode.original_encoding is None

def test_utf8_in_unicode_out(self):
# UTF-8 input is converted to Unicode. The original_encoding
@@ -17,18 +17,18 @@ class TestTag(SoupTest):

# No list of whitespace-preserving tags -> pretty-print
tag._preserve_whitespace_tags = None
assert True == tag._should_pretty_print(0)
assert True is tag._should_pretty_print(0)

# List exists but tag is not on the list -> pretty-print
tag.preserve_whitespace_tags = ["some_other_tag"]
assert True == tag._should_pretty_print(1)
assert True is tag._should_pretty_print(1)

# Indent level is None -> don't pretty-print
assert False == tag._should_pretty_print(None)
assert False is tag._should_pretty_print(None)

# Tag is on the whitespace-preserving list -> don't pretty-print
tag.preserve_whitespace_tags = ["some_other_tag", "a_tag"]
assert False == tag._should_pretty_print(1)
assert False is tag._should_pretty_print(1)

def test_len(self):
"""The length of a Tag is its number of children."""

@@ -49,7 +49,7 @@ class TestTag(SoupTest):
soup = self.soup("<b><i></i></b>")
assert soup.b == soup.find("b")
assert soup.b.i == soup.find("b").find("i")
assert soup.a == None
assert soup.a is None

def test_deprecated_member_access(self):
soup = self.soup("<b><i></i></b>")

@@ -85,21 +85,21 @@ class TestTag(SoupTest):
def test_empty_tag_has_no_string(self):
# A Tag with no children has no .stirng.
soup = self.soup("<b></b>")
assert soup.b.string == None
assert soup.b.string is None

def test_tag_with_multiple_children_has_no_string(self):
# A Tag with no children has no .string.
soup = self.soup("<a>foo<b></b><b></b></b>")
assert soup.b.string == None
assert soup.b.string is None

soup = self.soup("<a>foo<b></b>bar</b>")
assert soup.b.string == None
assert soup.b.string is None

# Even if all the children are strings, due to trickery,
# it won't work--but this would be a good optimization.
soup = self.soup("<a>foo</b>")
soup.a.insert(1, "bar")
assert soup.a.string == None
assert soup.a.string is None

def test_tag_with_recursive_string_has_string(self):
# A Tag with a single child which has a .string inherits that
@@ -406,7 +406,7 @@ class TestSmooth(SoupTest):
# output.

# Since the <span> tag has two children, its .string is None.
assert None == div.span.string
assert None is div.span.string

assert 7 == len(div.contents)
div.smooth()

@@ -470,7 +470,7 @@ class TestParentOperations(SoupTest):
assert top_tag.parent == self.tree

def test_soup_object_has_no_parent(self):
assert None == self.tree.parent
assert None is self.tree.parent

def test_find_parents(self):
self.assert_selects_ids(

@@ -486,7 +486,7 @@ class TestParentOperations(SoupTest):
# assert self.start.find_parent('ul')['id'] == 'bottom'
assert self.start.find_parent("ul", id="top")["id"] == "top"

assert self.start.find_parent(id="start") == None
assert self.start.find_parent(id="start") is None
assert self.start.find_parent(id="start", consider_self=True)["id"] == "start"

def test_parent_of_text_element(self):

@@ -532,11 +532,11 @@ class TestNextOperations(ProximityTest):

def test_next_of_last_item_is_none(self):
last = self.tree.find(string="Three")
assert last.next_element == None
assert last.next_element is None

def test_next_of_root_is_none(self):
# The document root is outside the next/previous chain.
assert self.tree.next_element == None
assert self.tree.next_element is None

def test_find_all_next(self):
self.assert_selects(self.start.find_all_next("b"), ["Two", "Three"])

@@ -572,11 +572,11 @@ class TestPreviousOperations(ProximityTest):

def test_previous_of_first_item_is_none(self):
first = self.tree.find("html")
assert first.previous_element == None
assert first.previous_element is None

def test_previous_of_root_is_none(self):
# The document root is outside the next/previous chain.
assert self.tree.previous_element == None
assert self.tree.previous_element is None

def test_find_all_previous(self):
# The <b> tag containing the "Three" node is the predecessor
@@ -633,7 +633,7 @@ class TestNextSibling(SiblingTest):
self.start = self.tree.find(id="1")

def test_next_sibling_of_root_is_none(self):
assert self.tree.next_sibling == None
assert self.tree.next_sibling is None

def test_next_sibling(self):
assert self.start.next_sibling["id"] == "2"

@@ -643,13 +643,13 @@ class TestNextSibling(SiblingTest):
assert self.start.next_element["id"] == "1.1"

def test_next_sibling_may_not_exist(self):
assert self.tree.html.next_sibling == None
assert self.tree.html.next_sibling is None

nested_span = self.tree.find(id="1.1")
assert nested_span.next_sibling == None
assert nested_span.next_sibling is None

last_span = self.tree.find(id="4")
assert last_span.next_sibling == None
assert last_span.next_sibling is None

def test_find_next_sibling(self):
assert self.start.find_next_sibling("span")["id"] == "2"

@@ -667,7 +667,7 @@ class TestNextSibling(SiblingTest):

self.assert_selects(start.find_next_siblings("b"), ["bar"])
assert start.find_next_sibling(string="baz") == "baz"
assert start.find_next_sibling(string="nonesuch") == None
assert start.find_next_sibling(string="nonesuch") is None


class TestPreviousSibling(SiblingTest):

@@ -676,7 +676,7 @@ class TestPreviousSibling(SiblingTest):
self.end = self.tree.find(id="4")

def test_previous_sibling_of_root_is_none(self):
assert self.tree.previous_sibling == None
assert self.tree.previous_sibling is None

def test_previous_sibling(self):
assert self.end.previous_sibling["id"] == "3"

@@ -686,13 +686,13 @@ class TestPreviousSibling(SiblingTest):
assert self.end.previous_element["id"] == "3.1"

def test_previous_sibling_may_not_exist(self):
assert self.tree.html.previous_sibling == None
assert self.tree.html.previous_sibling is None

nested_span = self.tree.find(id="1.1")
assert nested_span.previous_sibling == None
assert nested_span.previous_sibling is None

first_span = self.tree.find(id="1")
assert first_span.previous_sibling == None
assert first_span.previous_sibling is None

def test_find_previous_sibling(self):
assert self.end.find_previous_sibling("span")["id"] == "3"

@@ -712,7 +712,7 @@ class TestPreviousSibling(SiblingTest):

self.assert_selects(start.find_previous_siblings("b"), ["bar"])
assert start.find_previous_sibling(string="Foo") == "Foo"
assert start.find_previous_sibling(string="nonesuch") == None
assert start.find_previous_sibling(string="nonesuch") is None


class TestTreeModification(SoupTest):
@@ -773,7 +773,7 @@ class TestTreeModification(SoupTest):
soup = self.soup("<a><b>Foo</b></a><c>Bar</c>")
a = soup.a
a.extract()
assert None == a.parent
assert None is a.parent
with pytest.raises(ValueError):
a.unwrap()
with pytest.raises(ValueError):

@@ -821,7 +821,6 @@ class TestTreeModification(SoupTest):
def test_replace_with_maintains_next_element_throughout(self):
soup = self.soup("<p><a>one</a><b>three</b></p>")
a = soup.a
b = a.contents[0]
# Make it so the <a> tag has two text children.
a.insert(1, "two")

@@ -841,7 +840,7 @@ class TestTreeModification(SoupTest):
assert new_text.previous_element == b
assert new_text.parent == b
assert new_text.previous_element.next_element == new_text
assert new_text.next_element == None
assert new_text.next_element is None

def test_consecutive_text_nodes(self):
# A builder should never create two consecutive text nodes,

@@ -859,7 +858,7 @@ class TestTreeModification(SoupTest):
assert new_text.previous_sibling == "Argh!"
assert new_text.previous_sibling.next_sibling == new_text

assert new_text.next_sibling == None
assert new_text.next_sibling is None
assert new_text.next_element == soup.c

def test_insert_string(self):

@@ -1078,7 +1077,7 @@ class TestTreeModification(SoupTest):
"<p>There's business like <b>no</b> business</p>"
)

assert show.parent == None
assert show.parent is None
assert no.parent == soup.p
assert no.next_element == "no"
assert no.next_sibling == " business"

@@ -1144,17 +1143,17 @@ class TestTreeModification(SoupTest):
)

# The <b> tag is now an orphan.
assert remove_tag.parent == None
assert remove_tag.find(string="right").next_element == None
assert remove_tag.previous_element == None
assert remove_tag.next_sibling == None
assert remove_tag.previous_sibling == None
assert remove_tag.parent is None
assert remove_tag.find(string="right").next_element is None
assert remove_tag.previous_element is None
assert remove_tag.next_sibling is None
assert remove_tag.previous_sibling is None

# The <f> tag is now connected to the <a> tag.
assert move_tag.parent == soup.a
assert move_tag.previous_element == "We"
assert move_tag.next_element.next_element == soup.e
assert move_tag.next_sibling == None
assert move_tag.next_sibling is None

# The gap where the <f> tag used to be has been mended, and
# the word "to" is now connected to the <g> tag.

@@ -1170,7 +1169,7 @@ class TestTreeModification(SoupTest):
<p>Unneeded <em>formatting</em> is unneeded</p>
""")
tree.em.unwrap()
assert tree.em == None
assert tree.em is None
assert tree.p.text == "Unneeded formatting is unneeded"

def test_wrap(self):
@@ -1205,9 +1204,9 @@ class TestTreeModification(SoupTest):

# The extracted tag is now an orphan.
assert len(soup.body.contents) == 2
assert extracted.parent == None
assert extracted.previous_element == None
assert extracted.next_element.next_element == None
assert extracted.parent is None
assert extracted.previous_element is None
assert extracted.next_element.next_element is None

# The gap where the extracted tag used to be has been mended.
content_1 = soup.find(string="Some content. ")

@@ -1220,7 +1219,6 @@ class TestTreeModification(SoupTest):
def test_extract_distinguishes_between_identical_strings(self):
soup = self.soup("<a>foo</a><b>bar</b>")
foo_1 = soup.a.string
bar_1 = soup.b.string
foo_2 = soup.new_string("foo")
bar_2 = soup.new_string("bar")
soup.a.append(foo_2)

@@ -1252,7 +1250,7 @@ class TestTreeModification(SoupTest):
def test_extract_works_when_element_is_surrounded_by_identical_strings(self):
soup = self.soup("<html>\n" "<body>hi</body>\n" "</html>")
soup.find("body").extract()
assert None == soup.find("body")
assert None is soup.find("body")

def test_clear(self):
"""Tag.clear()"""

@@ -1312,22 +1310,22 @@ class TestTreeModification(SoupTest):
a = p1.a
text = p1.em.string
for i in [p1, p2, a, text]:
assert False == i.decomposed
assert False is i.decomposed

# This sets p1 and everything beneath it to decomposed.
p1.decompose()
for i in [p1, a, text]:
assert True == i.decomposed
assert True is i.decomposed
# p2 is unaffected.
assert False == p2.decomposed
assert False is p2.decomposed

def test_decompose_string(self):
soup = self.soup("<div><p>String 1</p><p>String 2</p></p>")
div = soup.div
text = div.p.string
assert False == text.decomposed
assert False is text.decomposed
text.decompose()
assert True == text.decomposed
assert True is text.decomposed
assert "<div><p></p><p>String 2</p></div>" == div.decode()

def test_string_set(self):