# File: TI-Status-Bot/.venv/Lib/site-packages/bs4/tests/test_css.py
# 537 lines, 18 KiB, Python

import pytest
import types
from bs4 import (
BeautifulSoup,
ResultSet,
)
from typing import (
Any,
List,
Tuple,
Type,
)
from packaging.version import Version
from . import (
SoupTest,
SOUP_SIEVE_PRESENT,
)
# Exception type expected when a selector uses an unsupported
# pseudo-class. It is only bound when soupsieve is importable.
SOUPSIEVE_EXCEPTION_ON_UNSUPPORTED_PSEUDOCLASS: Type[Exception]
if SOUP_SIEVE_PRESENT:
    from soupsieve import __version__, SelectorSyntaxError

    # Some behavior changed in soupsieve 2.6 in a way that affects one
    # of our tests. For that test to run under all versions of Python
    # supported by Beautiful Soup (which includes versions of Python
    # not supported by soupsieve 2.6) we need to check both behaviors:
    # NotImplementedError before 2.6, SelectorSyntaxError from 2.6 on.
    SOUPSIEVE_EXCEPTION_ON_UNSUPPORTED_PSEUDOCLASS = (
        NotImplementedError
        if Version(__version__) < Version("2.6")
        else SelectorSyntaxError
    )
@pytest.mark.skipif(not SOUP_SIEVE_PRESENT, reason="Soup Sieve not installed")
class TestCSSSelectors(SoupTest):
    """Test basic CSS selector functionality.

    This functionality is implemented in soupsieve, which has a much
    more comprehensive test suite, so this is basically an extra check
    that soupsieve works as expected.

    Unless a test builds its own markup, it runs against ``self._soup``,
    which ``setup_method`` creates by parsing ``HTML``.
    """

    # Shared fixture document. Expectations in the tests below refer to
    # elements by their ``id`` attributes.
    HTML = """
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">
<html>
<head>
<title>The title</title>
<link rel="stylesheet" href="blah.css" type="text/css" id="l1">
</head>
<body>
<custom-dashed-tag class="dashed" id="dash1">Hello there.</custom-dashed-tag>
<div id="main" class="fancy">
<div id="inner">
<h1 id="header1">An H1</h1>
<p>Some text</p>
<p class="onep" id="p1">Some more text</p>
<h2 id="header2">An H2</h2>
<p class="class1 class2 class3" id="pmulti">Another</p>
<a href="http://bob.example.org/" rel="friend met" id="bob">Bob</a>
<h2 id="header3">Another H2</h2>
<a id="me" href="http://simonwillison.net/" rel="me">me</a>
<span class="s1">
<a href="#" id="s1a1">span1a1</a>
<a href="#" id="s1a2">span1a2 <span id="s1a2s1">test</span></a>
<span class="span2">
<a href="#" id="s2a1">span2a1</a>
</span>
<span class="span3"></span>
<custom-dashed-tag class="dashed" id="dash2"/>
<div data-tag="dashedvalue" id="data1"/>
</span>
</div>
<x id="xid">
<z id="zida"/>
<z id="zidab"/>
<z id="zidac"/>
</x>
<y id="yid">
<z id="zidb"/>
</y>
<p lang="en" id="lang-en">English</p>
<p lang="en-gb" id="lang-en-gb">English UK</p>
<p lang="en-us" id="lang-en-us">English US</p>
<p lang="fr" id="lang-fr">French</p>
</div>
<div id="footer">
</div>
"""

    def setup_method(self):
        """Parse the shared ``HTML`` fixture into ``self._soup``."""
        self._soup = BeautifulSoup(self.HTML, "html.parser")

    def assert_css_selects(
        self, selector: str, expected_ids: List[str], **kwargs: Any
    ) -> None:
        """Assert that ``selector`` selects exactly the given element IDs.

        Document order is ignored: both ID lists are compared sorted.

        :param selector: CSS selector passed to ``self._soup.select()``.
        :param expected_ids: ``id`` values of the elements that should
            be selected. The list is not modified.
        :param kwargs: Extra keyword arguments forwarded to ``select()``
            (for example ``limit``).
        """
        results = self._soup.select(selector, **kwargs)
        assert isinstance(results, ResultSet)
        # Compare sorted copies so the caller's list is left untouched.
        actual = sorted(el["id"] for el in results)
        expected = sorted(expected_ids)
        assert expected == actual, "Selector %s, expected [%s], got [%s]" % (
            selector,
            ", ".join(expected),
            ", ".join(actual),
        )

    # Alternate name for assert_css_selects.
    assertSelect = assert_css_selects

    def assert_css_select_multiple(self, *tests: Tuple[str, List[str]]) -> None:
        """Run ``assert_css_selects`` for each ``(selector, expected_ids)`` pair."""
        for selector, expected_ids in tests:
            self.assert_css_selects(selector, expected_ids)

    def test_precompiled(self):
        """A selector compiled via ``css.compile`` works with ``select``/``select_one``."""
        sel = self._soup.css.compile("div")

        els = self._soup.select(sel)
        assert len(els) == 4
        for div in els:
            assert div.name == "div"

        el = self._soup.select_one(sel)
        assert "main" == el["id"]

    def test_one_tag_one(self):
        """A tag-name selector finds the single ``<title>`` tag."""
        els = self._soup.select("title")
        assert len(els) == 1
        assert els[0].name == "title"
        assert els[0].contents == ["The title"]

    def test_one_tag_many(self):
        """A tag-name selector finds every ``<div>``; ``select_one`` gives the first."""
        els = self._soup.select("div")
        assert len(els) == 4
        for div in els:
            assert div.name == "div"

        el = self._soup.select_one("div")
        assert "main" == el["id"]

    def test_select_one_returns_none_if_no_match(self):
        """``select_one`` returns ``None`` when nothing matches."""
        match = self._soup.select_one("nonexistenttag")
        assert None is match

    def test_tag_in_tag_one(self):
        """The descendant combinator only matches nested ``<div>`` tags."""
        self.assert_css_selects("div div", ["inner", "data1"])

    def test_tag_in_tag_many(self):
        """Different ancestor chains all reach the same set of ``<div>`` tags."""
        for selector in ("html div", "html body div", "body div"):
            self.assert_css_selects(selector, ["data1", "main", "inner", "footer"])

    def test_limit(self):
        """The ``limit`` keyword caps the number of results returned."""
        self.assert_css_selects("html div", ["main"], limit=1)
        self.assert_css_selects("html body div", ["inner", "main"], limit=2)
        # A limit larger than the number of matches returns all of them.
        self.assert_css_selects(
            "body div", ["data1", "main", "inner", "footer"], limit=10
        )

    def test_tag_no_match(self):
        """Selecting a tag name absent from the document gives no results."""
        assert len(self._soup.select("del")) == 0

    def test_invalid_tag(self):
        """A malformed tag selector raises ``SelectorSyntaxError``."""
        with pytest.raises(SelectorSyntaxError):
            self._soup.select("tag%t")

    def test_select_dashed_tag_ids(self):
        """Tag names containing dashes can be selected."""
        self.assert_css_selects("custom-dashed-tag", ["dash1", "dash2"])

    def test_select_dashed_by_id(self):
        """A dashed tag name can be combined with an attribute selector."""
        dashed = self._soup.select('custom-dashed-tag[id="dash2"]')
        assert dashed[0].name == "custom-dashed-tag"
        assert dashed[0]["id"] == "dash2"

    def test_dashed_tag_text(self):
        """The text of a selected dashed tag is accessible."""
        assert self._soup.select("body > custom-dashed-tag")[0].text == "Hello there."

    def test_select_dashed_matches_find_all(self):
        """``select`` and ``find_all`` agree for a dashed tag name."""
        assert self._soup.select("custom-dashed-tag") == self._soup.find_all(
            "custom-dashed-tag"
        )

    def test_header_tags(self):
        """Header tags are selected by tag name."""
        self.assert_css_select_multiple(
            ("h1", ["header1"]),
            ("h2", ["header2", "header3"]),
        )

    def test_class_one(self):
        """A class selector finds the one ``<p class="onep">`` tag."""
        for selector in (".onep", "p.onep", "html p.onep"):
            els = self._soup.select(selector)
            assert len(els) == 1
            assert els[0].name == "p"
            assert els[0]["class"] == ["onep"]

    def test_class_mismatched_tag(self):
        """A class selector on the wrong tag name matches nothing."""
        els = self._soup.select("div.onep")
        assert len(els) == 0

    def test_one_id(self):
        """An ID selector finds the element, with or without tag qualifiers."""
        for selector in ("div#inner", "#inner", "div div#inner"):
            self.assert_css_selects(selector, ["inner"])

    def test_bad_id(self):
        """An ID that does not exist matches nothing."""
        els = self._soup.select("#doesnotexist")
        assert len(els) == 0

    def test_items_in_id(self):
        """Descendants of an ID-selected element are found."""
        els = self._soup.select("div#inner p")
        assert len(els) == 3
        for el in els:
            assert el.name == "p"
        assert els[1]["class"] == ["onep"]
        assert not els[0].has_attr("class")

    def test_a_bunch_of_emptys(self):
        """Selectors that cannot match the fixture return nothing."""
        for selector in ("div#main del", "div#main div.oops", "div div#main"):
            assert len(self._soup.select(selector)) == 0

    def test_multi_class_support(self):
        """Any one class of a multi-class element is enough to select it."""
        for selector in (
            ".class1",
            "p.class1",
            ".class2",
            "p.class2",
            ".class3",
            "p.class3",
            "html p.class2",
            "div#inner .class2",
        ):
            self.assert_css_selects(selector, ["pmulti"])

    def test_multi_class_selection(self):
        """Chained class selectors match regardless of the order given."""
        for selector in (".class1.class3", ".class3.class2", ".class1.class2.class3"):
            self.assert_css_selects(selector, ["pmulti"])

    def test_child_selector(self):
        """The child combinator (``>``) only matches direct children."""
        self.assert_css_selects(".s1 > a", ["s1a1", "s1a2"])
        self.assert_css_selects(".s1 > a span", ["s1a2s1"])

    def test_child_selector_id(self):
        """The child combinator can be combined with an ID selector."""
        self.assert_css_selects(".s1 > a#s1a2 span", ["s1a2s1"])

    def test_attribute_equals(self):
        """``[attr="value"]`` matches on exact attribute value."""
        self.assert_css_select_multiple(
            ('p[class="onep"]', ["p1"]),
            ('p[id="p1"]', ["p1"]),
            ('[class="onep"]', ["p1"]),
            ('[id="p1"]', ["p1"]),
            ('link[rel="stylesheet"]', ["l1"]),
            ('link[type="text/css"]', ["l1"]),
            ('link[href="blah.css"]', ["l1"]),
            ('link[href="no-blah.css"]', []),
            ('[rel="stylesheet"]', ["l1"]),
            ('[type="text/css"]', ["l1"]),
            ('[href="blah.css"]', ["l1"]),
            ('[href="no-blah.css"]', []),
            ('p[href="no-blah.css"]', []),
            ('[href="no-blah.css"]', []),
        )

    def test_attribute_tilde(self):
        """``[attr~="value"]`` matches one word of a space-separated value."""
        self.assert_css_select_multiple(
            ('p[class~="class1"]', ["pmulti"]),
            ('p[class~="class2"]', ["pmulti"]),
            ('p[class~="class3"]', ["pmulti"]),
            ('[class~="class1"]', ["pmulti"]),
            ('[class~="class2"]', ["pmulti"]),
            ('[class~="class3"]', ["pmulti"]),
            ('a[rel~="friend"]', ["bob"]),
            ('a[rel~="met"]', ["bob"]),
            ('[rel~="friend"]', ["bob"]),
            ('[rel~="met"]', ["bob"]),
        )

    def test_attribute_startswith(self):
        """``[attr^="value"]`` matches on an attribute value prefix."""
        self.assert_css_select_multiple(
            ('[rel^="style"]', ["l1"]),
            ('link[rel^="style"]', ["l1"]),
            ('notlink[rel^="notstyle"]', []),
            ('[rel^="notstyle"]', []),
            ('link[rel^="notstyle"]', []),
            ('link[href^="bla"]', ["l1"]),
            ('a[href^="http://"]', ["bob", "me"]),
            ('[href^="http://"]', ["bob", "me"]),
            ('[id^="p"]', ["pmulti", "p1"]),
            ('[id^="m"]', ["me", "main"]),
            ('div[id^="m"]', ["main"]),
            ('a[id^="m"]', ["me"]),
            ('div[data-tag^="dashed"]', ["data1"]),
        )

    def test_attribute_endswith(self):
        """``[attr$="value"]`` matches on an attribute value suffix."""
        self.assert_css_select_multiple(
            ('[href$=".css"]', ["l1"]),
            ('link[href$=".css"]', ["l1"]),
            ('link[id$="1"]', ["l1"]),
            (
                '[id$="1"]',
                ["data1", "l1", "p1", "header1", "s1a1", "s2a1", "s1a2s1", "dash1"],
            ),
            ('div[id$="1"]', ["data1"]),
            ('[id$="noending"]', []),
        )

    def test_attribute_contains(self):
        """``[attr*="value"]`` matches on an attribute value substring."""
        self.assert_css_select_multiple(
            # From test_attribute_startswith
            ('[rel*="style"]', ["l1"]),
            ('link[rel*="style"]', ["l1"]),
            ('notlink[rel*="notstyle"]', []),
            ('[rel*="notstyle"]', []),
            ('link[rel*="notstyle"]', []),
            ('link[href*="bla"]', ["l1"]),
            ('[href*="http://"]', ["bob", "me"]),
            ('[id*="p"]', ["pmulti", "p1"]),
            ('div[id*="m"]', ["main"]),
            ('a[id*="m"]', ["me"]),
            # From test_attribute_endswith
            ('[href*=".css"]', ["l1"]),
            ('link[href*=".css"]', ["l1"]),
            ('link[id*="1"]', ["l1"]),
            (
                '[id*="1"]',
                [
                    "data1",
                    "l1",
                    "p1",
                    "header1",
                    "s1a1",
                    "s1a2",
                    "s2a1",
                    "s1a2s1",
                    "dash1",
                ],
            ),
            ('div[id*="1"]', ["data1"]),
            ('[id*="noending"]', []),
            # New for this test
            ('[href*="."]', ["bob", "me", "l1"]),
            ('a[href*="."]', ["bob", "me"]),
            ('link[href*="."]', ["l1"]),
            ('div[id*="n"]', ["main", "inner"]),
            ('div[id*="nn"]', ["inner"]),
            ('div[data-tag*="edval"]', ["data1"]),
        )

    def test_attribute_exact_or_hypen(self):
        """``[attr|="value"]`` matches the exact value or a ``value-`` prefix."""
        self.assert_css_select_multiple(
            ('p[lang|="en"]', ["lang-en", "lang-en-gb", "lang-en-us"]),
            ('[lang|="en"]', ["lang-en", "lang-en-gb", "lang-en-us"]),
            ('p[lang|="fr"]', ["lang-fr"]),
            ('p[lang|="gb"]', []),
        )

    def test_attribute_exists(self):
        """``[attr]`` matches any element that has the attribute."""
        self.assert_css_select_multiple(
            ("[rel]", ["l1", "bob", "me"]),
            ("link[rel]", ["l1"]),
            ("a[rel]", ["bob", "me"]),
            ("[lang]", ["lang-en", "lang-en-gb", "lang-en-us", "lang-fr"]),
            ("p[class]", ["p1", "pmulti"]),
            ("[blah]", []),
            ("p[blah]", []),
            ("div[data-tag]", ["data1"]),
        )

    def test_quoted_space_in_selector_name(self):
        """A quoted attribute value may contain a space."""
        html = """<div style="display: wrong">nope</div>
<div style="display: right">yes</div>
"""
        soup = BeautifulSoup(html, "html.parser")
        [chosen] = soup.select('div[style="display: right"]')
        assert "yes" == chosen.string

    def test_unsupported_pseudoclass(self):
        """Unknown or malformed pseudo-classes raise an exception."""
        # The exception type for an unknown pseudo-class depends on the
        # installed soupsieve version, hence the module-level constant.
        with pytest.raises(SOUPSIEVE_EXCEPTION_ON_UNSUPPORTED_PSEUDOCLASS):
            self._soup.select("a:no-such-pseudoclass")

        with pytest.raises(SelectorSyntaxError):
            self._soup.select("a:nth-of-type(a)")

    def test_nth_of_type(self):
        """``:nth-of-type(n)`` picks the n-th sibling of the given tag name."""
        # Try to select first paragraph
        els = self._soup.select("div#inner p:nth-of-type(1)")
        assert len(els) == 1
        assert els[0].string == "Some text"

        # Try to select third paragraph
        els = self._soup.select("div#inner p:nth-of-type(3)")
        assert len(els) == 1
        assert els[0].string == "Another"

        # Try to select (non-existent!) fourth paragraph
        els = self._soup.select("div#inner p:nth-of-type(4)")
        assert len(els) == 0

        # Zero will select no tags.
        els = self._soup.select("div p:nth-of-type(0)")
        assert len(els) == 0

    def test_nth_of_type_direct_descendant(self):
        """``:nth-of-type`` works together with the child combinator."""
        els = self._soup.select("div#inner > p:nth-of-type(1)")
        assert len(els) == 1
        assert els[0].string == "Some text"

    def test_id_child_selector_nth_of_type(self):
        """``:nth-of-type`` works under an ID selector and child combinator."""
        self.assert_css_selects("#inner > p:nth-of-type(2)", ["p1"])

    def test_select_on_element(self):
        """``select`` called on a tag only searches beneath that tag."""
        # Other tests operate on the tree; this operates on an element
        # within the tree.
        main = self._soup.find("div", id="main")
        selected = main.select("div")
        # The <div id="inner"> tag was selected. The <div id="footer">
        # tag was not.
        self.assert_selects_ids(selected, ["inner", "data1"])

    def test_overspecified_child_id(self):
        """An ancestor qualifier in front of an ID must itself match."""
        self.assert_css_selects(".fancy #inner", ["inner"])
        self.assert_css_selects(".normal #inner", [])

    def test_adjacent_sibling_selector(self):
        """The ``+`` combinator matches only the immediately following sibling."""
        self.assert_css_selects("#p1 + h2", ["header2"])
        self.assert_css_selects("#p1 + h2 + p", ["pmulti"])
        self.assert_css_selects("#p1 + #header2 + .class1", ["pmulti"])
        assert [] == self._soup.select("#p1 + p")

    def test_general_sibling_selector(self):
        """The ``~`` combinator matches any following sibling."""
        self.assert_css_selects("#p1 ~ h2", ["header2", "header3"])
        self.assert_css_selects("#p1 ~ #header2", ["header2"])
        self.assert_css_selects("#p1 ~ h2 + a", ["me"])
        self.assert_css_selects('#p1 ~ h2 + [rel="me"]', ["me"])
        assert [] == self._soup.select("#inner ~ h2")

    def test_dangling_combinator(self):
        """A selector ending in a combinator raises ``SelectorSyntaxError``."""
        with pytest.raises(SelectorSyntaxError):
            self._soup.select("h1 >")

    def test_sibling_combinator_wont_select_same_tag_twice(self):
        """Each tag appears once even when several earlier siblings match."""
        self.assert_css_selects("p[lang] ~ p", ["lang-en-gb", "lang-en-us", "lang-fr"])

    # Test the selector grouping operator (the comma)
    def test_multiple_select(self):
        """A comma-separated selector list selects the union of its parts."""
        self.assert_css_selects("x, y", ["xid", "yid"])

    def test_multiple_select_with_no_space(self):
        """The comma needs no surrounding whitespace."""
        self.assert_css_selects("x,y", ["xid", "yid"])

    def test_multiple_select_with_more_space(self):
        """Extra whitespace after the comma is accepted."""
        # Several spaces on purpose: a single space would just repeat
        # test_multiple_select.
        self.assert_css_selects("x,    y", ["xid", "yid"])

    def test_multiple_select_duplicated(self):
        """Repeating a selector in the list does not duplicate results."""
        self.assert_css_selects("x, x", ["xid"])

    def test_multiple_select_sibling(self):
        """A selector list may contain a sibling combinator."""
        self.assert_css_selects("x, y ~ p[lang=fr]", ["xid", "lang-fr"])

    def test_multiple_select_tag_and_direct_descendant(self):
        """A selector list may mix a tag name with a child combinator."""
        self.assert_css_selects("x, y > z", ["xid", "zidb"])

    def test_multiple_select_direct_descendant_and_tags(self):
        """A child-combinator selector followed by plain tag names."""
        self.assert_css_selects(
            "div > x, y, z", ["xid", "yid", "zida", "zidb", "zidab", "zidac"]
        )

    def test_multiple_select_indirect_descendant(self):
        """A descendant-combinator selector followed by plain tag names."""
        self.assert_css_selects(
            "div x,y, z", ["xid", "yid", "zida", "zidb", "zidab", "zidac"]
        )

    def test_invalid_multiple_select(self):
        """Empty entries in a selector list raise ``SelectorSyntaxError``."""
        with pytest.raises(SelectorSyntaxError):
            self._soup.select(",x, y")
        with pytest.raises(SelectorSyntaxError):
            self._soup.select("x,,y")

    def test_multiple_select_attrs(self):
        """A selector list may consist of attribute selectors."""
        self.assert_css_selects("p[lang=en], p[lang=en-gb]", ["lang-en", "lang-en-gb"])

    def test_multiple_select_ids(self):
        """Each entry in a selector list is evaluated independently."""
        # "y > z[id=zida]" matches nothing: zida is a child of <x>, not <y>.
        self.assert_css_selects(
            "x, y > z[id=zida], z[id=zidab], z[id=zidb]", ["xid", "zidb", "zidab"]
        )

    def test_multiple_select_nested(self):
        """A selector list may contain chained child combinators."""
        self.assert_css_selects("body > div > x, y > z", ["xid", "zidb"])

    def test_select_duplicate_elements(self):
        """Identical-looking elements are all returned by a multiple select."""
        # When markup contains duplicate elements, a multiple select
        # will find all of them.
        markup = '<div class="c1"/><div class="c2"/><div class="c1"/>'
        soup = BeautifulSoup(markup, "html.parser")
        selected = soup.select(".c1, .c2")
        assert 3 == len(selected)

        # Verify that find_all finds the same elements, though because
        # of an implementation detail it finds them in a different
        # order.
        for element in soup.find_all(class_=["c1", "c2"]):
            assert element in selected

    def test_closest(self):
        """``css.closest`` returns the matching ancestor of a tag."""
        inner = self._soup.find("div", id="inner")
        closest = inner.css.closest("div[id=main]")
        assert closest == self._soup.find("div", id="main")

    def test_match(self):
        """``css.match`` reports whether the tag itself matches the selector."""
        inner = self._soup.find("div", id="inner")
        main = self._soup.find("div", id="main")
        assert inner.css.match("div[id=main]") is False
        assert main.css.match("div[id=main]") is True

    def test_iselect(self):
        """``css.iselect`` yields matches lazily from a generator."""
        gen = self._soup.css.iselect("h2")
        assert isinstance(gen, types.GeneratorType)
        [header2, header3] = gen
        assert header2["id"] == "header2"
        assert header3["id"] == "header3"

    def test_filter(self):
        """``css.filter`` returns the matching tags as a ``ResultSet``."""
        inner = self._soup.find("div", id="inner")

        results = inner.css.filter("h2")
        assert isinstance(results, ResultSet)
        assert len(results) == 2

        results = inner.css.filter("h2[id=header3]")
        assert isinstance(results, ResultSet)
        [result] = results
        assert result["id"] == "header3"

    def test_escape(self):
        """``css.escape`` backslash-escapes CSS special characters."""
        m = self._soup.css.escape
        assert m(".foo#bar") == "\\.foo\\#bar"
        assert m("()[]{}") == "\\(\\)\\[\\]\\{\\}"
        assert m(".foo") == self._soup.css.escape(".foo")