139 lines
4.3 KiB
Python
139 lines
4.3 KiB
Python
"""Tests of classes in element.py.
|
|
|
|
The really big classes -- Tag, PageElement, and NavigableString --
are tested in separate files.
|
|
"""
|
|
|
|
import pytest
|
|
from bs4.element import (
|
|
HTMLAttributeDict,
|
|
XMLAttributeDict,
|
|
CharsetMetaAttributeValue,
|
|
ContentMetaAttributeValue,
|
|
NamespacedAttribute,
|
|
ResultSet,
|
|
)
|
|
|
|
class TestNamedspacedAttribute:
    """Checks how NamespacedAttribute compares with strings and with itself."""

    def test_name_may_be_none_or_missing(self):
        """A None, empty, or omitted name compares equal to the prefix alone."""
        for ctor_args in (("xmlns", None), ("xmlns", ""), ("xmlns",)):
            attr = NamespacedAttribute(*ctor_args)
            assert attr == "xmlns"

    def test_namespace_may_be_none_or_missing(self):
        """A None or empty first argument compares equal to the bare name."""
        for first_arg in (None, ""):
            attr = NamespacedAttribute(first_arg, "tag")
            assert attr == "tag"

    def test_attribute_is_equivalent_to_colon_separated_string(self):
        """Two non-empty parts compare equal to the "a:b" string."""
        attr = NamespacedAttribute("a", "b")
        assert "a:b" == attr

    def test_attributes_are_equivalent_if_prefix_and_name_identical(self):
        """Equality looks at prefix and name, not at the third argument."""
        reference = NamespacedAttribute("a", "b", "c")
        twin = NamespacedAttribute("a", "b", "c")
        assert reference == twin

        # The actual namespace is not considered.
        without_namespace = NamespacedAttribute("a", "b", None)
        assert reference == without_namespace

        # But name and prefix are important.
        different_name = NamespacedAttribute("a", "z", "c")
        assert reference != different_name

        different_prefix = NamespacedAttribute("z", "b", "c")
        assert reference != different_prefix
|
|
|
|
|
|
class TestAttributeValueWithCharsetSubstitution:
    """Certain attributes are designed to have the charset of the
    final document substituted into their value.
    """

    def test_charset_meta_attribute_value(self):
        """A charset value compares as its original text and swaps encodings."""
        # The value of a CharsetMetaAttributeValue is whatever
        # encoding the string is in.
        charset_value = CharsetMetaAttributeValue("euc-jp")
        assert "euc-jp" == charset_value
        assert "euc-jp" == charset_value.original_value
        for target in ("utf8", "ascii"):
            assert target == charset_value.substitute_encoding(target)

        # If the target encoding is a Python internal encoding,
        # no encoding will be mentioned in the output HTML.
        assert "" == charset_value.substitute_encoding("palmos")

    def test_content_meta_attribute_value(self):
        """A content value keeps its text and rewrites only the charset part."""
        content_value = ContentMetaAttributeValue("text/html; charset=euc-jp")
        assert "text/html; charset=euc-jp" == content_value
        assert "text/html; charset=euc-jp" == content_value.original_value

        substitutions = (
            ("utf8", "text/html; charset=utf8"),
            ("ascii", "text/html; charset=ascii"),
        )
        for target, expected in substitutions:
            assert expected == content_value.substitute_encoding(target)

        # If the target encoding is a Python internal encoding, the
        # charset argument will be omitted altogether.
        assert "text/html" == content_value.substitute_encoding("palmos")
|
|
|
|
|
|
class TestAttributeDicts:
    """Checks how the XML and HTML attribute dictionaries store assigned values."""

    def test_xml_attribute_value_handling(self):
        """Numbers become strings, booleans are kept, and None becomes ""."""
        # Verify that attribute values are processed according to the
        # XML spec's rules.
        attrs = XMLAttributeDict()
        for number, as_text in ((100, "100"), (100.123, "100.123")):
            attrs["v"] = number
            assert attrs["v"] == as_text

        # This preserves Beautiful Soup's old behavior in the absence of
        # guidance from the spec.
        attrs["v"] = False
        assert attrs["v"] is False

        attrs["v"] = True
        assert attrs["v"] is True

        attrs["v"] = None
        assert attrs["v"] == ""

    def test_html_attribute_value_handling(self):
        """Numbers become strings, False/None drop the key, True stores the name."""
        # Verify that attribute values are processed according to the
        # HTML spec's rules.
        attrs = HTMLAttributeDict()
        for number, as_text in ((100, "100"), (100.123, "100.123")):
            attrs["v"] = number
            assert attrs["v"] == as_text

        # Order matters: the key is still present from the numeric
        # assignments above when False is assigned.
        for dropped in (False, None):
            attrs["v"] = dropped
            assert "v" not in attrs

        attrs["v"] = True
        assert attrs["v"] == "v"

        # With a NamespacedAttribute key, True is stored as the "name" part.
        namespaced_key = NamespacedAttribute("prefix", "name", "namespace")
        attrs[namespaced_key] = True
        assert attrs[namespaced_key] == "name"
|
|
|
|
|
|
class TestResultSet:
    """Checks the error raised for unknown attribute access on a ResultSet."""

    def test_getattr_exception(self):
        """Reading .name raises AttributeError with the explanatory message."""
        expected_message = """ResultSet object has no attribute "name". You're probably treating a list of elements like a single element. Did you call find_all() when you meant to call find()?"""
        result_set = ResultSet(None)
        with pytest.raises(AttributeError) as excinfo:
            result_set.name
        # Checked after the ``with`` block: inside it, the assertion would
        # never run, because the attribute access above raises first.
        assert expected_message == str(excinfo.value)
|