1
0
mirror of https://git.yoctoproject.org/poky synced 2026-05-30 00:20:08 +00:00

bitbake: bitbake/bs4: Upgrade 4.3.2 -> 4.4.1 (python 3 version)

Upgrade to 4.4.1 which has been run through 2to3 as per the maintainers
recommendation for v3 use.

(Bitbake rev: 2f4b98af93c971a8c466ffaf3c09cca0edb6e3ad)

Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
This commit is contained in:
Richard Purdie
2016-05-06 09:06:51 +01:00
parent 4f8959324d
commit 822eabf32d
15 changed files with 972 additions and 361 deletions
+30 -26
View File
@@ -4,10 +4,16 @@ __all__ = [
'HTMLParserTreeBuilder',
]
from HTMLParser import (
HTMLParser,
HTMLParseError,
)
from html.parser import HTMLParser
try:
from html.parser import HTMLParseError
except ImportError as e:
# HTMLParseError is removed in Python 3.5. Since it can never be
# thrown in 3.5, we can just define our own class as a placeholder.
class HTMLParseError(Exception):
pass
import sys
import warnings
@@ -19,10 +25,10 @@ import warnings
# At the end of this file, we monkeypatch HTMLParser so that
# strict=True works well on Python 3.2.2.
major, minor, release = sys.version_info[:3]
CONSTRUCTOR_TAKES_STRICT = (
major > 3
or (major == 3 and minor > 2)
or (major == 3 and minor == 2 and release >= 3))
CONSTRUCTOR_TAKES_STRICT = major == 3 and minor == 2 and release >= 3
CONSTRUCTOR_STRICT_IS_DEPRECATED = major == 3 and minor == 3
CONSTRUCTOR_TAKES_CONVERT_CHARREFS = major == 3 and minor >= 4
from bs4.element import (
CData,
@@ -63,7 +69,8 @@ class BeautifulSoupHTMLParser(HTMLParser):
def handle_charref(self, name):
# XXX workaround for a bug in HTMLParser. Remove this once
# it's fixed.
# it's fixed in all supported versions.
# http://bugs.python.org/issue13633
if name.startswith('x'):
real_name = int(name.lstrip('x'), 16)
elif name.startswith('X'):
@@ -72,9 +79,9 @@ class BeautifulSoupHTMLParser(HTMLParser):
real_name = int(name)
try:
data = unichr(real_name)
except (ValueError, OverflowError), e:
data = u"\N{REPLACEMENT CHARACTER}"
data = chr(real_name)
except (ValueError, OverflowError) as e:
data = "\N{REPLACEMENT CHARACTER}"
self.handle_data(data)
@@ -113,14 +120,6 @@ class BeautifulSoupHTMLParser(HTMLParser):
def handle_pi(self, data):
self.soup.endData()
if data.endswith("?") and data.lower().startswith("xml"):
# "An XHTML processing instruction using the trailing '?'
# will cause the '?' to be included in data." - HTMLParser
# docs.
#
# Strip the question mark so we don't end up with two
# question marks.
data = data[:-1]
self.soup.handle_data(data)
self.soup.endData(ProcessingInstruction)
@@ -128,26 +127,31 @@ class BeautifulSoupHTMLParser(HTMLParser):
class HTMLParserTreeBuilder(HTMLTreeBuilder):
is_xml = False
features = [HTML, STRICT, HTMLPARSER]
picklable = True
NAME = HTMLPARSER
features = [NAME, HTML, STRICT]
def __init__(self, *args, **kwargs):
if CONSTRUCTOR_TAKES_STRICT:
if CONSTRUCTOR_TAKES_STRICT and not CONSTRUCTOR_STRICT_IS_DEPRECATED:
kwargs['strict'] = False
if CONSTRUCTOR_TAKES_CONVERT_CHARREFS:
kwargs['convert_charrefs'] = False
self.parser_args = (args, kwargs)
def prepare_markup(self, markup, user_specified_encoding=None,
document_declared_encoding=None):
document_declared_encoding=None, exclude_encodings=None):
"""
:return: A 4-tuple (markup, original encoding, encoding
declared within markup, whether any characters had to be
replaced with REPLACEMENT CHARACTER).
"""
if isinstance(markup, unicode):
if isinstance(markup, str):
yield (markup, None, None, False)
return
try_encodings = [user_specified_encoding, document_declared_encoding]
dammit = UnicodeDammit(markup, try_encodings, is_html=True)
dammit = UnicodeDammit(markup, try_encodings, is_html=True,
exclude_encodings=exclude_encodings)
yield (dammit.markup, dammit.original_encoding,
dammit.declared_html_encoding,
dammit.contains_replacement_characters)
@@ -158,7 +162,7 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder):
parser.soup = self.soup
try:
parser.feed(markup)
except HTMLParseError, e:
except HTMLParseError as e:
warnings.warn(RuntimeWarning(
"Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help."))
raise e