1
0
mirror of https://git.yoctoproject.org/poky synced 2026-06-03 13:49:49 +00:00

bitbake: bitbake/bs4: Upgrade 4.3.2 -> 4.4.1 (python 3 version)

Upgrade to 4.4.1, which has been run through 2to3 as per the maintainer's
recommendation for v3 use.

(Bitbake rev: 2f4b98af93c971a8c466ffaf3c09cca0edb6e3ad)

Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
This commit is contained in:
Richard Purdie
2016-05-06 09:06:51 +01:00
parent 4f8959324d
commit 822eabf32d
15 changed files with 972 additions and 361 deletions
+31 -16
View File
@@ -4,10 +4,15 @@ __all__ = [
]
from io import BytesIO
from StringIO import StringIO
from io import StringIO
import collections
from lxml import etree
from bs4.element import Comment, Doctype, NamespacedAttribute
from bs4.element import (
Comment,
Doctype,
NamespacedAttribute,
ProcessingInstruction,
)
from bs4.builder import (
FAST,
HTML,
@@ -25,8 +30,11 @@ class LXMLTreeBuilderForXML(TreeBuilder):
is_xml = True
NAME = "lxml-xml"
ALTERNATE_NAMES = ["xml"]
# Well, it's permissive by XML parser standards.
features = [LXML, XML, FAST, PERMISSIVE]
features = [NAME, LXML, XML, FAST, PERMISSIVE]
CHUNK_SIZE = 512
@@ -70,6 +78,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
return (None, tag)
def prepare_markup(self, markup, user_specified_encoding=None,
exclude_encodings=None,
document_declared_encoding=None):
"""
:yield: A series of 4-tuples.
@@ -78,12 +87,12 @@ class LXMLTreeBuilderForXML(TreeBuilder):
Each 4-tuple represents a strategy for parsing the document.
"""
if isinstance(markup, unicode):
if isinstance(markup, str):
# We were given Unicode. Maybe lxml can parse Unicode on
# this system?
yield markup, None, document_declared_encoding, False
if isinstance(markup, unicode):
if isinstance(markup, str):
# No, apparently not. Convert the Unicode to UTF-8 and
# tell lxml to parse it as UTF-8.
yield (markup.encode("utf8"), "utf8",
@@ -95,14 +104,15 @@ class LXMLTreeBuilderForXML(TreeBuilder):
# the document as each one in turn.
is_html = not self.is_xml
try_encodings = [user_specified_encoding, document_declared_encoding]
detector = EncodingDetector(markup, try_encodings, is_html)
detector = EncodingDetector(
markup, try_encodings, is_html, exclude_encodings)
for encoding in detector.encodings:
yield (detector.markup, encoding, document_declared_encoding, False)
def feed(self, markup):
if isinstance(markup, bytes):
markup = BytesIO(markup)
elif isinstance(markup, unicode):
elif isinstance(markup, str):
markup = StringIO(markup)
# Call feed() at least once, even if the markup is empty,
@@ -117,7 +127,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
if len(data) != 0:
self.parser.feed(data)
self.parser.close()
except (UnicodeDecodeError, LookupError, etree.ParserError), e:
except (UnicodeDecodeError, LookupError, etree.ParserError) as e:
raise ParserRejectedMarkup(str(e))
def close(self):
@@ -135,12 +145,12 @@ class LXMLTreeBuilderForXML(TreeBuilder):
self.nsmaps.append(None)
elif len(nsmap) > 0:
# A new namespace mapping has come into play.
inverted_nsmap = dict((value, key) for key, value in nsmap.items())
inverted_nsmap = dict((value, key) for key, value in list(nsmap.items()))
self.nsmaps.append(inverted_nsmap)
# Also treat the namespace mapping as a set of attributes on the
# tag, so we can recreate it later.
attrs = attrs.copy()
for prefix, namespace in nsmap.items():
for prefix, namespace in list(nsmap.items()):
attribute = NamespacedAttribute(
"xmlns", prefix, "http://www.w3.org/2000/xmlns/")
attrs[attribute] = namespace
@@ -149,7 +159,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
# from lxml with namespaces attached to their names, and
# turn them into NamespacedAttribute objects.
new_attrs = {}
for attr, value in attrs.items():
for attr, value in list(attrs.items()):
namespace, attr = self._getNsTag(attr)
if namespace is None:
new_attrs[attr] = value
@@ -189,7 +199,9 @@ class LXMLTreeBuilderForXML(TreeBuilder):
self.nsmaps.pop()
def pi(self, target, data):
pass
self.soup.endData()
self.soup.handle_data(target + ' ' + data)
self.soup.endData(ProcessingInstruction)
def data(self, content):
self.soup.handle_data(content)
@@ -207,12 +219,15 @@ class LXMLTreeBuilderForXML(TreeBuilder):
def test_fragment_to_document(self, fragment):
"""See `TreeBuilder`."""
return u'<?xml version="1.0" encoding="utf-8"?>\n%s' % fragment
return '<?xml version="1.0" encoding="utf-8"?>\n%s' % fragment
class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML):
features = [LXML, HTML, FAST, PERMISSIVE]
NAME = LXML
ALTERNATE_NAMES = ["lxml-html"]
features = ALTERNATE_NAMES + [NAME, HTML, FAST, PERMISSIVE]
is_xml = False
def default_parser(self, encoding):
@@ -224,10 +239,10 @@ class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML):
self.parser = self.parser_for(encoding)
self.parser.feed(markup)
self.parser.close()
except (UnicodeDecodeError, LookupError, etree.ParserError), e:
except (UnicodeDecodeError, LookupError, etree.ParserError) as e:
raise ParserRejectedMarkup(str(e))
def test_fragment_to_document(self, fragment):
"""See `TreeBuilder`."""
return u'<html><body>%s</body></html>' % fragment
return '<html><body>%s</body></html>' % fragment