1
0
mirror of https://git.yoctoproject.org/poky synced 2026-05-30 12:29:55 +00:00

bitbake: wget.py: parse only <a> tags

This is done for two reasons:
1) The important one: we hit the following bug when doing upstream version checks
on some webpages:
https://bugs.launchpad.net/beautifulsoup/+bug/1471755

2) Also, the Beautiful Soup documentation states that memory usage and
speed are improved that way.

(Bitbake rev: 7546d4aeb3ba8fda9832081b84d93138dc5e58d6)

Signed-off-by: Alexander Kanavin <alexander.kanavin@linux.intel.com>
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
This commit is contained in:
Alexander Kanavin
2015-12-04 13:00:20 +02:00
committed by Richard Purdie
parent 71ede7b689
commit 9d19dd9bd7
+3 -2
View File
@@ -38,6 +38,7 @@ from bb.fetch2 import FetchError
from bb.fetch2 import logger from bb.fetch2 import logger
from bb.fetch2 import runfetchcmd from bb.fetch2 import runfetchcmd
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from bs4 import SoupStrainer
class Wget(FetchMethod): class Wget(FetchMethod):
"""Class to fetch urls via 'wget'""" """Class to fetch urls via 'wget'"""
@@ -367,7 +368,7 @@ class Wget(FetchMethod):
version = ['', '', ''] version = ['', '', '']
bb.debug(3, "VersionURL: %s" % (url)) bb.debug(3, "VersionURL: %s" % (url))
soup = BeautifulSoup(self._fetch_index(url, ud, d)) soup = BeautifulSoup(self._fetch_index(url, ud, d), "html.parser", parse_only=SoupStrainer("a"))
if not soup: if not soup:
bb.debug(3, "*** %s NO SOUP" % (url)) bb.debug(3, "*** %s NO SOUP" % (url))
return "" return ""
@@ -417,7 +418,7 @@ class Wget(FetchMethod):
ud.path.split(dirver)[0], ud.user, ud.pswd, {}]) ud.path.split(dirver)[0], ud.user, ud.pswd, {}])
bb.debug(3, "DirURL: %s, %s" % (dirs_uri, package)) bb.debug(3, "DirURL: %s, %s" % (dirs_uri, package))
soup = BeautifulSoup(self._fetch_index(dirs_uri, ud, d)) soup = BeautifulSoup(self._fetch_index(dirs_uri, ud, d), "html.parser", parse_only=SoupStrainer("a"))
if not soup: if not soup:
return version[1] return version[1]