1
0
mirror of https://git.yoctoproject.org/poky synced 2026-06-01 13:09:50 +00:00

oe/license_finder: rewrite license checksum loading, scan more licenses

Rewrite the license checksum generation and the loading of the
license-hashes.csv files so that the code is clearer.

This also expands the license scan beyond COMMON_LICENSE_DIR to include the
directories listed in LICENSE_PATH, which layers can extend to provide more license texts.

(From OE-Core rev: 417240ba7a9b3985530988940a222b079b503b64)

Signed-off-by: Ross Burton <ross.burton@arm.com>
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
This commit is contained in:
Ross Burton
2025-06-13 14:16:18 +01:00
committed by Richard Purdie
parent dd8f320184
commit 06f91813cc
+29 -36
View File
@@ -11,24 +11,18 @@ import os
import re import re
import bb import bb
import bb.utils
logger = logging.getLogger("BitBake.OE.LicenseFinder") logger = logging.getLogger("BitBake.OE.LicenseFinder")
def get_license_md5sums(d): def _load_hash_csv(d):
import bb.utils """
Load a mapping of (checksum: license name) from all files/license-hashes.csv
files that can be found in the available layers.
"""
import csv import csv
md5sums = {} md5sums = {}
# Gather md5sums of license files in common license dir
commonlicdir = d.getVar('COMMON_LICENSE_DIR')
for fn in os.listdir(commonlicdir):
md5value = bb.utils.md5_file(os.path.join(commonlicdir, fn))
md5sums[md5value] = fn
# The following were extracted from common values in various recipes
# (double checking the license against the license file itself, not just
# the LICENSE value in the recipe)
# Read license md5sums from csv file # Read license md5sums from csv file
for path in d.getVar('BBPATH').split(':'): for path in d.getVar('BBPATH').split(':'):
csv_path = os.path.join(path, 'files', 'license-hashes.csv') csv_path = os.path.join(path, 'files', 'license-hashes.csv')
@@ -41,28 +35,28 @@ def get_license_md5sums(d):
return md5sums return md5sums
def crunch_known_licenses(d): def _crunch_known_licenses(d):
''' """
Calculate the MD5 checksums for the crunched versions of all common Calculate the MD5 checksums for the original and "crunched" versions of all
licenses. Also add additional known checksums. known licenses.
''' """
md5sums = {}
crunched_md5sums = {}
commonlicdir = d.getVar('COMMON_LICENSE_DIR') lic_dirs = [d.getVar('COMMON_LICENSE_DIR')] + (d.getVar('LICENSE_PATH') or "").split()
for fn in sorted(os.listdir(commonlicdir)): for lic_dir in lic_dirs:
md5value = crunch_license(os.path.join(commonlicdir, fn)) for fn in os.listdir(lic_dir):
if md5value not in crunched_md5sums: path = os.path.join(lic_dir, fn)
crunched_md5sums[md5value] = fn # Hash the exact contents
elif fn != crunched_md5sums[md5value]: md5value = bb.utils.md5_file(path)
bb.debug(2, "crunched_md5sums['%s'] is already set to '%s' rather than '%s'" % (md5value, crunched_md5sums[md5value], fn)) md5sums[md5value] = fn
else: # Also hash a "crunched" version
bb.debug(2, "crunched_md5sums['%s'] is already set to '%s'" % (md5value, crunched_md5sums[md5value])) md5value = _crunch_license(path)
md5sums[md5value] = fn
return crunched_md5sums return md5sums
def crunch_license(licfile): def _crunch_license(licfile):
''' '''
Remove non-material text from a license file and then calculate its Remove non-material text from a license file and then calculate its
md5sum. This works well for licenses that contain a copyright statement, md5sum. This works well for licenses that contain a copyright statement,
@@ -152,10 +146,9 @@ def find_license_files(srctree, first_only=False):
def match_licenses(licfiles, srctree, d): def match_licenses(licfiles, srctree, d):
import bb md5sums = {}
md5sums = get_license_md5sums(d) md5sums.update(_load_hash_csv(d))
md5sums.update(_crunch_known_licenses(d))
crunched_md5sums = crunch_known_licenses(d)
licenses = [] licenses = []
for licfile in sorted(licfiles): for licfile in sorted(licfiles):
@@ -163,8 +156,8 @@ def match_licenses(licfiles, srctree, d):
md5value = bb.utils.md5_file(resolved_licfile) md5value = bb.utils.md5_file(resolved_licfile)
license = md5sums.get(md5value, None) license = md5sums.get(md5value, None)
if not license: if not license:
crunched_md5 = crunch_license(resolved_licfile) crunched_md5 = _crunch_license(resolved_licfile)
license = crunched_md5sums.get(crunched_md5, None) license = md5sums.get(crunched_md5, None)
if not license: if not license:
license = 'Unknown' license = 'Unknown'
logger.info("Please add the following line for '%s' to a 'license-hashes.csv' " \ logger.info("Please add the following line for '%s' to a 'license-hashes.csv' " \