1
0
mirror of https://git.yoctoproject.org/poky synced 2026-05-07 16:59:22 +00:00

oe/license_finder: rewrite license checksum loading, scan more licenses

Rewrite the license checksum generation and loading of CSV files to be
clearer.

This also expands the license scan beyond COMMON_LICENSE_DIR to cover the
directories listed in LICENSE_PATH, which layers can extend to provide
more license texts.

(From OE-Core rev: 417240ba7a9b3985530988940a222b079b503b64)

Signed-off-by: Ross Burton <ross.burton@arm.com>
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
This commit is contained in:
Ross Burton
2025-06-13 14:16:18 +01:00
committed by Richard Purdie
parent dd8f320184
commit 06f91813cc
+29 -36
View File
@@ -11,24 +11,18 @@ import os
import re
import bb
import bb.utils
logger = logging.getLogger("BitBake.OE.LicenseFinder")
def get_license_md5sums(d):
import bb.utils
def _load_hash_csv(d):
"""
Load a mapping of (checksum: license name) from all files/license-hashes.csv
files that can be found in the available layers.
"""
import csv
md5sums = {}
# Gather md5sums of license files in common license dir
commonlicdir = d.getVar('COMMON_LICENSE_DIR')
for fn in os.listdir(commonlicdir):
md5value = bb.utils.md5_file(os.path.join(commonlicdir, fn))
md5sums[md5value] = fn
# The following were extracted from common values in various recipes
# (double checking the license against the license file itself, not just
# the LICENSE value in the recipe)
# Read license md5sums from csv file
for path in d.getVar('BBPATH').split(':'):
csv_path = os.path.join(path, 'files', 'license-hashes.csv')
@@ -41,28 +35,28 @@ def get_license_md5sums(d):
return md5sums
def crunch_known_licenses(d):
'''
Calculate the MD5 checksums for the crunched versions of all common
licenses. Also add additional known checksums.
'''
crunched_md5sums = {}
def _crunch_known_licenses(d):
"""
Calculate the MD5 checksums for the original and "crunched" versions of all
known licenses.
"""
md5sums = {}
commonlicdir = d.getVar('COMMON_LICENSE_DIR')
for fn in sorted(os.listdir(commonlicdir)):
md5value = crunch_license(os.path.join(commonlicdir, fn))
if md5value not in crunched_md5sums:
crunched_md5sums[md5value] = fn
elif fn != crunched_md5sums[md5value]:
bb.debug(2, "crunched_md5sums['%s'] is already set to '%s' rather than '%s'" % (md5value, crunched_md5sums[md5value], fn))
else:
bb.debug(2, "crunched_md5sums['%s'] is already set to '%s'" % (md5value, crunched_md5sums[md5value]))
lic_dirs = [d.getVar('COMMON_LICENSE_DIR')] + (d.getVar('LICENSE_PATH') or "").split()
for lic_dir in lic_dirs:
for fn in os.listdir(lic_dir):
path = os.path.join(lic_dir, fn)
# Hash the exact contents
md5value = bb.utils.md5_file(path)
md5sums[md5value] = fn
# Also hash a "crunched" version
md5value = _crunch_license(path)
md5sums[md5value] = fn
return crunched_md5sums
return md5sums
def crunch_license(licfile):
def _crunch_license(licfile):
'''
Remove non-material text from a license file and then calculate its
md5sum. This works well for licenses that contain a copyright statement,
@@ -152,10 +146,9 @@ def find_license_files(srctree, first_only=False):
def match_licenses(licfiles, srctree, d):
import bb
md5sums = get_license_md5sums(d)
crunched_md5sums = crunch_known_licenses(d)
md5sums = {}
md5sums.update(_load_hash_csv(d))
md5sums.update(_crunch_known_licenses(d))
licenses = []
for licfile in sorted(licfiles):
@@ -163,8 +156,8 @@ def match_licenses(licfiles, srctree, d):
md5value = bb.utils.md5_file(resolved_licfile)
license = md5sums.get(md5value, None)
if not license:
crunched_md5 = crunch_license(resolved_licfile)
license = crunched_md5sums.get(crunched_md5, None)
crunched_md5 = _crunch_license(resolved_licfile)
license = md5sums.get(crunched_md5, None)
if not license:
license = 'Unknown'
logger.info("Please add the following line for '%s' to a 'license-hashes.csv' " \