1
0
mirror of https://git.yoctoproject.org/poky synced 2026-06-01 00:59:48 +00:00

bitbake: cooker: Use shared counter for processing parser jobs

Instead of pre-partitioning which jobs will go to which parser
processes, pass the list of all jobs to all the parser processes
(efficiently via fork()), then use a shared counter of the next index
in the list that needs to be processed. This allows the parser processes
to run independently, without needing to be fed by the parent process, and
load balances them much better.

(Bitbake rev: 373c4ddaf0e8128cc4f7d47aefa9860bd477a00f)

Signed-off-by: Joshua Watt <JPEWhacker@gmail.com>
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
This commit is contained in:
Joshua Watt
2025-07-08 09:42:22 -06:00
committed by Richard Purdie
parent 51d825b367
commit 5adeefd63f
+18 -12
View File
@@ -26,6 +26,7 @@ import json
import pickle import pickle
import codecs import codecs
import hashserv import hashserv
import ctypes
logger = logging.getLogger("BitBake") logger = logging.getLogger("BitBake")
collectlog = logging.getLogger("BitBake.Collection") collectlog = logging.getLogger("BitBake.Collection")
@@ -1998,8 +1999,9 @@ class ParsingFailure(Exception):
Exception.__init__(self, realexception, recipe) Exception.__init__(self, realexception, recipe)
class Parser(multiprocessing.Process): class Parser(multiprocessing.Process):
def __init__(self, jobs, results, quit, profile): def __init__(self, jobs, next_job_id, results, quit, profile):
self.jobs = jobs self.jobs = jobs
self.next_job_id = next_job_id
self.results = results self.results = results
self.quit = quit self.quit = quit
multiprocessing.Process.__init__(self) multiprocessing.Process.__init__(self)
@@ -2065,10 +2067,14 @@ class Parser(multiprocessing.Process):
break break
job = None job = None
try: if havejobs:
job = self.jobs.pop() with self.next_job_id.get_lock():
except IndexError: if self.next_job_id.value < len(self.jobs):
havejobs = False job = self.jobs[self.next_job_id.value]
self.next_job_id.value += 1
else:
havejobs = False
if job: if job:
result = self.parse(*job) result = self.parse(*job)
# Clear the siggen cache after parsing to control memory usage, its huge # Clear the siggen cache after parsing to control memory usage, its huge
@@ -2134,13 +2140,13 @@ class CookerParser(object):
self.bb_caches = bb.cache.MulticonfigCache(self.cfgbuilder, self.cfghash, cooker.caches_array) self.bb_caches = bb.cache.MulticonfigCache(self.cfgbuilder, self.cfghash, cooker.caches_array)
self.fromcache = set() self.fromcache = set()
self.willparse = set() self.willparse = []
for mc in self.cooker.multiconfigs: for mc in self.cooker.multiconfigs:
for filename in self.mcfilelist[mc]: for filename in self.mcfilelist[mc]:
appends = self.cooker.collections[mc].get_file_appends(filename) appends = self.cooker.collections[mc].get_file_appends(filename)
layername = self.cooker.collections[mc].calc_bbfile_priority(filename)[2] layername = self.cooker.collections[mc].calc_bbfile_priority(filename)[2]
if not self.bb_caches[mc].cacheValid(filename, appends): if not self.bb_caches[mc].cacheValid(filename, appends):
self.willparse.add((mc, self.bb_caches[mc], filename, appends, layername)) self.willparse.append((mc, self.bb_caches[mc], filename, appends, layername))
else: else:
self.fromcache.add((mc, self.bb_caches[mc], filename, appends, layername)) self.fromcache.add((mc, self.bb_caches[mc], filename, appends, layername))
@@ -2159,18 +2165,18 @@ class CookerParser(object):
def start(self): def start(self):
self.results = self.load_cached() self.results = self.load_cached()
self.processes = [] self.processes = []
if self.toparse: if self.toparse:
bb.event.fire(bb.event.ParseStarted(self.toparse), self.cfgdata) bb.event.fire(bb.event.ParseStarted(self.toparse), self.cfgdata)
next_job_id = multiprocessing.Value(ctypes.c_int, 0)
self.parser_quit = multiprocessing.Event() self.parser_quit = multiprocessing.Event()
self.result_queue = multiprocessing.Queue() self.result_queue = multiprocessing.Queue()
def chunkify(lst,n): # Have to pass in willparse at fork time so all parsing processes have the unpickleable data
return [lst[i::n] for i in range(n)] # then access it by index from the parse queue.
self.jobs = chunkify(list(self.willparse), self.num_processes)
for i in range(0, self.num_processes): for i in range(0, self.num_processes):
parser = Parser(self.jobs[i], self.result_queue, self.parser_quit, self.cooker.configuration.profile) parser = Parser(self.willparse, next_job_id, self.result_queue, self.parser_quit, self.cooker.configuration.profile)
parser.start() parser.start()
self.process_names.append(parser.name) self.process_names.append(parser.name)
self.processes.append(parser) self.processes.append(parser)