bitbake: cooker: Use shared counter for processing parser jobs

Instead of pre-partitioning which jobs will go to which parser processes, pass the list of all jobs to all the parser processes (efficiently via fork()), then use a shared counter of the next index in the list that needs to be processed. This allows the parser processes to run without needing to be fed by the parent process, and load balances them much better. (Bitbake rev: 373c4ddaf0e8128cc4f7d47aefa9860bd477a00f) Signed-off-by: Joshua Watt <JPEWhacker@gmail.com> Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
2026-06-01 00:59:48 +00:00 · 2025-07-08 09:42:22 -06:00
parent 51d825b367
commit 5adeefd63f
1 changed files with 18 additions and 12 deletions
@@ -26,6 +26,7 @@ import json
 import pickle
 import codecs
 import hashserv
 import ctypes
 logger      = logging.getLogger("BitBake")
 collectlog  = logging.getLogger("BitBake.Collection")
@@ -1998,8 +1999,9 @@ class ParsingFailure(Exception):
        Exception.__init__(self, realexception, recipe)
 class Parser(multiprocessing.Process):
-    def __init__(self, jobs, results, quit, profile):
+    def __init__(self, jobs, next_job_id, results, quit, profile):
        self.jobs = jobs
        self.next_job_id = next_job_id
        self.results = results
        self.quit = quit
        multiprocessing.Process.__init__(self)
@@ -2065,10 +2067,14 @@ class Parser(multiprocessing.Process):
                    break
                job = None
-                try:
+                if havejobs:
-                    job = self.jobs.pop()
+                    with self.next_job_id.get_lock():
-                except IndexError:
+                        if self.next_job_id.value < len(self.jobs):
-                    havejobs = False
+                            job = self.jobs[self.next_job_id.value]
                            self.next_job_id.value += 1
                        else:
                            havejobs = False
                if job:
                    result = self.parse(*job)
                    # Clear the siggen cache after parsing to control memory usage, its huge
@@ -2134,13 +2140,13 @@ class CookerParser(object):
        self.bb_caches = bb.cache.MulticonfigCache(self.cfgbuilder, self.cfghash, cooker.caches_array)
        self.fromcache = set()
-        self.willparse = set()
+        self.willparse = []
        for mc in self.cooker.multiconfigs:
            for filename in self.mcfilelist[mc]:
                appends = self.cooker.collections[mc].get_file_appends(filename)
                layername = self.cooker.collections[mc].calc_bbfile_priority(filename)[2]
                if not self.bb_caches[mc].cacheValid(filename, appends):
-                    self.willparse.add((mc, self.bb_caches[mc], filename, appends, layername))
+                    self.willparse.append((mc, self.bb_caches[mc], filename, appends, layername))
                else:
                    self.fromcache.add((mc, self.bb_caches[mc], filename, appends, layername))
@@ -2159,18 +2165,18 @@ class CookerParser(object):
    def start(self):
        self.results = self.load_cached()
        self.processes = []
        if self.toparse:
            bb.event.fire(bb.event.ParseStarted(self.toparse), self.cfgdata)
            next_job_id = multiprocessing.Value(ctypes.c_int, 0)
            self.parser_quit = multiprocessing.Event()
            self.result_queue = multiprocessing.Queue()
-            def chunkify(lst,n):
+            # Have to pass in willparse at fork time so all parsing processes have the unpickleable data
-                return [lst[i::n] for i in range(n)]
+            # then access it by index from the parse queue.
            self.jobs = chunkify(list(self.willparse), self.num_processes)
            for i in range(0, self.num_processes):
-                parser = Parser(self.jobs[i], self.result_queue, self.parser_quit, self.cooker.configuration.profile)
+                parser = Parser(self.willparse, next_job_id, self.result_queue, self.parser_quit, self.cooker.configuration.profile)
                parser.start()
                self.process_names.append(parser.name)
                self.processes.append(parser)