From 08aa69a1fd7983dcf93d283e47e3c5ac9bd377b1 Mon Sep 17 00:00:00 2001 From: Richard Purdie Date: Wed, 21 Feb 2024 13:10:04 +0000 Subject: [PATCH] bitbake: runqueue: Add support for BB_LOADFACTOR_MAX Some ditros don't enable /proc/pressure and it tends to be those which we see bitbake timeout issues on, seemingly as load gets too high and the bitbake processes don't get scheduled in for minutes at a time. Add support for stopping running extra tasks if the system load average goes above a certain threshold by setting BB_LOADFACTOR_MAX. The value used is scaled by CPU number, so a value of 1 would be when the load average equals the number of cpu cores of the system, under one only starts tasks when the load average is below the number of cores. This means you can centrally set a value such as 1.5 which will then scale correctly to different sized machines with differing numbers of CPUs. The pressure regulation is probably more accurate and responsive, however our graphs do show singificant load spikes on some workers and this patch is aimed at trying to avoid those. Pressure regulation is used where available in preference to this load factor regulation when both are set. (Bitbake rev: 14a27306f6dceb4999c2804ccae5a09cc3d8dd49) Signed-off-by: Richard Purdie --- bitbake/lib/bb/runqueue.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/bitbake/lib/bb/runqueue.py b/bitbake/lib/bb/runqueue.py index e86ccd8c61..6987de3e29 100644 --- a/bitbake/lib/bb/runqueue.py +++ b/bitbake/lib/bb/runqueue.py @@ -220,6 +220,16 @@ class RunQueueScheduler(object): bb.note("Pressure status changed to CPU: %s, IO: %s, Mem: %s (CPU: %s/%s, IO: %s/%s, Mem: %s/%s) - using %s/%s bitbake threads" % (pressure_state + pressure_values + (len(self.rq.runq_running.difference(self.rq.runq_complete)), self.rq.number_tasks))) self.pressure_state = pressure_state return (exceeds_cpu_pressure or exceeds_io_pressure or exceeds_memory_pressure) + elif self.rq.max_loadfactor: + limit = False + loadfactor = float(os.getloadavg()[0]) / os.cpu_count() + # bb.warn("Comparing %s to %s" % (loadfactor, self.rq.max_loadfactor)) + if loadfactor > self.rq.max_loadfactor: + limit = True + if hasattr(self, "loadfactor_limit") and limit != self.loadfactor_limit: + bb.note("Load average limiting set to %s as load average: %s - using %s/%s bitbake threads" % (limit, loadfactor, len(self.rq.runq_running.difference(self.rq.runq_complete)), self.rq.number_tasks)) + self.loadfactor_limit = limit + return limit return False def next_buildable_task(self): @@ -1822,6 +1832,7 @@ class RunQueueExecute: self.max_cpu_pressure = self.cfgData.getVar("BB_PRESSURE_MAX_CPU") self.max_io_pressure = self.cfgData.getVar("BB_PRESSURE_MAX_IO") self.max_memory_pressure = self.cfgData.getVar("BB_PRESSURE_MAX_MEMORY") + self.max_loadfactor = self.cfgData.getVar("BB_LOADFACTOR_MAX") self.sq_buildable = set() self.sq_running = set() @@ -1875,6 +1886,11 @@ class RunQueueExecute: bb.fatal("Invalid BB_PRESSURE_MAX_MEMORY %s, minimum value is %s." % (self.max_memory_pressure, lower_limit)) if self.max_memory_pressure > upper_limit: bb.warn("Your build will be largely unregulated since BB_PRESSURE_MAX_MEMORY is set to %s. It is very unlikely that such high pressure will be experienced." % (self.max_io_pressure)) + + if self.max_loadfactor: + self.max_loadfactor = float(self.max_loadfactor) + if self.max_loadfactor <= 0: + bb.fatal("Invalid BB_LOADFACTOR_MAX %s, needs to be greater than zero." % (self.max_loadfactor)) # List of setscene tasks which we've covered self.scenequeue_covered = set()