From ce4f1f734efd638af01f1849ffffdc2746ad4a55 Mon Sep 17 00:00:00 2001
From: Mike Galbraith <efault@gmx.de>
Date: Fri, 19 Nov 2010 12:52:42 +0100
Subject: [PATCH 10/28] Miracle patch
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On Sun, 2010-11-14 at 16:26 -0800, Linus Torvalds wrote:
> On Sun, Nov 14, 2010 at 4:15 PM, Linus Torvalds
> <torvalds@linux-foundation.org> wrote:
> >
> > THAT is why I think it's so silly to try to be so strict and walk over
> > all processes while holding a couple of spinlocks.
>
> Btw, let me say that I think the patch is great even with that thing
> in. It looks clean, the thing I'm complaining about is not a big deal,
> and it seems to perform very much as advertized. The difference with
> autogroup scheduling is very noticeable with a simple "make -j64"
> kernel compile.
>
> So I really don't think it's a big deal. The sysctl handler isn't even
> complicated. But boy does it hurt my eyes to see a spinlock held
> around a "do_each_thread()". And I do get the feeling that the
> simplest way to fix it would be to just remove the code entirely, and
> just say that "enabling/disabling may be delayed for old processes
> with existing autogroups".

Which is what I just did. If the oddball case isn't a big deal, the
patch shrinks, which is a good thing. I just wanted to cover all bases.

Patchlet with handler whacked:

A recurring complaint from CFS users is that parallel kbuild has a negative
impact on desktop interactivity. This patch implements an idea from Linus,
to automatically create task groups. This patch only implements Linus' per
tty task group suggestion, and only for fair class tasks, but leaves the way
open for enhancement.

Implementation: each task's signal struct contains an inherited pointer to a
refcounted autogroup struct containing a task group pointer, the default for
all tasks pointing to the init_task_group. When a task calls __proc_set_tty(),
the process-wide reference to the default group is dropped, a new task group is
created, and the process is moved into the new task group. Children thereafter
inherit this task group, and increase its refcount. On exit, a reference to the
current task group is dropped when the last reference to each signal struct is
dropped. The task group is destroyed when the last signal struct referencing
it is freed. At runqueue selection time, IFF a task has no cgroup assignment,
its current autogroup is used.

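In terms of the hooks added by the diff below, that lifecycle works out to the
following (a condensed map for orientation, not additional code):

  __proc_set_tty()     -> sched_autogroup_create_attach(): allocate a new autogroup
                          and task group, move the whole thread group into it
  copy_signal()        -> sched_autogroup_fork(): the child's signal struct takes a
                          reference on the parent's autogroup
  put_signal_struct()  -> sched_autogroup_exit(): drop that reference; the final
                          kref_put() runs autogroup_destroy() -> sched_destroy_group()
  task_group()         -> autogroup_task_group(): substitute the autogroup's task group
                          whenever a fair class, non-exiting task would otherwise land
                          in the root group and the feature is enabled
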
The feature is enabled from boot by default if CONFIG_SCHED_AUTOGROUP is
selected, but can be disabled via the boot option noautogroup, and can also
be turned on/off on the fly via

  echo [01] > /proc/sys/kernel/sched_autogroup_enabled

which will automatically move tasks to/from the root task group.

Some numbers.

A 100% hog overhead measurement proggy pinned to the same CPU as a make -j10

About measurement proggy:
pert/sec = perturbations/sec
min/max/avg = scheduler service latencies in usecs
sum/s = time accrued by the competition per sample period (1 sec here)
overhead = %CPU received by the competition per sample period

pert/s: 31 >40475.37us: 3 min: 0.37 max:48103.60 avg:29573.74 sum/s:916786us overhead:90.24%
pert/s: 23 >41237.70us: 12 min: 0.36 max:56010.39 avg:40187.01 sum/s:924301us overhead:91.99%
pert/s: 24 >42150.22us: 12 min: 8.86 max:61265.91 avg:39459.91 sum/s:947038us overhead:92.20%
pert/s: 26 >42344.91us: 11 min: 3.83 max:52029.60 avg:36164.70 sum/s:940282us overhead:91.12%
pert/s: 24 >44262.90us: 14 min: 5.05 max:82735.15 avg:40314.33 sum/s:967544us overhead:92.22%

Same load with this patch applied.

pert/s: 229 >5484.43us: 41 min: 0.15 max:12069.42 avg:2193.81 sum/s:502382us overhead:50.24%
pert/s: 222 >5652.28us: 43 min: 0.46 max:12077.31 avg:2248.56 sum/s:499181us overhead:49.92%
pert/s: 211 >5809.38us: 43 min: 0.16 max:12064.78 avg:2381.70 sum/s:502538us overhead:50.25%
pert/s: 223 >6147.92us: 43 min: 0.15 max:16107.46 avg:2282.17 sum/s:508925us overhead:50.49%
pert/s: 218 >6252.64us: 43 min: 0.16 max:12066.13 avg:2324.11 sum/s:506656us overhead:50.27%

Average service latency is an order of magnitude better with autogroup.
(Imagine that pert were Xorg or whatnot instead)
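Put differently: the average perturbation seen by the pinned hog drops from
roughly 29.6-40.3ms per event to 2.2-2.4ms (a 12-18x reduction), and the worst
case from 48-83ms down to 12-16ms.
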
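The measurement proggy itself is not part of this mail. Purely as an
illustration of the kind of tool being described (a guess at its shape, not
Mike's actual code, and with an arbitrary 100us threshold), a minimal
userspace stand-in could pin itself to the busy CPU, spin on the clock, and
report any gap above the threshold as a perturbation:

/*
 * pert-lite: pin to one CPU, spin on the clock, and report how often and for
 * how long the scheduler gives that CPU to the competition.
 * Build: gcc -O2 -o pert-lite pert-lite.c
 */
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <time.h>

static double now_us(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return ts.tv_sec * 1e6 + ts.tv_nsec / 1e3;
}

int main(void)
{
	cpu_set_t set;
	double last, t, start, min = 1e9, max = 0, sum = 0;
	long perts = 0;

	CPU_ZERO(&set);
	CPU_SET(3, &set);	/* same CPU the kbuild is pinned to */
	sched_setaffinity(0, sizeof(set), &set);

	start = last = now_us();
	for (;;) {
		double delta;

		t = now_us();
		delta = t - last;
		last = t;
		if (delta > 100.0) {	/* lost the CPU for >100us: one perturbation */
			perts++;
			sum += delta;
			if (delta < min)
				min = delta;
			if (delta > max)
				max = delta;
		}
		if (t - start >= 1e6) {	/* one second sample period */
			printf("pert/s: %ld min:%.2f max:%.2f avg:%.2f sum/s:%.0fus overhead:%.2f%%\n",
			       perts, perts ? min : 0.0, max,
			       perts ? sum / perts : 0.0, sum,
			       100.0 * sum / (t - start));
			perts = 0;
			min = 1e9;
			max = sum = 0;
			start = t;
		}
	}
}

Run alongside taskset -c 3 make -j10, it prints one line per one-second sample
in the spirit of the tables above (it makes no attempt at the >Nus column of
the real output).
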
Using Mathieu Desnoyers' wakeup-latency testcase:

With taskset -c 3 make -j 10 running..

taskset -c 3 ./wakeup-latency& sleep 30;killall wakeup-latency

without:
maximum latency: 42963.2 µs
average latency: 9077.0 µs
missed timer events: 0

with:
maximum latency: 4160.7 µs
average latency: 149.4 µs
missed timer events: 0
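(Relative to the unpatched run under the same make -j10 load, that is roughly
a 10x improvement in maximum wakeup latency and a 60x improvement in average
wakeup latency.)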

Signed-off-by: Mike Galbraith <efault@gmx.de>
---
Documentation/kernel-parameters.txt | 2 +
drivers/tty/tty_io.c | 1 +
include/linux/sched.h | 19 +++++
init/Kconfig | 12 +++
kernel/fork.c | 5 +-
kernel/sched.c | 25 ++++--
kernel/sched_autogroup.c | 140 +++++++++++++++++++++++++++++++++++
kernel/sched_autogroup.h | 18 +++++
kernel/sysctl.c | 11 +++
9 files changed, 224 insertions(+), 9 deletions(-)
create mode 100644 kernel/sched_autogroup.c
create mode 100644 kernel/sched_autogroup.h

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 01ece1b..1031923 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1622,6 +1622,8 @@ and is between 256 and 4096 characters. It is defined in the file
noapic [SMP,APIC] Tells the kernel to not make use of any
IOAPICs that may be present in the system.

+ noautogroup Disable scheduler automatic task group creation.
+
nobats [PPC] Do not use BATs for mapping kernel lowmem
on "Classic" PPC cores.

diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c
index 35480dd..1849f4a 100644
--- a/drivers/tty/tty_io.c
+++ b/drivers/tty/tty_io.c
@@ -3169,6 +3169,7 @@ static void __proc_set_tty(struct task_struct *tsk, struct tty_struct *tty)
put_pid(tsk->signal->tty_old_pgrp);
tsk->signal->tty = tty_kref_get(tty);
tsk->signal->tty_old_pgrp = NULL;
+ sched_autogroup_create_attach(tsk);
}

static void proc_set_tty(struct task_struct *tsk, struct tty_struct *tty)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2238745..3a775e3 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -509,6 +509,8 @@ struct thread_group_cputimer {
spinlock_t lock;
};

+struct autogroup;
+
/*
* NOTE! "signal_struct" does not have it's own
* locking, because a shared signal_struct always
@@ -576,6 +578,9 @@ struct signal_struct {

struct tty_struct *tty; /* NULL if no tty */

+#ifdef CONFIG_SCHED_AUTOGROUP
+ struct autogroup *autogroup;
+#endif
/*
* Cumulative resource counters for dead threads in the group,
* and for reaped dead child processes forked by this group.
@@ -1931,6 +1936,20 @@ int sched_rt_handler(struct ctl_table *table, int write,

extern unsigned int sysctl_sched_compat_yield;

+#ifdef CONFIG_SCHED_AUTOGROUP
+extern unsigned int sysctl_sched_autogroup_enabled;
+
+extern void sched_autogroup_create_attach(struct task_struct *p);
+extern void sched_autogroup_detach(struct task_struct *p);
+extern void sched_autogroup_fork(struct signal_struct *sig);
+extern void sched_autogroup_exit(struct signal_struct *sig);
+#else
+static inline void sched_autogroup_create_attach(struct task_struct *p) { }
+static inline void sched_autogroup_detach(struct task_struct *p) { }
+static inline void sched_autogroup_fork(struct signal_struct *sig) { }
+static inline void sched_autogroup_exit(struct signal_struct *sig) { }
+#endif
+
#ifdef CONFIG_RT_MUTEXES
extern int rt_mutex_getprio(struct task_struct *p);
extern void rt_mutex_setprio(struct task_struct *p, int prio);
diff --git a/init/Kconfig b/init/Kconfig
index c972899..a4985d9 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -741,6 +741,18 @@ config NET_NS

endif # NAMESPACES

+config SCHED_AUTOGROUP
+ bool "Automatic process group scheduling"
+ select CGROUPS
+ select CGROUP_SCHED
+ select FAIR_GROUP_SCHED
+ help
+ This option optimizes the scheduler for common desktop workloads by
+ automatically creating and populating task groups. This separation
+ of workloads isolates aggressive CPU burners (like build jobs) from
+ desktop applications. Task group autogeneration is currently based
+ upon task tty association.
+
config MM_OWNER
bool

diff --git a/kernel/fork.c b/kernel/fork.c
index 5447dc7..70ea75f 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -174,8 +174,10 @@ static inline void free_signal_struct(struct signal_struct *sig)

static inline void put_signal_struct(struct signal_struct *sig)
{
- if (atomic_dec_and_test(&sig->sigcnt))
+ if (atomic_dec_and_test(&sig->sigcnt)) {
+ sched_autogroup_exit(sig);
free_signal_struct(sig);
+ }
}

void __put_task_struct(struct task_struct *tsk)
@@ -905,6 +907,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
posix_cpu_timers_init_group(sig);

tty_audit_fork(sig);
+ sched_autogroup_fork(sig);

sig->oom_adj = current->signal->oom_adj;
sig->oom_score_adj = current->signal->oom_score_adj;
diff --git a/kernel/sched.c b/kernel/sched.c
index 297d1a0..53ff9a1 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -78,6 +78,7 @@

#include "sched_cpupri.h"
#include "workqueue_sched.h"
+#include "sched_autogroup.h"

#define CREATE_TRACE_POINTS
#include <trace/events/sched.h>
@@ -605,11 +606,14 @@ static inline int cpu_of(struct rq *rq)
*/
static inline struct task_group *task_group(struct task_struct *p)
{
+ struct task_group *tg;
struct cgroup_subsys_state *css;

css = task_subsys_state_check(p, cpu_cgroup_subsys_id,
lockdep_is_held(&task_rq(p)->lock));
- return container_of(css, struct task_group, css);
+ tg = container_of(css, struct task_group, css);
+
+ return autogroup_task_group(p, tg);
}

/* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
@@ -2063,6 +2067,7 @@ static void update_rq_clock_task(struct rq *rq, s64 delta)
#include "sched_idletask.c"
#include "sched_fair.c"
#include "sched_rt.c"
+#include "sched_autogroup.c"
#include "sched_stoptask.c"
#ifdef CONFIG_SCHED_DEBUG
# include "sched_debug.c"
@@ -8164,7 +8169,7 @@ void __init sched_init(void)
#ifdef CONFIG_CGROUP_SCHED
list_add(&init_task_group.list, &task_groups);
INIT_LIST_HEAD(&init_task_group.children);
-
+ autogroup_init(&init_task);
#endif /* CONFIG_CGROUP_SCHED */

#if defined CONFIG_FAIR_GROUP_SCHED && defined CONFIG_SMP
@@ -8694,15 +8699,11 @@ void sched_destroy_group(struct task_group *tg)
/* change task's runqueue when it moves between groups.
* The caller of this function should have put the task in its new group
* by now. This function just updates tsk->se.cfs_rq and tsk->se.parent to
- * reflect its new group.
+ * reflect its new group. Called with the runqueue lock held.
*/
-void sched_move_task(struct task_struct *tsk)
+void __sched_move_task(struct task_struct *tsk, struct rq *rq)
{
int on_rq, running;
- unsigned long flags;
- struct rq *rq;
-
- rq = task_rq_lock(tsk, &flags);

running = task_current(rq, tsk);
on_rq = tsk->se.on_rq;
@@ -8723,7 +8724,15 @@ void sched_move_task(struct task_struct *tsk)
tsk->sched_class->set_curr_task(rq);
if (on_rq)
enqueue_task(rq, tsk, 0);
+}

+void sched_move_task(struct task_struct *tsk)
+{
+ struct rq *rq;
+ unsigned long flags;
+
+ rq = task_rq_lock(tsk, &flags);
+ __sched_move_task(tsk, rq);
task_rq_unlock(rq, &flags);
}
#endif /* CONFIG_CGROUP_SCHED */
diff --git a/kernel/sched_autogroup.c b/kernel/sched_autogroup.c
new file mode 100644
index 0000000..62f1d0e
--- /dev/null
+++ b/kernel/sched_autogroup.c
@@ -0,0 +1,140 @@
+#ifdef CONFIG_SCHED_AUTOGROUP
+
+unsigned int __read_mostly sysctl_sched_autogroup_enabled = 1;
+
+struct autogroup {
+ struct kref kref;
+ struct task_group *tg;
+};
+
+static struct autogroup autogroup_default;
+
+static void autogroup_init(struct task_struct *init_task)
+{
+ autogroup_default.tg = &init_task_group;
+ kref_init(&autogroup_default.kref);
+ init_task->signal->autogroup = &autogroup_default;
+}
+
+static inline void autogroup_destroy(struct kref *kref)
+{
+ struct autogroup *ag = container_of(kref, struct autogroup, kref);
+ struct task_group *tg = ag->tg;
+
+ kfree(ag);
+ sched_destroy_group(tg);
+}
+
+static inline void autogroup_kref_put(struct autogroup *ag)
+{
+ kref_put(&ag->kref, autogroup_destroy);
+}
+
+static inline struct autogroup *autogroup_kref_get(struct autogroup *ag)
+{
+ kref_get(&ag->kref);
+ return ag;
+}
+
+static inline struct autogroup *autogroup_create(void)
+{
+ struct autogroup *ag = kmalloc(sizeof(*ag), GFP_KERNEL);
+
+ if (!ag)
+ goto out_fail;
+
+ ag->tg = sched_create_group(&init_task_group);
+ kref_init(&ag->kref);
+
+ if (!(IS_ERR(ag->tg)))
+ return ag;
+
+out_fail:
+ if (ag) {
+ kfree(ag);
+ WARN_ON(1);
+ } else
+ WARN_ON(1);
+
+ return autogroup_kref_get(&autogroup_default);
+}
+
+static inline struct task_group *
+autogroup_task_group(struct task_struct *p, struct task_group *tg)
+{
+ int enabled = ACCESS_ONCE(sysctl_sched_autogroup_enabled);
+
+ enabled &= (tg == &root_task_group);
+ enabled &= (p->sched_class == &fair_sched_class);
+ enabled &= (!(p->flags & PF_EXITING));
+
+ if (enabled)
+ return p->signal->autogroup->tg;
+
+ return tg;
+}
+
+static void
+autogroup_move_group(struct task_struct *p, struct autogroup *ag)
+{
+ struct autogroup *prev;
+ struct task_struct *t;
+ struct rq *rq;
+ unsigned long flags;
+
+ rq = task_rq_lock(p, &flags);
+ prev = p->signal->autogroup;
+ if (prev == ag) {
+ task_rq_unlock(rq, &flags);
+ return;
+ }
+
+ p->signal->autogroup = autogroup_kref_get(ag);
+ __sched_move_task(p, rq);
+ task_rq_unlock(rq, &flags);
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(t, &p->thread_group, thread_group) {
+ sched_move_task(t);
+ }
+ rcu_read_unlock();
+
+ autogroup_kref_put(prev);
+}
+
+void sched_autogroup_create_attach(struct task_struct *p)
+{
+ struct autogroup *ag = autogroup_create();
+
+ autogroup_move_group(p, ag);
+ /* drop extra reference added by autogroup_create() */
+ autogroup_kref_put(ag);
+}
+EXPORT_SYMBOL(sched_autogroup_create_attach);
+
+/* currently has no users */
+void sched_autogroup_detach(struct task_struct *p)
+{
+ autogroup_move_group(p, &autogroup_default);
+}
+EXPORT_SYMBOL(sched_autogroup_detach);
+
+void sched_autogroup_fork(struct signal_struct *sig)
+{
+ sig->autogroup = autogroup_kref_get(current->signal->autogroup);
+}
+
+void sched_autogroup_exit(struct signal_struct *sig)
+{
+ autogroup_kref_put(sig->autogroup);
+}
+
+static int __init setup_autogroup(char *str)
+{
+ sysctl_sched_autogroup_enabled = 0;
+
+ return 1;
+}
+
+__setup("noautogroup", setup_autogroup);
+#endif
diff --git a/kernel/sched_autogroup.h b/kernel/sched_autogroup.h
new file mode 100644
index 0000000..6048f5d
--- /dev/null
+++ b/kernel/sched_autogroup.h
@@ -0,0 +1,18 @@
+#ifdef CONFIG_SCHED_AUTOGROUP
+
+static void __sched_move_task(struct task_struct *tsk, struct rq *rq);
+
+static inline struct task_group *
+autogroup_task_group(struct task_struct *p, struct task_group *tg);
+
+#else /* !CONFIG_SCHED_AUTOGROUP */
+
+static inline void autogroup_init(struct task_struct *init_task) { }
+
+static inline struct task_group *
+autogroup_task_group(struct task_struct *p, struct task_group *tg)
+{
+ return tg;
+}
+
+#endif /* CONFIG_SCHED_AUTOGROUP */
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 5abfa15..b162f65 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -382,6 +382,17 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
+#ifdef CONFIG_SCHED_AUTOGROUP
+ {
+ .procname = "sched_autogroup_enabled",
+ .data = &sysctl_sched_autogroup_enabled,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
+#endif
#ifdef CONFIG_PROVE_LOCKING
{
.procname = "prove_locking",
--
1.6.6.1