mirror of
https://git.yoctoproject.org/meta-ti
synced 2026-06-06 10:50:37 +00:00
linux-ti33x-psp 3.2: update to 3.2.25
Signed-off-by: Koen Kooi <koen@dominion.thruhere.net> Signed-off-by: Denys Dmytriyenko <denys@ti.com>
This commit is contained in:
committed by
Denys Dmytriyenko
parent
e4ea426bb5
commit
6203cbca5c
@@ -4,7 +4,7 @@ require conf/machine/include/soc-family.inc
|
||||
require conf/machine/include/tune-cortexa8.inc
|
||||
PREFERRED_PROVIDER_virtual/kernel = "linux-ti33x-psp"
|
||||
# Increase this everytime you change something in the kernel
|
||||
MACHINE_KERNEL_PR = "r14"
|
||||
MACHINE_KERNEL_PR = "r15"
|
||||
|
||||
KERNEL_IMAGETYPE = "uImage"
|
||||
|
||||
|
||||
+306
@@ -0,0 +1,306 @@
|
||||
From fe36d7279bb09c09b2c07b1b8bfe786a3ab12486 Mon Sep 17 00:00:00 2001
|
||||
From: Corentin Chary <corentincj@iksaif.net>
|
||||
Date: Sat, 26 Nov 2011 11:00:10 +0100
|
||||
Subject: [PATCH 001/109] samsung-laptop: make the dmi check less strict
|
||||
|
||||
commit 3be324a94df0c3f032178d04549dbfbf6cccb09a upstream.
|
||||
|
||||
This enables the driver for everything that looks like
|
||||
a laptop and is from vendor "SAMSUNG ELECTRONICS CO., LTD.".
|
||||
Note that laptops supported by samsung-q10 seem to have a different
|
||||
vendor string.
|
||||
|
||||
Also remove every log output until we know that we have a SABI interface
|
||||
(except if the driver is forced to load, or debug is enabled).
|
||||
|
||||
Keeping a whitelist of laptops with model granularity is something that can't
|
||||
work without close vendor cooperation (and we don't have that).
|
||||
|
||||
Signed-off-by: Corentin Chary <corentincj@iksaif.net>
|
||||
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
|
||||
Signed-off-by: Matthew Garrett <mjg@redhat.com>
|
||||
[bwh: Backported to 3.2:
|
||||
- Adjust context
|
||||
- Drop changes relating to ACPI video]
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
drivers/platform/x86/samsung-laptop.c | 225 ++-------------------------------
|
||||
1 files changed, 8 insertions(+), 217 deletions(-)
|
||||
|
||||
diff --git a/drivers/platform/x86/samsung-laptop.c b/drivers/platform/x86/samsung-laptop.c
|
||||
index 09e26bf..af1e296 100644
|
||||
--- a/drivers/platform/x86/samsung-laptop.c
|
||||
+++ b/drivers/platform/x86/samsung-laptop.c
|
||||
@@ -540,245 +540,34 @@ static DEVICE_ATTR(performance_level, S_IWUSR | S_IRUGO,
|
||||
get_performance_level, set_performance_level);
|
||||
|
||||
|
||||
-static int __init dmi_check_cb(const struct dmi_system_id *id)
|
||||
-{
|
||||
- pr_info("found laptop model '%s'\n",
|
||||
- id->ident);
|
||||
- return 1;
|
||||
-}
|
||||
-
|
||||
static struct dmi_system_id __initdata samsung_dmi_table[] = {
|
||||
{
|
||||
- .ident = "N128",
|
||||
- .matches = {
|
||||
- DMI_MATCH(DMI_SYS_VENDOR,
|
||||
- "SAMSUNG ELECTRONICS CO., LTD."),
|
||||
- DMI_MATCH(DMI_PRODUCT_NAME, "N128"),
|
||||
- DMI_MATCH(DMI_BOARD_NAME, "N128"),
|
||||
- },
|
||||
- .callback = dmi_check_cb,
|
||||
- },
|
||||
- {
|
||||
- .ident = "N130",
|
||||
.matches = {
|
||||
DMI_MATCH(DMI_SYS_VENDOR,
|
||||
"SAMSUNG ELECTRONICS CO., LTD."),
|
||||
- DMI_MATCH(DMI_PRODUCT_NAME, "N130"),
|
||||
- DMI_MATCH(DMI_BOARD_NAME, "N130"),
|
||||
+ DMI_MATCH(DMI_CHASSIS_TYPE, "8"), /* Portable */
|
||||
},
|
||||
- .callback = dmi_check_cb,
|
||||
},
|
||||
{
|
||||
- .ident = "N510",
|
||||
.matches = {
|
||||
DMI_MATCH(DMI_SYS_VENDOR,
|
||||
"SAMSUNG ELECTRONICS CO., LTD."),
|
||||
- DMI_MATCH(DMI_PRODUCT_NAME, "N510"),
|
||||
- DMI_MATCH(DMI_BOARD_NAME, "N510"),
|
||||
+ DMI_MATCH(DMI_CHASSIS_TYPE, "9"), /* Laptop */
|
||||
},
|
||||
- .callback = dmi_check_cb,
|
||||
},
|
||||
{
|
||||
- .ident = "X125",
|
||||
.matches = {
|
||||
DMI_MATCH(DMI_SYS_VENDOR,
|
||||
"SAMSUNG ELECTRONICS CO., LTD."),
|
||||
- DMI_MATCH(DMI_PRODUCT_NAME, "X125"),
|
||||
- DMI_MATCH(DMI_BOARD_NAME, "X125"),
|
||||
+ DMI_MATCH(DMI_CHASSIS_TYPE, "10"), /* Notebook */
|
||||
},
|
||||
- .callback = dmi_check_cb,
|
||||
},
|
||||
{
|
||||
- .ident = "X120/X170",
|
||||
.matches = {
|
||||
DMI_MATCH(DMI_SYS_VENDOR,
|
||||
"SAMSUNG ELECTRONICS CO., LTD."),
|
||||
- DMI_MATCH(DMI_PRODUCT_NAME, "X120/X170"),
|
||||
- DMI_MATCH(DMI_BOARD_NAME, "X120/X170"),
|
||||
- },
|
||||
- .callback = dmi_check_cb,
|
||||
- },
|
||||
- {
|
||||
- .ident = "NC10",
|
||||
- .matches = {
|
||||
- DMI_MATCH(DMI_SYS_VENDOR,
|
||||
- "SAMSUNG ELECTRONICS CO., LTD."),
|
||||
- DMI_MATCH(DMI_PRODUCT_NAME, "NC10"),
|
||||
- DMI_MATCH(DMI_BOARD_NAME, "NC10"),
|
||||
- },
|
||||
- .callback = dmi_check_cb,
|
||||
- },
|
||||
- {
|
||||
- .ident = "NP-Q45",
|
||||
- .matches = {
|
||||
- DMI_MATCH(DMI_SYS_VENDOR,
|
||||
- "SAMSUNG ELECTRONICS CO., LTD."),
|
||||
- DMI_MATCH(DMI_PRODUCT_NAME, "SQ45S70S"),
|
||||
- DMI_MATCH(DMI_BOARD_NAME, "SQ45S70S"),
|
||||
- },
|
||||
- .callback = dmi_check_cb,
|
||||
- },
|
||||
- {
|
||||
- .ident = "X360",
|
||||
- .matches = {
|
||||
- DMI_MATCH(DMI_SYS_VENDOR,
|
||||
- "SAMSUNG ELECTRONICS CO., LTD."),
|
||||
- DMI_MATCH(DMI_PRODUCT_NAME, "X360"),
|
||||
- DMI_MATCH(DMI_BOARD_NAME, "X360"),
|
||||
- },
|
||||
- .callback = dmi_check_cb,
|
||||
- },
|
||||
- {
|
||||
- .ident = "R410 Plus",
|
||||
- .matches = {
|
||||
- DMI_MATCH(DMI_SYS_VENDOR,
|
||||
- "SAMSUNG ELECTRONICS CO., LTD."),
|
||||
- DMI_MATCH(DMI_PRODUCT_NAME, "R410P"),
|
||||
- DMI_MATCH(DMI_BOARD_NAME, "R460"),
|
||||
- },
|
||||
- .callback = dmi_check_cb,
|
||||
- },
|
||||
- {
|
||||
- .ident = "R518",
|
||||
- .matches = {
|
||||
- DMI_MATCH(DMI_SYS_VENDOR,
|
||||
- "SAMSUNG ELECTRONICS CO., LTD."),
|
||||
- DMI_MATCH(DMI_PRODUCT_NAME, "R518"),
|
||||
- DMI_MATCH(DMI_BOARD_NAME, "R518"),
|
||||
- },
|
||||
- .callback = dmi_check_cb,
|
||||
- },
|
||||
- {
|
||||
- .ident = "R519/R719",
|
||||
- .matches = {
|
||||
- DMI_MATCH(DMI_SYS_VENDOR,
|
||||
- "SAMSUNG ELECTRONICS CO., LTD."),
|
||||
- DMI_MATCH(DMI_PRODUCT_NAME, "R519/R719"),
|
||||
- DMI_MATCH(DMI_BOARD_NAME, "R519/R719"),
|
||||
- },
|
||||
- .callback = dmi_check_cb,
|
||||
- },
|
||||
- {
|
||||
- .ident = "N150/N210/N220",
|
||||
- .matches = {
|
||||
- DMI_MATCH(DMI_SYS_VENDOR,
|
||||
- "SAMSUNG ELECTRONICS CO., LTD."),
|
||||
- DMI_MATCH(DMI_PRODUCT_NAME, "N150/N210/N220"),
|
||||
- DMI_MATCH(DMI_BOARD_NAME, "N150/N210/N220"),
|
||||
- },
|
||||
- .callback = dmi_check_cb,
|
||||
- },
|
||||
- {
|
||||
- .ident = "N220",
|
||||
- .matches = {
|
||||
- DMI_MATCH(DMI_SYS_VENDOR,
|
||||
- "SAMSUNG ELECTRONICS CO., LTD."),
|
||||
- DMI_MATCH(DMI_PRODUCT_NAME, "N220"),
|
||||
- DMI_MATCH(DMI_BOARD_NAME, "N220"),
|
||||
- },
|
||||
- .callback = dmi_check_cb,
|
||||
- },
|
||||
- {
|
||||
- .ident = "N150/N210/N220/N230",
|
||||
- .matches = {
|
||||
- DMI_MATCH(DMI_SYS_VENDOR,
|
||||
- "SAMSUNG ELECTRONICS CO., LTD."),
|
||||
- DMI_MATCH(DMI_PRODUCT_NAME, "N150/N210/N220/N230"),
|
||||
- DMI_MATCH(DMI_BOARD_NAME, "N150/N210/N220/N230"),
|
||||
- },
|
||||
- .callback = dmi_check_cb,
|
||||
- },
|
||||
- {
|
||||
- .ident = "N150P/N210P/N220P",
|
||||
- .matches = {
|
||||
- DMI_MATCH(DMI_SYS_VENDOR,
|
||||
- "SAMSUNG ELECTRONICS CO., LTD."),
|
||||
- DMI_MATCH(DMI_PRODUCT_NAME, "N150P/N210P/N220P"),
|
||||
- DMI_MATCH(DMI_BOARD_NAME, "N150P/N210P/N220P"),
|
||||
- },
|
||||
- .callback = dmi_check_cb,
|
||||
- },
|
||||
- {
|
||||
- .ident = "R700",
|
||||
- .matches = {
|
||||
- DMI_MATCH(DMI_SYS_VENDOR, "SAMSUNG ELECTRONICS CO., LTD."),
|
||||
- DMI_MATCH(DMI_PRODUCT_NAME, "SR700"),
|
||||
- DMI_MATCH(DMI_BOARD_NAME, "SR700"),
|
||||
- },
|
||||
- .callback = dmi_check_cb,
|
||||
- },
|
||||
- {
|
||||
- .ident = "R530/R730",
|
||||
- .matches = {
|
||||
- DMI_MATCH(DMI_SYS_VENDOR, "SAMSUNG ELECTRONICS CO., LTD."),
|
||||
- DMI_MATCH(DMI_PRODUCT_NAME, "R530/R730"),
|
||||
- DMI_MATCH(DMI_BOARD_NAME, "R530/R730"),
|
||||
- },
|
||||
- .callback = dmi_check_cb,
|
||||
- },
|
||||
- {
|
||||
- .ident = "NF110/NF210/NF310",
|
||||
- .matches = {
|
||||
- DMI_MATCH(DMI_SYS_VENDOR, "SAMSUNG ELECTRONICS CO., LTD."),
|
||||
- DMI_MATCH(DMI_PRODUCT_NAME, "NF110/NF210/NF310"),
|
||||
- DMI_MATCH(DMI_BOARD_NAME, "NF110/NF210/NF310"),
|
||||
- },
|
||||
- .callback = dmi_check_cb,
|
||||
- },
|
||||
- {
|
||||
- .ident = "N145P/N250P/N260P",
|
||||
- .matches = {
|
||||
- DMI_MATCH(DMI_SYS_VENDOR, "SAMSUNG ELECTRONICS CO., LTD."),
|
||||
- DMI_MATCH(DMI_PRODUCT_NAME, "N145P/N250P/N260P"),
|
||||
- DMI_MATCH(DMI_BOARD_NAME, "N145P/N250P/N260P"),
|
||||
- },
|
||||
- .callback = dmi_check_cb,
|
||||
- },
|
||||
- {
|
||||
- .ident = "R70/R71",
|
||||
- .matches = {
|
||||
- DMI_MATCH(DMI_SYS_VENDOR,
|
||||
- "SAMSUNG ELECTRONICS CO., LTD."),
|
||||
- DMI_MATCH(DMI_PRODUCT_NAME, "R70/R71"),
|
||||
- DMI_MATCH(DMI_BOARD_NAME, "R70/R71"),
|
||||
- },
|
||||
- .callback = dmi_check_cb,
|
||||
- },
|
||||
- {
|
||||
- .ident = "P460",
|
||||
- .matches = {
|
||||
- DMI_MATCH(DMI_SYS_VENDOR, "SAMSUNG ELECTRONICS CO., LTD."),
|
||||
- DMI_MATCH(DMI_PRODUCT_NAME, "P460"),
|
||||
- DMI_MATCH(DMI_BOARD_NAME, "P460"),
|
||||
- },
|
||||
- .callback = dmi_check_cb,
|
||||
- },
|
||||
- {
|
||||
- .ident = "R528/R728",
|
||||
- .matches = {
|
||||
- DMI_MATCH(DMI_SYS_VENDOR, "SAMSUNG ELECTRONICS CO., LTD."),
|
||||
- DMI_MATCH(DMI_PRODUCT_NAME, "R528/R728"),
|
||||
- DMI_MATCH(DMI_BOARD_NAME, "R528/R728"),
|
||||
- },
|
||||
- .callback = dmi_check_cb,
|
||||
- },
|
||||
- {
|
||||
- .ident = "NC210/NC110",
|
||||
- .matches = {
|
||||
- DMI_MATCH(DMI_SYS_VENDOR, "SAMSUNG ELECTRONICS CO., LTD."),
|
||||
- DMI_MATCH(DMI_PRODUCT_NAME, "NC210/NC110"),
|
||||
- DMI_MATCH(DMI_BOARD_NAME, "NC210/NC110"),
|
||||
- },
|
||||
- .callback = dmi_check_cb,
|
||||
- },
|
||||
- {
|
||||
- .ident = "X520",
|
||||
- .matches = {
|
||||
- DMI_MATCH(DMI_SYS_VENDOR, "SAMSUNG ELECTRONICS CO., LTD."),
|
||||
- DMI_MATCH(DMI_PRODUCT_NAME, "X520"),
|
||||
- DMI_MATCH(DMI_BOARD_NAME, "X520"),
|
||||
+ DMI_MATCH(DMI_CHASSIS_TYPE, "14"), /* Sub-Notebook */
|
||||
},
|
||||
- .callback = dmi_check_cb,
|
||||
},
|
||||
{ },
|
||||
};
|
||||
@@ -819,7 +608,8 @@ static int __init samsung_init(void)
|
||||
|
||||
f0000_segment = ioremap_nocache(0xf0000, 0xffff);
|
||||
if (!f0000_segment) {
|
||||
- pr_err("Can't map the segment at 0xf0000\n");
|
||||
+ if (debug || force)
|
||||
+ pr_err("Can't map the segment at 0xf0000\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
@@ -832,7 +622,8 @@ static int __init samsung_init(void)
|
||||
}
|
||||
|
||||
if (loca == 0xffff) {
|
||||
- pr_err("This computer does not support SABI\n");
|
||||
+ if (debug || force)
|
||||
+ pr_err("This computer does not support SABI\n");
|
||||
goto error_no_signature;
|
||||
}
|
||||
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
@@ -0,0 +1,43 @@
|
||||
From c3041d04e675a5d38a1d57fee3c59d4f073f512e Mon Sep 17 00:00:00 2001
|
||||
From: Shaohua Li <shli@kernel.org>
|
||||
Date: Tue, 3 Jul 2012 15:57:19 +1000
|
||||
Subject: [PATCH 002/109] raid5: delayed stripe fix
|
||||
|
||||
commit fab363b5ff502d1b39ddcfec04271f5858d9f26e upstream.
|
||||
|
||||
There is no locking around setting the STRIPE_DELAYED and STRIPE_PREREAD_ACTIVE bits, but
|
||||
the two bits are related. A delayed stripe can be moved to the hold list only
|
||||
when preread active stripe count is below IO_THRESHOLD. If a stripe has both
|
||||
the bits set, such stripe will be in delayed list and preread count not 0,
|
||||
which will make such stripe never leave delayed list.
|
||||
|
||||
Signed-off-by: Shaohua Li <shli@fusionio.com>
|
||||
Signed-off-by: NeilBrown <neilb@suse.de>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
drivers/md/raid5.c | 4 +++-
|
||||
1 files changed, 3 insertions(+), 1 deletions(-)
|
||||
|
||||
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
|
||||
index 6ba4954..26ef63a 100644
|
||||
--- a/drivers/md/raid5.c
|
||||
+++ b/drivers/md/raid5.c
|
||||
@@ -196,12 +196,14 @@ static void __release_stripe(struct r5conf *conf, struct stripe_head *sh)
|
||||
BUG_ON(!list_empty(&sh->lru));
|
||||
BUG_ON(atomic_read(&conf->active_stripes)==0);
|
||||
if (test_bit(STRIPE_HANDLE, &sh->state)) {
|
||||
- if (test_bit(STRIPE_DELAYED, &sh->state))
|
||||
+ if (test_bit(STRIPE_DELAYED, &sh->state) &&
|
||||
+ !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
|
||||
list_add_tail(&sh->lru, &conf->delayed_list);
|
||||
else if (test_bit(STRIPE_BIT_DELAY, &sh->state) &&
|
||||
sh->bm_seq - conf->seq_write > 0)
|
||||
list_add_tail(&sh->lru, &conf->bitmap_list);
|
||||
else {
|
||||
+ clear_bit(STRIPE_DELAYED, &sh->state);
|
||||
clear_bit(STRIPE_BIT_DELAY, &sh->state);
|
||||
list_add_tail(&sh->lru, &conf->handle_list);
|
||||
}
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
@@ -0,0 +1,36 @@
|
||||
From ebf148a0ca2e0c9fb824a069c0fd5311bb6ae297 Mon Sep 17 00:00:00 2001
|
||||
From: Eric Dumazet <eric.dumazet@gmail.com>
|
||||
Date: Fri, 2 Dec 2011 23:41:42 +0000
|
||||
Subject: [PATCH 003/109] tcp: drop SYN+FIN messages
|
||||
|
||||
commit fdf5af0daf8019cec2396cdef8fb042d80fe71fa upstream.
|
||||
|
||||
Denys Fedoryshchenko reported that SYN+FIN attacks were bringing his
|
||||
linux machines to their limits.
|
||||
|
||||
Don't call conn_request() if the TCP flags include both the SYN and FIN flags
|
||||
|
||||
Reported-by: Denys Fedoryshchenko <denys@visp.net.lb>
|
||||
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
net/ipv4/tcp_input.c | 2 ++
|
||||
1 files changed, 2 insertions(+), 0 deletions(-)
|
||||
|
||||
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
|
||||
index 9726927..32e6ca2 100644
|
||||
--- a/net/ipv4/tcp_input.c
|
||||
+++ b/net/ipv4/tcp_input.c
|
||||
@@ -5836,6 +5836,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
|
||||
goto discard;
|
||||
|
||||
if (th->syn) {
|
||||
+ if (th->fin)
|
||||
+ goto discard;
|
||||
if (icsk->icsk_af_ops->conn_request(sk, skb) < 0)
|
||||
return 1;
|
||||
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+35
@@ -0,0 +1,35 @@
|
||||
From 389c56a2dfc90eecb97841668a8d61fc3424f2c8 Mon Sep 17 00:00:00 2001
|
||||
From: Matt Carlson <mcarlson@broadcom.com>
|
||||
Date: Thu, 7 Jun 2012 12:56:54 +0000
|
||||
Subject: [PATCH 004/109] tg3: Apply short DMA frag workaround to 5906
|
||||
|
||||
commit b7abee6ef888117f92db370620ebf116a38e3f4d upstream.
|
||||
|
||||
5906 devices also need the short DMA fragment workaround. This patch
|
||||
makes the necessary change.
|
||||
|
||||
Signed-off-by: Matt Carlson <mcarlson@broadcom.com>
|
||||
Tested-by: Christian Kujau <lists@nerdbynature.de>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
drivers/net/ethernet/broadcom/tg3.c | 3 ++-
|
||||
1 files changed, 2 insertions(+), 1 deletions(-)
|
||||
|
||||
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
|
||||
index 2dcac28..6b258d9 100644
|
||||
--- a/drivers/net/ethernet/broadcom/tg3.c
|
||||
+++ b/drivers/net/ethernet/broadcom/tg3.c
|
||||
@@ -14046,7 +14046,8 @@ static int __devinit tg3_get_invariants(struct tg3 *tp)
|
||||
}
|
||||
}
|
||||
|
||||
- if (tg3_flag(tp, 5755_PLUS))
|
||||
+ if (tg3_flag(tp, 5755_PLUS) ||
|
||||
+ GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906)
|
||||
tg3_flag_set(tp, SHORT_DMA_BUG);
|
||||
|
||||
if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5719)
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+56
@@ -0,0 +1,56 @@
|
||||
From c8ad2074ce769ad8b16677e0a9bee9232be03acc Mon Sep 17 00:00:00 2001
|
||||
From: Stanislaw Gruszka <sgruszka@redhat.com>
|
||||
Date: Wed, 16 May 2012 11:06:21 +0200
|
||||
Subject: [PATCH 005/109] rtl8187: ->brightness_set can not sleep
|
||||
|
||||
commit 0fde0a8cfd0ede7f310d6a681c8e5a7cb3e32406 upstream.
|
||||
|
||||
Fix:
|
||||
|
||||
BUG: sleeping function called from invalid context at kernel/workqueue.c:2547
|
||||
in_atomic(): 1, irqs_disabled(): 0, pid: 629, name: wpa_supplicant
|
||||
2 locks held by wpa_supplicant/629:
|
||||
#0: (rtnl_mutex){+.+.+.}, at: [<c08b2b84>] rtnl_lock+0x14/0x20
|
||||
#1: (&trigger->leddev_list_lock){.+.?..}, at: [<c0867f41>] led_trigger_event+0x21/0x80
|
||||
Pid: 629, comm: wpa_supplicant Not tainted 3.3.0-0.rc3.git5.1.fc17.i686
|
||||
Call Trace:
|
||||
[<c046a9f6>] __might_sleep+0x126/0x1d0
|
||||
[<c0457d6c>] wait_on_work+0x2c/0x1d0
|
||||
[<c045a09a>] __cancel_work_timer+0x6a/0x120
|
||||
[<c045a160>] cancel_delayed_work_sync+0x10/0x20
|
||||
[<f7dd3c22>] rtl8187_led_brightness_set+0x82/0xf0 [rtl8187]
|
||||
[<c0867f7c>] led_trigger_event+0x5c/0x80
|
||||
[<f7ff5e6d>] ieee80211_led_radio+0x1d/0x40 [mac80211]
|
||||
[<f7ff3583>] ieee80211_stop_device+0x13/0x230 [mac80211]
|
||||
|
||||
Removing _sync is ok, because if led_on work is currently running
|
||||
it will have finished before the led_off work starts to run, since
|
||||
they are always queued on the same mac80211 local->workqueue.
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=795176
|
||||
|
||||
Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>
|
||||
Acked-by: Larry Finger <Larry.Finger@lwfinger.net>
|
||||
Acked-by: Hin-Tak Leung <htl10@users.sourceforge.net>
|
||||
Signed-off-by: John W. Linville <linville@tuxdriver.com>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
drivers/net/wireless/rtl818x/rtl8187/leds.c | 2 +-
|
||||
1 files changed, 1 insertions(+), 1 deletions(-)
|
||||
|
||||
diff --git a/drivers/net/wireless/rtl818x/rtl8187/leds.c b/drivers/net/wireless/rtl818x/rtl8187/leds.c
|
||||
index 2e0de2f..c2d5b49 100644
|
||||
--- a/drivers/net/wireless/rtl818x/rtl8187/leds.c
|
||||
+++ b/drivers/net/wireless/rtl818x/rtl8187/leds.c
|
||||
@@ -117,7 +117,7 @@ static void rtl8187_led_brightness_set(struct led_classdev *led_dev,
|
||||
radio_on = true;
|
||||
} else if (radio_on) {
|
||||
radio_on = false;
|
||||
- cancel_delayed_work_sync(&priv->led_on);
|
||||
+ cancel_delayed_work(&priv->led_on);
|
||||
ieee80211_queue_delayed_work(hw, &priv->led_off, 0);
|
||||
}
|
||||
} else if (radio_on) {
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+102
@@ -0,0 +1,102 @@
|
||||
From 54490f32c07630f7c6bd6429bf73d7507b06e3db Mon Sep 17 00:00:00 2001
|
||||
From: Stanislav Yakovlev <stas.yakovlev@gmail.com>
|
||||
Date: Tue, 10 Apr 2012 21:44:47 -0400
|
||||
Subject: [PATCH 006/109] net/wireless: ipw2x00: add supported cipher suites
|
||||
to wiphy initialization
|
||||
|
||||
commit a141e6a0097118bb35024485f1faffc0d9042f5c upstream.
|
||||
|
||||
Driver doesn't report its supported cipher suites through cfg80211
|
||||
interface. It still uses wext interface and probably will not work
|
||||
through nl80211, but will at least correctly advertise supported
|
||||
features.
|
||||
|
||||
Bug was reported by Omar Siam.
|
||||
https://bugzilla.kernel.org/show_bug.cgi?id=43049
|
||||
|
||||
Signed-off-by: Stanislav Yakovlev <stas.yakovlev@gmail.com>
|
||||
Signed-off-by: John W. Linville <linville@tuxdriver.com>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
drivers/net/wireless/ipw2x00/ipw.h | 23 +++++++++++++++++++++++
|
||||
drivers/net/wireless/ipw2x00/ipw2100.c | 4 ++++
|
||||
drivers/net/wireless/ipw2x00/ipw2200.c | 4 ++++
|
||||
3 files changed, 31 insertions(+), 0 deletions(-)
|
||||
create mode 100644 drivers/net/wireless/ipw2x00/ipw.h
|
||||
|
||||
diff --git a/drivers/net/wireless/ipw2x00/ipw.h b/drivers/net/wireless/ipw2x00/ipw.h
|
||||
new file mode 100644
|
||||
index 0000000..4007bf5
|
||||
--- /dev/null
|
||||
+++ b/drivers/net/wireless/ipw2x00/ipw.h
|
||||
@@ -0,0 +1,23 @@
|
||||
+/*
|
||||
+ * Intel Pro/Wireless 2100, 2200BG, 2915ABG network connection driver
|
||||
+ *
|
||||
+ * Copyright 2012 Stanislav Yakovlev <stas.yakovlev@gmail.com>
|
||||
+ *
|
||||
+ * This program is free software; you can redistribute it and/or modify
|
||||
+ * it under the terms of the GNU General Public License version 2 as
|
||||
+ * published by the Free Software Foundation.
|
||||
+ */
|
||||
+
|
||||
+#ifndef __IPW_H__
|
||||
+#define __IPW_H__
|
||||
+
|
||||
+#include <linux/ieee80211.h>
|
||||
+
|
||||
+static const u32 ipw_cipher_suites[] = {
|
||||
+ WLAN_CIPHER_SUITE_WEP40,
|
||||
+ WLAN_CIPHER_SUITE_WEP104,
|
||||
+ WLAN_CIPHER_SUITE_TKIP,
|
||||
+ WLAN_CIPHER_SUITE_CCMP,
|
||||
+};
|
||||
+
|
||||
+#endif
|
||||
diff --git a/drivers/net/wireless/ipw2x00/ipw2100.c b/drivers/net/wireless/ipw2x00/ipw2100.c
|
||||
index 127e9c6..10862d4 100644
|
||||
--- a/drivers/net/wireless/ipw2x00/ipw2100.c
|
||||
+++ b/drivers/net/wireless/ipw2x00/ipw2100.c
|
||||
@@ -166,6 +166,7 @@ that only one external action is invoked at a time.
|
||||
#include <net/lib80211.h>
|
||||
|
||||
#include "ipw2100.h"
|
||||
+#include "ipw.h"
|
||||
|
||||
#define IPW2100_VERSION "git-1.2.2"
|
||||
|
||||
@@ -1955,6 +1956,9 @@ static int ipw2100_wdev_init(struct net_device *dev)
|
||||
wdev->wiphy->bands[IEEE80211_BAND_2GHZ] = bg_band;
|
||||
}
|
||||
|
||||
+ wdev->wiphy->cipher_suites = ipw_cipher_suites;
|
||||
+ wdev->wiphy->n_cipher_suites = ARRAY_SIZE(ipw_cipher_suites);
|
||||
+
|
||||
set_wiphy_dev(wdev->wiphy, &priv->pci_dev->dev);
|
||||
if (wiphy_register(wdev->wiphy)) {
|
||||
ipw2100_down(priv);
|
||||
diff --git a/drivers/net/wireless/ipw2x00/ipw2200.c b/drivers/net/wireless/ipw2x00/ipw2200.c
|
||||
index 827889b..56bd370 100644
|
||||
--- a/drivers/net/wireless/ipw2x00/ipw2200.c
|
||||
+++ b/drivers/net/wireless/ipw2x00/ipw2200.c
|
||||
@@ -34,6 +34,7 @@
|
||||
#include <linux/slab.h>
|
||||
#include <net/cfg80211-wext.h>
|
||||
#include "ipw2200.h"
|
||||
+#include "ipw.h"
|
||||
|
||||
|
||||
#ifndef KBUILD_EXTMOD
|
||||
@@ -11535,6 +11536,9 @@ static int ipw_wdev_init(struct net_device *dev)
|
||||
wdev->wiphy->bands[IEEE80211_BAND_5GHZ] = a_band;
|
||||
}
|
||||
|
||||
+ wdev->wiphy->cipher_suites = ipw_cipher_suites;
|
||||
+ wdev->wiphy->n_cipher_suites = ARRAY_SIZE(ipw_cipher_suites);
|
||||
+
|
||||
set_wiphy_dev(wdev->wiphy, &priv->pci_dev->dev);
|
||||
|
||||
/* With that information in place, we can now register the wiphy... */
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+47
@@ -0,0 +1,47 @@
|
||||
From ae8e28c4c0aaba535e88908a7a2c560bb55061f9 Mon Sep 17 00:00:00 2001
|
||||
From: Eugeni Dodonov <eugeni.dodonov@intel.com>
|
||||
Date: Tue, 14 Feb 2012 11:44:48 -0200
|
||||
Subject: [PATCH 007/109] drm/i915: do not enable RC6p on Sandy Bridge
|
||||
|
||||
commit 1c8ecf80fdee4e7b23a9e7da7ff9bd59ba2dcf96 upstream.
|
||||
|
||||
Based on the latest findings, RC6p seems to be responsible for RC6-related
|
||||
issues on Sandy Bridge platform. To work-around those issues, the previous
|
||||
solution was to completely disable RC6 on Sandy Bridge for the past few
|
||||
releases, even if plain RC6 was not giving any issues.
|
||||
|
||||
What this patch does is preventing RC6p from being enabled on Sandy Bridge
|
||||
even if users enable RC6 via a kernel parameter. So it won't change the
|
||||
defaults in any way, but will ensure that if users do enable RC6 manually
|
||||
it won't break their machines by enabling this extra state.
|
||||
|
||||
A proper fix for this (enabling specific RC6 states according to the GPU
|
||||
generation) was proposed for the -next kernel, but we are too late in the
|
||||
release process now to pick such changes.
|
||||
|
||||
Acked-by: Keith Packard <keithp@keithp.com>
|
||||
Signed-off-by: Eugeni Dodonov <eugeni.dodonov@intel.com>
|
||||
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
drivers/gpu/drm/i915/intel_display.c | 4 ++--
|
||||
1 files changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
|
||||
index 6aa7716..c63ca5f 100644
|
||||
--- a/drivers/gpu/drm/i915/intel_display.c
|
||||
+++ b/drivers/gpu/drm/i915/intel_display.c
|
||||
@@ -8043,8 +8043,8 @@ void gen6_enable_rps(struct drm_i915_private *dev_priv)
|
||||
I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */
|
||||
|
||||
if (intel_enable_rc6(dev_priv->dev))
|
||||
- rc6_mask = GEN6_RC_CTL_RC6p_ENABLE |
|
||||
- GEN6_RC_CTL_RC6_ENABLE;
|
||||
+ rc6_mask = GEN6_RC_CTL_RC6_ENABLE |
|
||||
+ (IS_GEN7(dev_priv->dev)) ? GEN6_RC_CTL_RC6p_ENABLE : 0;
|
||||
|
||||
I915_WRITE(GEN6_RC_CONTROL,
|
||||
rc6_mask |
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+34
@@ -0,0 +1,34 @@
|
||||
From 2c5df93ac56c5ec76f87a0daf418966abb25b03b Mon Sep 17 00:00:00 2001
|
||||
From: Eugeni Dodonov <eugeni.dodonov@intel.com>
|
||||
Date: Thu, 23 Feb 2012 23:57:06 -0200
|
||||
Subject: [PATCH 008/109] drm/i915: fix operator precedence when enabling RC6p
|
||||
|
||||
commit c0e2ee1bc0cf82eec89e26b7afe7e4db0561b7d9 upstream.
|
||||
|
||||
As noticed by Torsten Kaiser, the operator precedence can play tricks with
|
||||
us here.
|
||||
|
||||
CC: Dave Airlie <airlied@redhat.com>
|
||||
Signed-off-by: Eugeni Dodonov <eugeni.dodonov@intel.com>
|
||||
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
drivers/gpu/drm/i915/intel_display.c | 2 +-
|
||||
1 files changed, 1 insertions(+), 1 deletions(-)
|
||||
|
||||
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
|
||||
index c63ca5f..cc75c4b 100644
|
||||
--- a/drivers/gpu/drm/i915/intel_display.c
|
||||
+++ b/drivers/gpu/drm/i915/intel_display.c
|
||||
@@ -8044,7 +8044,7 @@ void gen6_enable_rps(struct drm_i915_private *dev_priv)
|
||||
|
||||
if (intel_enable_rc6(dev_priv->dev))
|
||||
rc6_mask = GEN6_RC_CTL_RC6_ENABLE |
|
||||
- (IS_GEN7(dev_priv->dev)) ? GEN6_RC_CTL_RC6p_ENABLE : 0;
|
||||
+ ((IS_GEN7(dev_priv->dev)) ? GEN6_RC_CTL_RC6p_ENABLE : 0);
|
||||
|
||||
I915_WRITE(GEN6_RC_CONTROL,
|
||||
rc6_mask |
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+47
@@ -0,0 +1,47 @@
|
||||
From 6ad602fb1ad21f96e203b4525aa56c7e0cc6ac4f Mon Sep 17 00:00:00 2001
|
||||
From: Lucas De Marchi <lucas.demarchi@profusion.mobi>
|
||||
Date: Tue, 17 Jan 2012 14:50:51 -0200
|
||||
Subject: [PATCH 009/109] kbuild: do not check for ancient modutils tools
|
||||
|
||||
commit 620c231c7a7f48745094727bb612f6321cfc8844 upstream.
|
||||
|
||||
scripts/depmod.sh checks for the output of '-V' expecting that it has
|
||||
module-init-tools in it. It's a hack to prevent users from using
|
||||
modutils instead of module-init-tools, that only works with 2.4.x
|
||||
kernels. This however prints an annoying warning for kmod tool, that is
|
||||
currently replacing module-init-tools.
|
||||
|
||||
Rather than putting another check for kmod's version, just remove it
|
||||
since users of 2.4.x kernel are unlikely to upgrade to 3.x, and if they
|
||||
do, let depmod fail in that case because they should know what they are
|
||||
doing.
|
||||
|
||||
Signed-off-by: Lucas De Marchi <lucas.demarchi@profusion.mobi>
|
||||
Acked-by: WANG Cong <amwang@redhat.com>
|
||||
Acked-By: Kay Sievers <kay.sievers@vrfy.org>
|
||||
Signed-off-by: Michal Marek <mmarek@suse.cz>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
scripts/depmod.sh | 6 ------
|
||||
1 files changed, 0 insertions(+), 6 deletions(-)
|
||||
|
||||
diff --git a/scripts/depmod.sh b/scripts/depmod.sh
|
||||
index a272356..2ae4817 100755
|
||||
--- a/scripts/depmod.sh
|
||||
+++ b/scripts/depmod.sh
|
||||
@@ -9,12 +9,6 @@ fi
|
||||
DEPMOD=$1
|
||||
KERNELRELEASE=$2
|
||||
|
||||
-if ! "$DEPMOD" -V 2>/dev/null | grep -q module-init-tools; then
|
||||
- echo "Warning: you may need to install module-init-tools" >&2
|
||||
- echo "See http://www.codemonkey.org.uk/docs/post-halloween-2.6.txt" >&2
|
||||
- sleep 1
|
||||
-fi
|
||||
-
|
||||
if ! test -r System.map -a -x "$DEPMOD"; then
|
||||
exit 0
|
||||
fi
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+45
@@ -0,0 +1,45 @@
|
||||
From bc3f81b80966fcd6e91b61c76408eed675a1b364 Mon Sep 17 00:00:00 2001
|
||||
From: Eldad Zack <eldad@fogrefinery.com>
|
||||
Date: Sun, 22 Apr 2012 00:48:04 +0200
|
||||
Subject: [PATCH 010/109] brcmsmac: "INTERMEDIATE but not AMPDU" only when
|
||||
tracing
|
||||
|
||||
commit 6ead629b27269c553c9092c47cd8f5ab0309ee3b upstream.
|
||||
|
||||
I keep getting the following messages on the log buffer:
|
||||
[ 2167.097507] ieee80211 phy0: brcms_c_dotxstatus: INTERMEDIATE but not AMPDU
|
||||
[ 2281.331305] ieee80211 phy0: brcms_c_dotxstatus: INTERMEDIATE but not AMPDU
|
||||
[ 2281.332539] ieee80211 phy0: brcms_c_dotxstatus: INTERMEDIATE but not AMPDU
|
||||
[ 2329.876605] ieee80211 phy0: brcms_c_dotxstatus: INTERMEDIATE but not AMPDU
|
||||
[ 2329.877354] ieee80211 phy0: brcms_c_dotxstatus: INTERMEDIATE but not AMPDU
|
||||
[ 2462.280756] ieee80211 phy0: brcms_c_dotxstatus: INTERMEDIATE but not AMPDU
|
||||
[ 2615.651689] ieee80211 phy0: brcms_c_dotxstatus: INTERMEDIATE but not AMPDU
|
||||
|
||||
From the code comment I understand that this is something that can -
|
||||
and does, quite frequently - happen.
|
||||
|
||||
Signed-off-by: Eldad Zack <eldad@fogrefinery.com>
|
||||
Acked-by: Franky Lin <frankyl@broadcom.com>
|
||||
Signed-off-by: John W. Linville <linville@tuxdriver.com>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
drivers/net/wireless/brcm80211/brcmsmac/main.c | 3 +--
|
||||
1 files changed, 1 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/drivers/net/wireless/brcm80211/brcmsmac/main.c b/drivers/net/wireless/brcm80211/brcmsmac/main.c
|
||||
index 833cbef..8a40ff9 100644
|
||||
--- a/drivers/net/wireless/brcm80211/brcmsmac/main.c
|
||||
+++ b/drivers/net/wireless/brcm80211/brcmsmac/main.c
|
||||
@@ -900,8 +900,7 @@ brcms_c_dotxstatus(struct brcms_c_info *wlc, struct tx_status *txs)
|
||||
*/
|
||||
if (!(txs->status & TX_STATUS_AMPDU)
|
||||
&& (txs->status & TX_STATUS_INTERMEDIATE)) {
|
||||
- wiphy_err(wlc->wiphy, "%s: INTERMEDIATE but not AMPDU\n",
|
||||
- __func__);
|
||||
+ BCMMSG(wlc->wiphy, "INTERMEDIATE but not AMPDU\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+37
@@ -0,0 +1,37 @@
|
||||
From 9c6259f9486461da338a097ae1655d55b7bead3a Mon Sep 17 00:00:00 2001
|
||||
From: William Dauchy <wdauchy@gmail.com>
|
||||
Date: Wed, 14 Mar 2012 12:32:04 +0100
|
||||
Subject: [PATCH 011/109] NFSv4: Rate limit the state manager for lock reclaim
|
||||
warning messages
|
||||
|
||||
commit 96dcadc2fdd111dca90d559f189a30c65394451a upstream.
|
||||
|
||||
Adding rate limit on `Lock reclaim failed` messages since it could fill
|
||||
up system logs
|
||||
Signed-off-by: William Dauchy <wdauchy@gmail.com>
|
||||
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
|
||||
[bwh: Backported to 3.2: add the 'NFS:' prefix at the same time]
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
fs/nfs/nfs4state.c | 5 +++--
|
||||
1 files changed, 3 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
|
||||
index 66020ac..07354b7 100644
|
||||
--- a/fs/nfs/nfs4state.c
|
||||
+++ b/fs/nfs/nfs4state.c
|
||||
@@ -1186,8 +1186,9 @@ restart:
|
||||
spin_lock(&state->state_lock);
|
||||
list_for_each_entry(lock, &state->lock_states, ls_locks) {
|
||||
if (!(lock->ls_flags & NFS_LOCK_INITIALIZED))
|
||||
- printk("%s: Lock reclaim failed!\n",
|
||||
- __func__);
|
||||
+ pr_warn_ratelimited("NFS: "
|
||||
+ "%s: Lock reclaim "
|
||||
+ "failed!\n", __func__);
|
||||
}
|
||||
spin_unlock(&state->state_lock);
|
||||
nfs4_put_open_state(state);
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+32
@@ -0,0 +1,32 @@
|
||||
From 877ee75ef7f45fd1022c37f6a8a957e9d1b098b7 Mon Sep 17 00:00:00 2001
|
||||
From: Ben Hutchings <ben@decadent.org.uk>
|
||||
Date: Wed, 4 Jan 2012 21:22:51 -0500
|
||||
Subject: [PATCH 012/109] ext4: Report max_batch_time option correctly
|
||||
|
||||
commit 1d526fc91bea04ee35b7599bf8b82f86c0aaf46c upstream.
|
||||
|
||||
Currently the value reported for max_batch_time is really the
|
||||
value of min_batch_time.
|
||||
|
||||
Reported-by: Russell Coker <russell@coker.com.au>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
fs/ext4/super.c | 2 +-
|
||||
1 files changed, 1 insertions(+), 1 deletions(-)
|
||||
|
||||
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
|
||||
index ab7aa3f..a93486e 100644
|
||||
--- a/fs/ext4/super.c
|
||||
+++ b/fs/ext4/super.c
|
||||
@@ -1097,7 +1097,7 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
|
||||
}
|
||||
if (sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) {
|
||||
seq_printf(seq, ",max_batch_time=%u",
|
||||
- (unsigned) sbi->s_min_batch_time);
|
||||
+ (unsigned) sbi->s_max_batch_time);
|
||||
}
|
||||
|
||||
/*
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+464
@@ -0,0 +1,464 @@
|
||||
From 5babdc7487f6c78c06d8e085efe841d91a77ff48 Mon Sep 17 00:00:00 2001
|
||||
From: David Gibson <david@gibson.dropbear.id.au>
|
||||
Date: Wed, 21 Mar 2012 16:34:12 -0700
|
||||
Subject: [PATCH 013/109] hugepages: fix use after free bug in "quota"
|
||||
handling
|
||||
|
||||
commit 90481622d75715bfcb68501280a917dbfe516029 upstream.
|
||||
|
||||
hugetlbfs_{get,put}_quota() are badly named. They don't interact with the
|
||||
general quota handling code, and they don't much resemble its behaviour.
|
||||
Rather than being about maintaining limits on on-disk block usage by
|
||||
particular users, they are instead about maintaining limits on in-memory
|
||||
page usage (including anonymous MAP_PRIVATE copied-on-write pages)
|
||||
associated with a particular hugetlbfs filesystem instance.
|
||||
|
||||
Worse, they work by having callbacks to the hugetlbfs filesystem code from
|
||||
the low-level page handling code, in particular from free_huge_page().
|
||||
This is a layering violation in itself, but more importantly, if the
|
||||
kernel does a get_user_pages() on hugepages (which can happen from KVM
|
||||
amongst others), then the free_huge_page() can be delayed until after the
|
||||
associated inode has already been freed. If an unmount occurs at the
|
||||
wrong time, even the hugetlbfs superblock where the "quota" limits are
|
||||
stored may have been freed.
|
||||
|
||||
Andrew Barry proposed a patch to fix this by having hugepages, instead of
|
||||
storing a pointer to their address_space and reaching the superblock from
|
||||
there, store pointers directly to the superblock,
|
||||
bumping the reference count as appropriate to avoid it being freed.
|
||||
Andrew Morton rejected that version, however, on the grounds that it made
|
||||
the existing layering violation worse.
|
||||
|
||||
This is a reworked version of Andrew's patch, which removes the extra, and
|
||||
some of the existing, layering violation. It works by introducing the
|
||||
concept of a hugepage "subpool" at the lower hugepage mm layer - that is a
|
||||
finite logical pool of hugepages to allocate from. hugetlbfs now creates
|
||||
a subpool for each filesystem instance with a page limit set, and a
|
||||
pointer to the subpool gets added to each allocated hugepage, instead of
|
||||
the address_space pointer used now. The subpool has its own lifetime and
|
||||
is only freed once all pages in it _and_ all other references to it (i.e.
|
||||
superblocks) are gone.
|
||||
|
||||
subpools are optional - a NULL subpool pointer is taken by the code to
|
||||
mean that no subpool limits are in effect.
|
||||
|
||||
Previous discussion of this bug found in: "Fix refcounting in hugetlbfs
|
||||
quota handling.". See: https://lkml.org/lkml/2011/8/11/28 or
|
||||
http://marc.info/?l=linux-mm&m=126928970510627&w=1
|
||||
|
||||
v2: Fixed a bug spotted by Hillf Danton, and removed the extra parameter to
|
||||
alloc_huge_page() - since it already takes the vma, it is not necessary.
|
||||
|
||||
Signed-off-by: Andrew Barry <abarry@cray.com>
|
||||
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
|
||||
Cc: Hugh Dickins <hughd@google.com>
|
||||
Cc: Mel Gorman <mgorman@suse.de>
|
||||
Cc: Minchan Kim <minchan.kim@gmail.com>
|
||||
Cc: Hillf Danton <dhillf@gmail.com>
|
||||
Cc: Paul Mackerras <paulus@samba.org>
|
||||
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
|
||||
[bwh: Backported to 3.2: adjust context to apply after commit
|
||||
c50ac050811d6485616a193eb0f37bfbd191cc89 'hugetlb: fix resv_map leak in
|
||||
error path', backported in 3.2.20]
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
fs/hugetlbfs/inode.c | 54 +++++++-----------
|
||||
include/linux/hugetlb.h | 14 ++++--
|
||||
mm/hugetlb.c | 135 +++++++++++++++++++++++++++++++++++++---------
|
||||
3 files changed, 139 insertions(+), 64 deletions(-)
|
||||
|
||||
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
|
||||
index 2d0ca24..ebc2f4d 100644
|
||||
--- a/fs/hugetlbfs/inode.c
|
||||
+++ b/fs/hugetlbfs/inode.c
|
||||
@@ -592,9 +592,15 @@ static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf)
|
||||
spin_lock(&sbinfo->stat_lock);
|
||||
/* If no limits set, just report 0 for max/free/used
|
||||
* blocks, like simple_statfs() */
|
||||
- if (sbinfo->max_blocks >= 0) {
|
||||
- buf->f_blocks = sbinfo->max_blocks;
|
||||
- buf->f_bavail = buf->f_bfree = sbinfo->free_blocks;
|
||||
+ if (sbinfo->spool) {
|
||||
+ long free_pages;
|
||||
+
|
||||
+ spin_lock(&sbinfo->spool->lock);
|
||||
+ buf->f_blocks = sbinfo->spool->max_hpages;
|
||||
+ free_pages = sbinfo->spool->max_hpages
|
||||
+ - sbinfo->spool->used_hpages;
|
||||
+ buf->f_bavail = buf->f_bfree = free_pages;
|
||||
+ spin_unlock(&sbinfo->spool->lock);
|
||||
buf->f_files = sbinfo->max_inodes;
|
||||
buf->f_ffree = sbinfo->free_inodes;
|
||||
}
|
||||
@@ -610,6 +616,10 @@ static void hugetlbfs_put_super(struct super_block *sb)
|
||||
|
||||
if (sbi) {
|
||||
sb->s_fs_info = NULL;
|
||||
+
|
||||
+ if (sbi->spool)
|
||||
+ hugepage_put_subpool(sbi->spool);
|
||||
+
|
||||
kfree(sbi);
|
||||
}
|
||||
}
|
||||
@@ -841,10 +851,14 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
|
||||
sb->s_fs_info = sbinfo;
|
||||
sbinfo->hstate = config.hstate;
|
||||
spin_lock_init(&sbinfo->stat_lock);
|
||||
- sbinfo->max_blocks = config.nr_blocks;
|
||||
- sbinfo->free_blocks = config.nr_blocks;
|
||||
sbinfo->max_inodes = config.nr_inodes;
|
||||
sbinfo->free_inodes = config.nr_inodes;
|
||||
+ sbinfo->spool = NULL;
|
||||
+ if (config.nr_blocks != -1) {
|
||||
+ sbinfo->spool = hugepage_new_subpool(config.nr_blocks);
|
||||
+ if (!sbinfo->spool)
|
||||
+ goto out_free;
|
||||
+ }
|
||||
sb->s_maxbytes = MAX_LFS_FILESIZE;
|
||||
sb->s_blocksize = huge_page_size(config.hstate);
|
||||
sb->s_blocksize_bits = huge_page_shift(config.hstate);
|
||||
@@ -864,38 +878,12 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
|
||||
sb->s_root = root;
|
||||
return 0;
|
||||
out_free:
|
||||
+ if (sbinfo->spool)
|
||||
+ kfree(sbinfo->spool);
|
||||
kfree(sbinfo);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
-int hugetlb_get_quota(struct address_space *mapping, long delta)
|
||||
-{
|
||||
- int ret = 0;
|
||||
- struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(mapping->host->i_sb);
|
||||
-
|
||||
- if (sbinfo->free_blocks > -1) {
|
||||
- spin_lock(&sbinfo->stat_lock);
|
||||
- if (sbinfo->free_blocks - delta >= 0)
|
||||
- sbinfo->free_blocks -= delta;
|
||||
- else
|
||||
- ret = -ENOMEM;
|
||||
- spin_unlock(&sbinfo->stat_lock);
|
||||
- }
|
||||
-
|
||||
- return ret;
|
||||
-}
|
||||
-
|
||||
-void hugetlb_put_quota(struct address_space *mapping, long delta)
|
||||
-{
|
||||
- struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(mapping->host->i_sb);
|
||||
-
|
||||
- if (sbinfo->free_blocks > -1) {
|
||||
- spin_lock(&sbinfo->stat_lock);
|
||||
- sbinfo->free_blocks += delta;
|
||||
- spin_unlock(&sbinfo->stat_lock);
|
||||
- }
|
||||
-}
|
||||
-
|
||||
static struct dentry *hugetlbfs_mount(struct file_system_type *fs_type,
|
||||
int flags, const char *dev_name, void *data)
|
||||
{
|
||||
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
|
||||
index d9d6c86..c5ed2f1 100644
|
||||
--- a/include/linux/hugetlb.h
|
||||
+++ b/include/linux/hugetlb.h
|
||||
@@ -14,6 +14,15 @@ struct user_struct;
|
||||
#include <linux/shm.h>
|
||||
#include <asm/tlbflush.h>
|
||||
|
||||
+struct hugepage_subpool {
|
||||
+ spinlock_t lock;
|
||||
+ long count;
|
||||
+ long max_hpages, used_hpages;
|
||||
+};
|
||||
+
|
||||
+struct hugepage_subpool *hugepage_new_subpool(long nr_blocks);
|
||||
+void hugepage_put_subpool(struct hugepage_subpool *spool);
|
||||
+
|
||||
int PageHuge(struct page *page);
|
||||
|
||||
void reset_vma_resv_huge_pages(struct vm_area_struct *vma);
|
||||
@@ -138,12 +147,11 @@ struct hugetlbfs_config {
|
||||
};
|
||||
|
||||
struct hugetlbfs_sb_info {
|
||||
- long max_blocks; /* blocks allowed */
|
||||
- long free_blocks; /* blocks free */
|
||||
long max_inodes; /* inodes allowed */
|
||||
long free_inodes; /* inodes free */
|
||||
spinlock_t stat_lock;
|
||||
struct hstate *hstate;
|
||||
+ struct hugepage_subpool *spool;
|
||||
};
|
||||
|
||||
|
||||
@@ -166,8 +174,6 @@ extern const struct file_operations hugetlbfs_file_operations;
|
||||
extern const struct vm_operations_struct hugetlb_vm_ops;
|
||||
struct file *hugetlb_file_setup(const char *name, size_t size, vm_flags_t acct,
|
||||
struct user_struct **user, int creat_flags);
|
||||
-int hugetlb_get_quota(struct address_space *mapping, long delta);
|
||||
-void hugetlb_put_quota(struct address_space *mapping, long delta);
|
||||
|
||||
static inline int is_file_hugepages(struct file *file)
|
||||
{
|
||||
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
|
||||
index 5f5c545..7c535b0 100644
|
||||
--- a/mm/hugetlb.c
|
||||
+++ b/mm/hugetlb.c
|
||||
@@ -53,6 +53,84 @@ static unsigned long __initdata default_hstate_size;
|
||||
*/
|
||||
static DEFINE_SPINLOCK(hugetlb_lock);
|
||||
|
||||
+static inline void unlock_or_release_subpool(struct hugepage_subpool *spool)
|
||||
+{
|
||||
+ bool free = (spool->count == 0) && (spool->used_hpages == 0);
|
||||
+
|
||||
+ spin_unlock(&spool->lock);
|
||||
+
|
||||
+ /* If no pages are used, and no other handles to the subpool
|
||||
+ * remain, free the subpool the subpool remain */
|
||||
+ if (free)
|
||||
+ kfree(spool);
|
||||
+}
|
||||
+
|
||||
+struct hugepage_subpool *hugepage_new_subpool(long nr_blocks)
|
||||
+{
|
||||
+ struct hugepage_subpool *spool;
|
||||
+
|
||||
+ spool = kmalloc(sizeof(*spool), GFP_KERNEL);
|
||||
+ if (!spool)
|
||||
+ return NULL;
|
||||
+
|
||||
+ spin_lock_init(&spool->lock);
|
||||
+ spool->count = 1;
|
||||
+ spool->max_hpages = nr_blocks;
|
||||
+ spool->used_hpages = 0;
|
||||
+
|
||||
+ return spool;
|
||||
+}
|
||||
+
|
||||
+void hugepage_put_subpool(struct hugepage_subpool *spool)
|
||||
+{
|
||||
+ spin_lock(&spool->lock);
|
||||
+ BUG_ON(!spool->count);
|
||||
+ spool->count--;
|
||||
+ unlock_or_release_subpool(spool);
|
||||
+}
|
||||
+
|
||||
+static int hugepage_subpool_get_pages(struct hugepage_subpool *spool,
|
||||
+ long delta)
|
||||
+{
|
||||
+ int ret = 0;
|
||||
+
|
||||
+ if (!spool)
|
||||
+ return 0;
|
||||
+
|
||||
+ spin_lock(&spool->lock);
|
||||
+ if ((spool->used_hpages + delta) <= spool->max_hpages) {
|
||||
+ spool->used_hpages += delta;
|
||||
+ } else {
|
||||
+ ret = -ENOMEM;
|
||||
+ }
|
||||
+ spin_unlock(&spool->lock);
|
||||
+
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+static void hugepage_subpool_put_pages(struct hugepage_subpool *spool,
|
||||
+ long delta)
|
||||
+{
|
||||
+ if (!spool)
|
||||
+ return;
|
||||
+
|
||||
+ spin_lock(&spool->lock);
|
||||
+ spool->used_hpages -= delta;
|
||||
+ /* If hugetlbfs_put_super couldn't free spool due to
|
||||
+ * an outstanding quota reference, free it now. */
|
||||
+ unlock_or_release_subpool(spool);
|
||||
+}
|
||||
+
|
||||
+static inline struct hugepage_subpool *subpool_inode(struct inode *inode)
|
||||
+{
|
||||
+ return HUGETLBFS_SB(inode->i_sb)->spool;
|
||||
+}
|
||||
+
|
||||
+static inline struct hugepage_subpool *subpool_vma(struct vm_area_struct *vma)
|
||||
+{
|
||||
+ return subpool_inode(vma->vm_file->f_dentry->d_inode);
|
||||
+}
|
||||
+
|
||||
/*
|
||||
* Region tracking -- allows tracking of reservations and instantiated pages
|
||||
* across the pages in a mapping.
|
||||
@@ -533,9 +611,9 @@ static void free_huge_page(struct page *page)
|
||||
*/
|
||||
struct hstate *h = page_hstate(page);
|
||||
int nid = page_to_nid(page);
|
||||
- struct address_space *mapping;
|
||||
+ struct hugepage_subpool *spool =
|
||||
+ (struct hugepage_subpool *)page_private(page);
|
||||
|
||||
- mapping = (struct address_space *) page_private(page);
|
||||
set_page_private(page, 0);
|
||||
page->mapping = NULL;
|
||||
BUG_ON(page_count(page));
|
||||
@@ -551,8 +629,7 @@ static void free_huge_page(struct page *page)
|
||||
enqueue_huge_page(h, page);
|
||||
}
|
||||
spin_unlock(&hugetlb_lock);
|
||||
- if (mapping)
|
||||
- hugetlb_put_quota(mapping, 1);
|
||||
+ hugepage_subpool_put_pages(spool, 1);
|
||||
}
|
||||
|
||||
static void prep_new_huge_page(struct hstate *h, struct page *page, int nid)
|
||||
@@ -966,11 +1043,12 @@ static void return_unused_surplus_pages(struct hstate *h,
|
||||
/*
|
||||
* Determine if the huge page at addr within the vma has an associated
|
||||
* reservation. Where it does not we will need to logically increase
|
||||
- * reservation and actually increase quota before an allocation can occur.
|
||||
- * Where any new reservation would be required the reservation change is
|
||||
- * prepared, but not committed. Once the page has been quota'd allocated
|
||||
- * an instantiated the change should be committed via vma_commit_reservation.
|
||||
- * No action is required on failure.
|
||||
+ * reservation and actually increase subpool usage before an allocation
|
||||
+ * can occur. Where any new reservation would be required the
|
||||
+ * reservation change is prepared, but not committed. Once the page
|
||||
+ * has been allocated from the subpool and instantiated the change should
|
||||
+ * be committed via vma_commit_reservation. No action is required on
|
||||
+ * failure.
|
||||
*/
|
||||
static long vma_needs_reservation(struct hstate *h,
|
||||
struct vm_area_struct *vma, unsigned long addr)
|
||||
@@ -1019,24 +1097,24 @@ static void vma_commit_reservation(struct hstate *h,
|
||||
static struct page *alloc_huge_page(struct vm_area_struct *vma,
|
||||
unsigned long addr, int avoid_reserve)
|
||||
{
|
||||
+ struct hugepage_subpool *spool = subpool_vma(vma);
|
||||
struct hstate *h = hstate_vma(vma);
|
||||
struct page *page;
|
||||
- struct address_space *mapping = vma->vm_file->f_mapping;
|
||||
- struct inode *inode = mapping->host;
|
||||
long chg;
|
||||
|
||||
/*
|
||||
- * Processes that did not create the mapping will have no reserves and
|
||||
- * will not have accounted against quota. Check that the quota can be
|
||||
- * made before satisfying the allocation
|
||||
- * MAP_NORESERVE mappings may also need pages and quota allocated
|
||||
- * if no reserve mapping overlaps.
|
||||
+ * Processes that did not create the mapping will have no
|
||||
+ * reserves and will not have accounted against subpool
|
||||
+ * limit. Check that the subpool limit can be made before
|
||||
+ * satisfying the allocation MAP_NORESERVE mappings may also
|
||||
+ * need pages and subpool limit allocated allocated if no reserve
|
||||
+ * mapping overlaps.
|
||||
*/
|
||||
chg = vma_needs_reservation(h, vma, addr);
|
||||
if (chg < 0)
|
||||
return ERR_PTR(-VM_FAULT_OOM);
|
||||
if (chg)
|
||||
- if (hugetlb_get_quota(inode->i_mapping, chg))
|
||||
+ if (hugepage_subpool_get_pages(spool, chg))
|
||||
return ERR_PTR(-VM_FAULT_SIGBUS);
|
||||
|
||||
spin_lock(&hugetlb_lock);
|
||||
@@ -1046,12 +1124,12 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
|
||||
if (!page) {
|
||||
page = alloc_buddy_huge_page(h, NUMA_NO_NODE);
|
||||
if (!page) {
|
||||
- hugetlb_put_quota(inode->i_mapping, chg);
|
||||
+ hugepage_subpool_put_pages(spool, chg);
|
||||
return ERR_PTR(-VM_FAULT_SIGBUS);
|
||||
}
|
||||
}
|
||||
|
||||
- set_page_private(page, (unsigned long) mapping);
|
||||
+ set_page_private(page, (unsigned long)spool);
|
||||
|
||||
vma_commit_reservation(h, vma, addr);
|
||||
|
||||
@@ -2081,6 +2159,7 @@ static void hugetlb_vm_op_close(struct vm_area_struct *vma)
|
||||
{
|
||||
struct hstate *h = hstate_vma(vma);
|
||||
struct resv_map *reservations = vma_resv_map(vma);
|
||||
+ struct hugepage_subpool *spool = subpool_vma(vma);
|
||||
unsigned long reserve;
|
||||
unsigned long start;
|
||||
unsigned long end;
|
||||
@@ -2096,7 +2175,7 @@ static void hugetlb_vm_op_close(struct vm_area_struct *vma)
|
||||
|
||||
if (reserve) {
|
||||
hugetlb_acct_memory(h, -reserve);
|
||||
- hugetlb_put_quota(vma->vm_file->f_mapping, reserve);
|
||||
+ hugepage_subpool_put_pages(spool, reserve);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -2326,7 +2405,7 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
|
||||
address = address & huge_page_mask(h);
|
||||
pgoff = ((address - vma->vm_start) >> PAGE_SHIFT)
|
||||
+ (vma->vm_pgoff >> PAGE_SHIFT);
|
||||
- mapping = (struct address_space *)page_private(page);
|
||||
+ mapping = vma->vm_file->f_dentry->d_inode->i_mapping;
|
||||
|
||||
/*
|
||||
* Take the mapping lock for the duration of the table walk. As
|
||||
@@ -2865,11 +2944,12 @@ int hugetlb_reserve_pages(struct inode *inode,
|
||||
{
|
||||
long ret, chg;
|
||||
struct hstate *h = hstate_inode(inode);
|
||||
+ struct hugepage_subpool *spool = subpool_inode(inode);
|
||||
|
||||
/*
|
||||
* Only apply hugepage reservation if asked. At fault time, an
|
||||
* attempt will be made for VM_NORESERVE to allocate a page
|
||||
- * and filesystem quota without using reserves
|
||||
+ * without using reserves
|
||||
*/
|
||||
if (vm_flags & VM_NORESERVE)
|
||||
return 0;
|
||||
@@ -2898,19 +2978,19 @@ int hugetlb_reserve_pages(struct inode *inode,
|
||||
goto out_err;
|
||||
}
|
||||
|
||||
- /* There must be enough filesystem quota for the mapping */
|
||||
- if (hugetlb_get_quota(inode->i_mapping, chg)) {
|
||||
+ /* There must be enough pages in the subpool for the mapping */
|
||||
+ if (hugepage_subpool_get_pages(spool, chg)) {
|
||||
ret = -ENOSPC;
|
||||
goto out_err;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check enough hugepages are available for the reservation.
|
||||
- * Hand back the quota if there are not
|
||||
+ * Hand the pages back to the subpool if there are not
|
||||
*/
|
||||
ret = hugetlb_acct_memory(h, chg);
|
||||
if (ret < 0) {
|
||||
- hugetlb_put_quota(inode->i_mapping, chg);
|
||||
+ hugepage_subpool_put_pages(spool, chg);
|
||||
goto out_err;
|
||||
}
|
||||
|
||||
@@ -2938,12 +3018,13 @@ void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed)
|
||||
{
|
||||
struct hstate *h = hstate_inode(inode);
|
||||
long chg = region_truncate(&inode->i_mapping->private_list, offset);
|
||||
+ struct hugepage_subpool *spool = subpool_inode(inode);
|
||||
|
||||
spin_lock(&inode->i_lock);
|
||||
inode->i_blocks -= (blocks_per_huge_page(h) * freed);
|
||||
spin_unlock(&inode->i_lock);
|
||||
|
||||
- hugetlb_put_quota(inode->i_mapping, (chg - freed));
|
||||
+ hugepage_subpool_put_pages(spool, (chg - freed));
|
||||
hugetlb_acct_memory(h, -(chg - freed));
|
||||
}
|
||||
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+69
@@ -0,0 +1,69 @@
|
||||
From e45792228b6a4487d859334c757322554c960397 Mon Sep 17 00:00:00 2001
|
||||
From: Trond Myklebust <Trond.Myklebust@netapp.com>
|
||||
Date: Tue, 7 Feb 2012 14:59:05 -0500
|
||||
Subject: [PATCH 014/109] NFSv4: Reduce the footprint of the idmapper
|
||||
|
||||
commit d073e9b541e1ac3f52d72c3a153855d9a9ee3278 upstream.
|
||||
|
||||
Instead of pre-allocating the storage for all the strings, we can
|
||||
significantly reduce the size of that table by doing the allocation
|
||||
when we do the downcall.
|
||||
|
||||
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
|
||||
Reviewed-by: Jeff Layton <jlayton@redhat.com>
|
||||
[bwh: Backported to 3.2: adjust context in nfs_idmap_delete()]
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
fs/nfs/idmap.c | 16 +++++++++++++---
|
||||
1 files changed, 13 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
|
||||
index 47d1c6f..b8c41c3 100644
|
||||
--- a/fs/nfs/idmap.c
|
||||
+++ b/fs/nfs/idmap.c
|
||||
@@ -318,7 +318,7 @@ struct idmap_hashent {
|
||||
unsigned long ih_expires;
|
||||
__u32 ih_id;
|
||||
size_t ih_namelen;
|
||||
- char ih_name[IDMAP_NAMESZ];
|
||||
+ const char *ih_name;
|
||||
};
|
||||
|
||||
struct idmap_hashtable {
|
||||
@@ -382,11 +382,16 @@ void
|
||||
nfs_idmap_delete(struct nfs_client *clp)
|
||||
{
|
||||
struct idmap *idmap = clp->cl_idmap;
|
||||
+ int i;
|
||||
|
||||
if (!idmap)
|
||||
return;
|
||||
rpc_unlink(idmap->idmap_dentry);
|
||||
clp->cl_idmap = NULL;
|
||||
+ for (i = 0; i < ARRAY_SIZE(idmap->idmap_user_hash.h_entries); i++)
|
||||
+ kfree(idmap->idmap_user_hash.h_entries[i].ih_name);
|
||||
+ for (i = 0; i < ARRAY_SIZE(idmap->idmap_group_hash.h_entries); i++)
|
||||
+ kfree(idmap->idmap_group_hash.h_entries[i].ih_name);
|
||||
kfree(idmap);
|
||||
}
|
||||
|
||||
@@ -449,9 +454,14 @@ static void
|
||||
idmap_update_entry(struct idmap_hashent *he, const char *name,
|
||||
size_t namelen, __u32 id)
|
||||
{
|
||||
+ char *str = kmalloc(namelen + 1, GFP_KERNEL);
|
||||
+ if (str == NULL)
|
||||
+ return;
|
||||
+ kfree(he->ih_name);
|
||||
he->ih_id = id;
|
||||
- memcpy(he->ih_name, name, namelen);
|
||||
- he->ih_name[namelen] = '\0';
|
||||
+ memcpy(str, name, namelen);
|
||||
+ str[namelen] = '\0';
|
||||
+ he->ih_name = str;
|
||||
he->ih_namelen = namelen;
|
||||
he->ih_expires = jiffies + nfs_idmap_cache_timeout;
|
||||
}
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+131
@@ -0,0 +1,131 @@
|
||||
From c297b1ec9340ec265bceeb8c1b8198ee476f0573 Mon Sep 17 00:00:00 2001
|
||||
From: Trond Myklebust <Trond.Myklebust@netapp.com>
|
||||
Date: Wed, 8 Feb 2012 13:39:15 -0500
|
||||
Subject: [PATCH 015/109] NFSv4: Further reduce the footprint of the idmapper
|
||||
|
||||
commit 685f50f9188ac1e8244d0340a9d6ea36b6136cec upstream.
|
||||
|
||||
Don't allocate the legacy idmapper tables until we actually need
|
||||
them.
|
||||
|
||||
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
|
||||
Reviewed-by: Jeff Layton <jlayton@redhat.com>
|
||||
[bwh: Backported to 3.2: adjust context in nfs_idmap_delete()]
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
fs/nfs/idmap.c | 42 ++++++++++++++++++++++++++++++++++++------
|
||||
1 files changed, 36 insertions(+), 6 deletions(-)
|
||||
|
||||
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
|
||||
index b8c41c3..b122af8 100644
|
||||
--- a/fs/nfs/idmap.c
|
||||
+++ b/fs/nfs/idmap.c
|
||||
@@ -323,7 +323,7 @@ struct idmap_hashent {
|
||||
|
||||
struct idmap_hashtable {
|
||||
__u8 h_type;
|
||||
- struct idmap_hashent h_entries[IDMAP_HASH_SZ];
|
||||
+ struct idmap_hashent *h_entries;
|
||||
};
|
||||
|
||||
struct idmap {
|
||||
@@ -378,20 +378,39 @@ nfs_idmap_new(struct nfs_client *clp)
|
||||
return 0;
|
||||
}
|
||||
|
||||
+static void
|
||||
+idmap_alloc_hashtable(struct idmap_hashtable *h)
|
||||
+{
|
||||
+ if (h->h_entries != NULL)
|
||||
+ return;
|
||||
+ h->h_entries = kcalloc(IDMAP_HASH_SZ,
|
||||
+ sizeof(*h->h_entries),
|
||||
+ GFP_KERNEL);
|
||||
+}
|
||||
+
|
||||
+static void
|
||||
+idmap_free_hashtable(struct idmap_hashtable *h)
|
||||
+{
|
||||
+ int i;
|
||||
+
|
||||
+ if (h->h_entries == NULL)
|
||||
+ return;
|
||||
+ for (i = 0; i < IDMAP_HASH_SZ; i++)
|
||||
+ kfree(h->h_entries[i].ih_name);
|
||||
+ kfree(h->h_entries);
|
||||
+}
|
||||
+
|
||||
void
|
||||
nfs_idmap_delete(struct nfs_client *clp)
|
||||
{
|
||||
struct idmap *idmap = clp->cl_idmap;
|
||||
- int i;
|
||||
|
||||
if (!idmap)
|
||||
return;
|
||||
rpc_unlink(idmap->idmap_dentry);
|
||||
clp->cl_idmap = NULL;
|
||||
- for (i = 0; i < ARRAY_SIZE(idmap->idmap_user_hash.h_entries); i++)
|
||||
- kfree(idmap->idmap_user_hash.h_entries[i].ih_name);
|
||||
- for (i = 0; i < ARRAY_SIZE(idmap->idmap_group_hash.h_entries); i++)
|
||||
- kfree(idmap->idmap_group_hash.h_entries[i].ih_name);
|
||||
+ idmap_free_hashtable(&idmap->idmap_user_hash);
|
||||
+ idmap_free_hashtable(&idmap->idmap_group_hash);
|
||||
kfree(idmap);
|
||||
}
|
||||
|
||||
@@ -401,6 +420,8 @@ nfs_idmap_delete(struct nfs_client *clp)
|
||||
static inline struct idmap_hashent *
|
||||
idmap_name_hash(struct idmap_hashtable* h, const char *name, size_t len)
|
||||
{
|
||||
+ if (h->h_entries == NULL)
|
||||
+ return NULL;
|
||||
return &h->h_entries[fnvhash32(name, len) % IDMAP_HASH_SZ];
|
||||
}
|
||||
|
||||
@@ -409,6 +430,8 @@ idmap_lookup_name(struct idmap_hashtable *h, const char *name, size_t len)
|
||||
{
|
||||
struct idmap_hashent *he = idmap_name_hash(h, name, len);
|
||||
|
||||
+ if (he == NULL)
|
||||
+ return NULL;
|
||||
if (he->ih_namelen != len || memcmp(he->ih_name, name, len) != 0)
|
||||
return NULL;
|
||||
if (time_after(jiffies, he->ih_expires))
|
||||
@@ -419,6 +442,8 @@ idmap_lookup_name(struct idmap_hashtable *h, const char *name, size_t len)
|
||||
static inline struct idmap_hashent *
|
||||
idmap_id_hash(struct idmap_hashtable* h, __u32 id)
|
||||
{
|
||||
+ if (h->h_entries == NULL)
|
||||
+ return NULL;
|
||||
return &h->h_entries[fnvhash32(&id, sizeof(id)) % IDMAP_HASH_SZ];
|
||||
}
|
||||
|
||||
@@ -426,6 +451,9 @@ static struct idmap_hashent *
|
||||
idmap_lookup_id(struct idmap_hashtable *h, __u32 id)
|
||||
{
|
||||
struct idmap_hashent *he = idmap_id_hash(h, id);
|
||||
+
|
||||
+ if (he == NULL)
|
||||
+ return NULL;
|
||||
if (he->ih_id != id || he->ih_namelen == 0)
|
||||
return NULL;
|
||||
if (time_after(jiffies, he->ih_expires))
|
||||
@@ -441,12 +469,14 @@ idmap_lookup_id(struct idmap_hashtable *h, __u32 id)
|
||||
static inline struct idmap_hashent *
|
||||
idmap_alloc_name(struct idmap_hashtable *h, char *name, size_t len)
|
||||
{
|
||||
+ idmap_alloc_hashtable(h);
|
||||
return idmap_name_hash(h, name, len);
|
||||
}
|
||||
|
||||
static inline struct idmap_hashent *
|
||||
idmap_alloc_id(struct idmap_hashtable *h, __u32 id)
|
||||
{
|
||||
+ idmap_alloc_hashtable(h);
|
||||
return idmap_id_hash(h, id);
|
||||
}
|
||||
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+67
@@ -0,0 +1,67 @@
|
||||
From 6c4e2ff1b19d58c2a2f016d25b96eee0f733d7aa Mon Sep 17 00:00:00 2001
|
||||
From: Jason Wang <jasowang@redhat.com>
|
||||
Date: Wed, 2 May 2012 11:41:30 +0800
|
||||
Subject: [PATCH 016/109] macvtap: zerocopy: fix offset calculation when
|
||||
building skb
|
||||
|
||||
commit 3afc9621f15701c557e60f61eba9242bac2771dd upstream.
|
||||
|
||||
This patch fixes the offset calculation when building skb:
|
||||
|
||||
- offset1 was used as skb data offset not vector offset
|
||||
- reset offset to zero only when we advance to next vector
|
||||
|
||||
Signed-off-by: Jason Wang <jasowang@redhat.com>
|
||||
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
drivers/net/macvtap.c | 13 +++++++------
|
||||
1 files changed, 7 insertions(+), 6 deletions(-)
|
||||
|
||||
diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
|
||||
index 1b7082d..4505008 100644
|
||||
--- a/drivers/net/macvtap.c
|
||||
+++ b/drivers/net/macvtap.c
|
||||
@@ -504,10 +504,11 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from,
|
||||
if (copy > size) {
|
||||
++from;
|
||||
--count;
|
||||
- }
|
||||
+ offset = 0;
|
||||
+ } else
|
||||
+ offset += size;
|
||||
copy -= size;
|
||||
offset1 += size;
|
||||
- offset = 0;
|
||||
}
|
||||
|
||||
if (len == offset1)
|
||||
@@ -518,13 +519,13 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from,
|
||||
int num_pages;
|
||||
unsigned long base;
|
||||
|
||||
- len = from->iov_len - offset1;
|
||||
+ len = from->iov_len - offset;
|
||||
if (!len) {
|
||||
- offset1 = 0;
|
||||
+ offset = 0;
|
||||
++from;
|
||||
continue;
|
||||
}
|
||||
- base = (unsigned long)from->iov_base + offset1;
|
||||
+ base = (unsigned long)from->iov_base + offset;
|
||||
size = ((base & ~PAGE_MASK) + len + ~PAGE_MASK) >> PAGE_SHIFT;
|
||||
num_pages = get_user_pages_fast(base, size, 0, &page[i]);
|
||||
if ((num_pages != size) ||
|
||||
@@ -545,7 +546,7 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from,
|
||||
len -= size;
|
||||
i++;
|
||||
}
|
||||
- offset1 = 0;
|
||||
+ offset = 0;
|
||||
++from;
|
||||
}
|
||||
return 0;
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+46
@@ -0,0 +1,46 @@
|
||||
From e2261c8945dd5af5a0627ac72f7a39f676f3b657 Mon Sep 17 00:00:00 2001
|
||||
From: Jason Wang <jasowang@redhat.com>
|
||||
Date: Wed, 2 May 2012 11:41:44 +0800
|
||||
Subject: [PATCH 017/109] macvtap: zerocopy: fix truesize underestimation
|
||||
|
||||
commit 4ef67ebedffa44ed9939b34708ac2fee06d2f65f upstream.
|
||||
|
||||
As the skb fragments were pinned/built from user pages, we should
|
||||
account the page instead of length for truesize.
|
||||
|
||||
Signed-off-by: Jason Wang <jasowang@redhat.com>
|
||||
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
drivers/net/macvtap.c | 6 ++++--
|
||||
1 files changed, 4 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
|
||||
index 4505008..c7a84eb 100644
|
||||
--- a/drivers/net/macvtap.c
|
||||
+++ b/drivers/net/macvtap.c
|
||||
@@ -518,6 +518,7 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from,
|
||||
struct page *page[MAX_SKB_FRAGS];
|
||||
int num_pages;
|
||||
unsigned long base;
|
||||
+ unsigned long truesize;
|
||||
|
||||
len = from->iov_len - offset;
|
||||
if (!len) {
|
||||
@@ -532,10 +533,11 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from,
|
||||
(num_pages > MAX_SKB_FRAGS - skb_shinfo(skb)->nr_frags))
|
||||
/* put_page is in skb free */
|
||||
return -EFAULT;
|
||||
+ truesize = size * PAGE_SIZE;
|
||||
skb->data_len += len;
|
||||
skb->len += len;
|
||||
- skb->truesize += len;
|
||||
- atomic_add(len, &skb->sk->sk_wmem_alloc);
|
||||
+ skb->truesize += truesize;
|
||||
+ atomic_add(truesize, &skb->sk->sk_wmem_alloc);
|
||||
while (len) {
|
||||
int off = base & ~PAGE_MASK;
|
||||
int size = min_t(int, len, PAGE_SIZE - off);
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+40
@@ -0,0 +1,40 @@
|
||||
From 13d71d7ee644607d525480330c8b6a4268c18c0e Mon Sep 17 00:00:00 2001
|
||||
From: Jason Wang <jasowang@redhat.com>
|
||||
Date: Wed, 2 May 2012 11:41:58 +0800
|
||||
Subject: [PATCH 018/109] macvtap: zerocopy: put page when fail to get all
|
||||
requested user pages
|
||||
|
||||
commit 02ce04bb3d28c3333231f43bca677228dbc686fe upstream.
|
||||
|
||||
When get_user_pages_fast() fails to get all requested pages, we could not use
|
||||
kfree_skb() to free them as they have not been put in the skb fragments. So we need
|
||||
to call put_page() instead.
|
||||
|
||||
Signed-off-by: Jason Wang <jasowang@redhat.com>
|
||||
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
drivers/net/macvtap.c | 6 ++++--
|
||||
1 files changed, 4 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
|
||||
index c7a84eb..f217247 100644
|
||||
--- a/drivers/net/macvtap.c
|
||||
+++ b/drivers/net/macvtap.c
|
||||
@@ -530,9 +530,11 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from,
|
||||
size = ((base & ~PAGE_MASK) + len + ~PAGE_MASK) >> PAGE_SHIFT;
|
||||
num_pages = get_user_pages_fast(base, size, 0, &page[i]);
|
||||
if ((num_pages != size) ||
|
||||
- (num_pages > MAX_SKB_FRAGS - skb_shinfo(skb)->nr_frags))
|
||||
- /* put_page is in skb free */
|
||||
+ (num_pages > MAX_SKB_FRAGS - skb_shinfo(skb)->nr_frags)) {
|
||||
+ for (i = 0; i < num_pages; i++)
|
||||
+ put_page(page[i]);
|
||||
return -EFAULT;
|
||||
+ }
|
||||
truesize = size * PAGE_SIZE;
|
||||
skb->data_len += len;
|
||||
skb->len += len;
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+53
@@ -0,0 +1,53 @@
|
||||
From c57df8c118c4c11ef6023034aa454636dd8780db Mon Sep 17 00:00:00 2001
|
||||
From: Jason Wang <jasowang@redhat.com>
|
||||
Date: Wed, 2 May 2012 11:42:06 +0800
|
||||
Subject: [PATCH 019/109] macvtap: zerocopy: set SKBTX_DEV_ZEROCOPY only when
|
||||
skb is built successfully
|
||||
|
||||
commit 01d6657b388438def19c8baaea28e742b6ed32ec upstream.
|
||||
|
||||
Currently SKBTX_DEV_ZEROCOPY is set unconditionally after
|
||||
zerocopy_sg_from_iovec(), this would lead to a NULL pointer dereference when macvtap
|
||||
fails to build zerocopy skb because destructor_arg was not
|
||||
initialized. Solve this by setting this flag after the skb has been built
|
||||
successfully.
|
||||
|
||||
Signed-off-by: Jason Wang <jasowang@redhat.com>
|
||||
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
drivers/net/macvtap.c | 9 +++++----
|
||||
1 files changed, 5 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
|
||||
index f217247..7fecd66 100644
|
||||
--- a/drivers/net/macvtap.c
|
||||
+++ b/drivers/net/macvtap.c
|
||||
@@ -698,10 +698,9 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m,
|
||||
if (!skb)
|
||||
goto err;
|
||||
|
||||
- if (zerocopy) {
|
||||
+ if (zerocopy)
|
||||
err = zerocopy_sg_from_iovec(skb, iv, vnet_hdr_len, count);
|
||||
- skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
|
||||
- } else
|
||||
+ else
|
||||
err = skb_copy_datagram_from_iovec(skb, 0, iv, vnet_hdr_len,
|
||||
len);
|
||||
if (err)
|
||||
@@ -720,8 +719,10 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m,
|
||||
rcu_read_lock_bh();
|
||||
vlan = rcu_dereference_bh(q->vlan);
|
||||
/* copy skb_ubuf_info for callback when skb has no error */
|
||||
- if (zerocopy)
|
||||
+ if (zerocopy) {
|
||||
skb_shinfo(skb)->destructor_arg = m->msg_control;
|
||||
+ skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
|
||||
+ }
|
||||
if (vlan)
|
||||
macvlan_start_xmit(skb, vlan->dev);
|
||||
else
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+84
@@ -0,0 +1,84 @@
|
||||
From c93ad33631e3efbb6f02f24c6b6817227b2c9252 Mon Sep 17 00:00:00 2001
|
||||
From: Jason Wang <jasowang@redhat.com>
|
||||
Date: Wed, 2 May 2012 11:42:15 +0800
|
||||
Subject: [PATCH 020/109] macvtap: zerocopy: validate vectors before building
|
||||
skb
|
||||
|
||||
commit b92946e2919134ebe2a4083e4302236295ea2a73 upstream.
|
||||
|
||||
There're several reasons that the vectors need to be validated:
|
||||
|
||||
- Return error when caller provides vectors whose num is greater than UIO_MAXIOV.
|
||||
- Linearize part of skb when userspace provides vectors greater than MAX_SKB_FRAGS.
|
||||
- Return error when userspace provides vectors whose total length may exceed
|
||||
  MAX_SKB_FRAGS * PAGE_SIZE.
|
||||
|
||||
Signed-off-by: Jason Wang <jasowang@redhat.com>
|
||||
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
drivers/net/macvtap.c | 25 +++++++++++++++++++++----
|
||||
1 files changed, 21 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
|
||||
index 7fecd66..26106c0 100644
|
||||
--- a/drivers/net/macvtap.c
|
||||
+++ b/drivers/net/macvtap.c
|
||||
@@ -528,9 +528,10 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from,
|
||||
}
|
||||
base = (unsigned long)from->iov_base + offset;
|
||||
size = ((base & ~PAGE_MASK) + len + ~PAGE_MASK) >> PAGE_SHIFT;
|
||||
+ if (i + size > MAX_SKB_FRAGS)
|
||||
+ return -EMSGSIZE;
|
||||
num_pages = get_user_pages_fast(base, size, 0, &page[i]);
|
||||
- if ((num_pages != size) ||
|
||||
- (num_pages > MAX_SKB_FRAGS - skb_shinfo(skb)->nr_frags)) {
|
||||
+ if (num_pages != size) {
|
||||
for (i = 0; i < num_pages; i++)
|
||||
put_page(page[i]);
|
||||
return -EFAULT;
|
||||
@@ -650,7 +651,7 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m,
|
||||
int err;
|
||||
struct virtio_net_hdr vnet_hdr = { 0 };
|
||||
int vnet_hdr_len = 0;
|
||||
- int copylen;
|
||||
+ int copylen = 0;
|
||||
bool zerocopy = false;
|
||||
|
||||
if (q->flags & IFF_VNET_HDR) {
|
||||
@@ -679,15 +680,31 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m,
|
||||
if (unlikely(len < ETH_HLEN))
|
||||
goto err;
|
||||
|
||||
+ err = -EMSGSIZE;
|
||||
+ if (unlikely(count > UIO_MAXIOV))
|
||||
+ goto err;
|
||||
+
|
||||
if (m && m->msg_control && sock_flag(&q->sk, SOCK_ZEROCOPY))
|
||||
zerocopy = true;
|
||||
|
||||
if (zerocopy) {
|
||||
+ /* Userspace may produce vectors with count greater than
|
||||
+ * MAX_SKB_FRAGS, so we need to linearize parts of the skb
|
||||
+ * to let the rest of data to be fit in the frags.
|
||||
+ */
|
||||
+ if (count > MAX_SKB_FRAGS) {
|
||||
+ copylen = iov_length(iv, count - MAX_SKB_FRAGS);
|
||||
+ if (copylen < vnet_hdr_len)
|
||||
+ copylen = 0;
|
||||
+ else
|
||||
+ copylen -= vnet_hdr_len;
|
||||
+ }
|
||||
/* There are 256 bytes to be copied in skb, so there is enough
|
||||
* room for skb expand head in case it is used.
|
||||
* The rest buffer is mapped from userspace.
|
||||
*/
|
||||
- copylen = vnet_hdr.hdr_len;
|
||||
+ if (copylen < vnet_hdr.hdr_len)
|
||||
+ copylen = vnet_hdr.hdr_len;
|
||||
if (!copylen)
|
||||
copylen = GOODCOPY_LEN;
|
||||
} else
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+35
@@ -0,0 +1,35 @@
|
||||
From 274c1b4b54a12df73eb5fc2763a294ff2a04669c Mon Sep 17 00:00:00 2001
|
||||
From: Avi Kivity <avi@redhat.com>
|
||||
Date: Sun, 22 Apr 2012 17:02:11 +0300
|
||||
Subject: [PATCH 021/109] KVM: Fix buffer overflow in kvm_set_irq()
|
||||
|
||||
commit f2ebd422f71cda9c791f76f85d2ca102ae34a1ed upstream.
|
||||
|
||||
kvm_set_irq() has an internal buffer of three irq routing entries, allowing
|
||||
connecting a GSI to three IRQ chips or one MSI. However setup_routing_entry()
|
||||
does not properly enforce this, allowing three irqchip routes followed by
|
||||
an MSI route to overflow the buffer.
|
||||
|
||||
Fix by ensuring that an MSI entry is added to an empty list.
|
||||
|
||||
Signed-off-by: Avi Kivity <avi@redhat.com>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
virt/kvm/irq_comm.c | 1 +
|
||||
1 files changed, 1 insertions(+), 0 deletions(-)
|
||||
|
||||
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
|
||||
index 9f614b4..272407c 100644
|
||||
--- a/virt/kvm/irq_comm.c
|
||||
+++ b/virt/kvm/irq_comm.c
|
||||
@@ -318,6 +318,7 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt,
|
||||
*/
|
||||
hlist_for_each_entry(ei, n, &rt->map[ue->gsi], link)
|
||||
if (ei->type == KVM_IRQ_ROUTING_MSI ||
|
||||
+ ue->type == KVM_IRQ_ROUTING_MSI ||
|
||||
ue->u.irqchip.irqchip == ei->irqchip.irqchip)
|
||||
return r;
|
||||
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+54
@@ -0,0 +1,54 @@
|
||||
From d0ef0e99137f622218e0395ccc12210e4804b5ed Mon Sep 17 00:00:00 2001
|
||||
From: Jan Kara <jack@suse.cz>
|
||||
Date: Fri, 15 Jun 2012 12:52:46 +0200
|
||||
Subject: [PATCH 022/109] scsi: Silence unnecessary warnings about ioctl to
|
||||
partition
|
||||
|
||||
commit 6d9359280753d2955f86d6411047516a9431eb51 upstream.
|
||||
|
||||
Sometimes, warnings about ioctls to partition happen often enough that they
|
||||
form majority of the warnings in the kernel log and users complain. In some
|
||||
cases warnings are about ioctls such as SG_IO so it's not good to get rid of
|
||||
the warnings completely as they can ease debugging of userspace problems
|
||||
when ioctl is refused.
|
||||
|
||||
Since I have seen warnings from lots of commands, including some proprietary
|
||||
userspace applications, I don't think disallowing the ioctls for processes
|
||||
with CAP_SYS_RAWIO will happen in the near future if ever. So let's just
|
||||
stop warning for processes with CAP_SYS_RAWIO for which ioctl is allowed.
|
||||
|
||||
CC: Paolo Bonzini <pbonzini@redhat.com>
|
||||
CC: James Bottomley <JBottomley@parallels.com>
|
||||
CC: linux-scsi@vger.kernel.org
|
||||
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Signed-off-by: Jan Kara <jack@suse.cz>
|
||||
Signed-off-by: Jens Axboe <axboe@kernel.dk>
|
||||
[bwh: Backported to 3.2: use ENOTTY, not ENOIOCTLCMD]
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
block/scsi_ioctl.c | 5 ++++-
|
||||
1 files changed, 4 insertions(+), 1 deletions(-)
|
||||
|
||||
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
|
||||
index 688be8a..9e76a32 100644
|
||||
--- a/block/scsi_ioctl.c
|
||||
+++ b/block/scsi_ioctl.c
|
||||
@@ -721,11 +721,14 @@ int scsi_verify_blk_ioctl(struct block_device *bd, unsigned int cmd)
|
||||
break;
|
||||
}
|
||||
|
||||
+ if (capable(CAP_SYS_RAWIO))
|
||||
+ return 0;
|
||||
+
|
||||
/* In particular, rule out all resets and host-specific ioctls. */
|
||||
printk_ratelimited(KERN_WARNING
|
||||
"%s: sending ioctl %x to a partition!\n", current->comm, cmd);
|
||||
|
||||
- return capable(CAP_SYS_RAWIO) ? 0 : -ENOTTY;
|
||||
+ return -ENOTTY;
|
||||
}
|
||||
EXPORT_SYMBOL(scsi_verify_blk_ioctl);
|
||||
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+43
@@ -0,0 +1,43 @@
|
||||
From 4255dce0ae728fe63f19ded56b5dc2c324d6f18d Mon Sep 17 00:00:00 2001
|
||||
From: Jason Baron <jbaron@redhat.com>
|
||||
Date: Wed, 25 Apr 2012 16:01:47 -0700
|
||||
Subject: [PATCH 023/109] epoll: clear the tfile_check_list on -ELOOP
|
||||
|
||||
commit 13d518074a952d33d47c428419693f63389547e9 upstream.
|
||||
|
||||
An epoll_ctl(,EPOLL_CTL_ADD,,) operation can return '-ELOOP' to prevent
|
||||
circular epoll dependencies from being created. However, in that case we
|
||||
do not properly clear the 'tfile_check_list'. Thus, add a call to
|
||||
clear_tfile_check_list() for the -ELOOP case.
|
||||
|
||||
Signed-off-by: Jason Baron <jbaron@redhat.com>
|
||||
Reported-by: Yurij M. Plotnikov <Yurij.Plotnikov@oktetlabs.ru>
|
||||
Cc: Nelson Elhage <nelhage@nelhage.com>
|
||||
Cc: Davide Libenzi <davidel@xmailserver.org>
|
||||
Tested-by: Alexandra N. Kossovsky <Alexandra.Kossovsky@oktetlabs.ru>
|
||||
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
fs/eventpoll.c | 4 +++-
|
||||
1 files changed, 3 insertions(+), 1 deletions(-)
|
||||
|
||||
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
|
||||
index 4d9d3a4..a6f3763 100644
|
||||
--- a/fs/eventpoll.c
|
||||
+++ b/fs/eventpoll.c
|
||||
@@ -1629,8 +1629,10 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
|
||||
if (op == EPOLL_CTL_ADD) {
|
||||
if (is_file_epoll(tfile)) {
|
||||
error = -ELOOP;
|
||||
- if (ep_loop_check(ep, tfile) != 0)
|
||||
+ if (ep_loop_check(ep, tfile) != 0) {
|
||||
+ clear_tfile_check_list();
|
||||
goto error_tgt_fput;
|
||||
+ }
|
||||
} else
|
||||
list_add(&tfile->f_tfile_llink, &tfile_check_list);
|
||||
}
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+45
@@ -0,0 +1,45 @@
|
||||
From be1ab01a00ec19b273050ad8f3fbb9472238b026 Mon Sep 17 00:00:00 2001
|
||||
From: Shuah Khan <shuah.khan@hp.com>
|
||||
Date: Wed, 6 Jun 2012 10:50:06 -0600
|
||||
Subject: [PATCH 024/109] iommu/amd: Fix missing iommu_shutdown initialization
|
||||
in passthrough mode
|
||||
|
||||
commit f2f12b6fc032c7b1419fd6db84e2868b5f05a878 upstream.
|
||||
|
||||
The iommu_shutdown callback is not initialized when the AMD
|
||||
IOMMU driver runs in passthrough mode. Fix that by moving
|
||||
the callback initialization before the check for
|
||||
passthrough mode.
|
||||
|
||||
Signed-off-by: Shuah Khan <shuah.khan@hp.com>
|
||||
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
|
||||
[bwh: Backported to 3.2: adjust context]
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
drivers/iommu/amd_iommu_init.c | 3 ++-
|
||||
1 files changed, 2 insertions(+), 1 deletions(-)
|
||||
|
||||
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
|
||||
index 6269eb0..ef2d493 100644
|
||||
--- a/drivers/iommu/amd_iommu_init.c
|
||||
+++ b/drivers/iommu/amd_iommu_init.c
|
||||
@@ -1468,6 +1468,8 @@ static int __init amd_iommu_init(void)
|
||||
|
||||
register_syscore_ops(&amd_iommu_syscore_ops);
|
||||
|
||||
+ x86_platform.iommu_shutdown = disable_iommus;
|
||||
+
|
||||
if (iommu_pass_through)
|
||||
goto out;
|
||||
|
||||
@@ -1476,7 +1478,6 @@ static int __init amd_iommu_init(void)
|
||||
else
|
||||
printk(KERN_INFO "AMD-Vi: Lazy IO/TLB flushing enabled\n");
|
||||
|
||||
- x86_platform.iommu_shutdown = disable_iommus;
|
||||
out:
|
||||
return ret;
|
||||
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+53
@@ -0,0 +1,53 @@
|
||||
From 60eec119060fbd35f569fd77fd448dbcd8d8f011 Mon Sep 17 00:00:00 2001
|
||||
From: Joerg Roedel <joerg.roedel@amd.com>
|
||||
Date: Thu, 21 Jun 2012 14:52:40 +0200
|
||||
Subject: [PATCH 025/109] iommu/amd: Initialize dma_ops for hotplug and sriov
|
||||
devices
|
||||
|
||||
commit ac1534a55d1e87d59a21c09c570605933b551480 upstream.
|
||||
|
||||
When a device is added to the system at runtime the AMD
|
||||
IOMMU driver initializes the necessary data structures to
|
||||
handle translation for it. But it forgets to change the
|
||||
per-device dma_ops to point to the AMD IOMMU driver. So
|
||||
mapping actually never happens and all DMA accesses end in
|
||||
an IO_PAGE_FAULT. Fix this.
|
||||
|
||||
Reported-by: Stefan Assmann <sassmann@redhat.com>
|
||||
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
|
||||
[bwh: Backported to 3.2:
|
||||
- Adjust context
|
||||
- Use global iommu_pass_through; there is no per-device pass_through]
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
drivers/iommu/amd_iommu.c | 7 +++++++
|
||||
1 files changed, 7 insertions(+), 0 deletions(-)
|
||||
|
||||
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
|
||||
index f1d5408..a1b8caa 100644
|
||||
--- a/drivers/iommu/amd_iommu.c
|
||||
+++ b/drivers/iommu/amd_iommu.c
|
||||
@@ -59,6 +59,8 @@ static struct protection_domain *pt_domain;
|
||||
|
||||
static struct iommu_ops amd_iommu_ops;
|
||||
|
||||
+static struct dma_map_ops amd_iommu_dma_ops;
|
||||
+
|
||||
/*
|
||||
* general struct to manage commands send to an IOMMU
|
||||
*/
|
||||
@@ -1878,6 +1880,11 @@ static int device_change_notifier(struct notifier_block *nb,
|
||||
list_add_tail(&dma_domain->list, &iommu_pd_list);
|
||||
spin_unlock_irqrestore(&iommu_pd_list_lock, flags);
|
||||
|
||||
+ if (!iommu_pass_through)
|
||||
+ dev->archdata.dma_ops = &amd_iommu_dma_ops;
|
||||
+ else
|
||||
+ dev->archdata.dma_ops = &nommu_dma_ops;
|
||||
+
|
||||
break;
|
||||
case BUS_NOTIFY_DEL_DEVICE:
|
||||
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+170
@@ -0,0 +1,170 @@
|
||||
From 6455e1a5be4eb27a1587322e0ed3a7ff6c3d8e4f Mon Sep 17 00:00:00 2001
|
||||
From: Stanislaw Ledwon <staszek.ledwon@linux.jf.intel.com>
|
||||
Date: Mon, 18 Jun 2012 15:20:00 +0200
|
||||
Subject: [PATCH 026/109] usb: Add support for root hub port status CAS
|
||||
|
||||
commit 8bea2bd37df08aaa599aa361a9f8b836ba98e554 upstream.
|
||||
|
||||
The host controller port status register supports CAS (Cold Attach
|
||||
Status) bit. This bit could be set when USB3.0 device is connected
|
||||
when system is in Sx state. When the system wakes to S0 this port
|
||||
status with CAS bit is reported and this port can't be used by any
|
||||
device.
|
||||
|
||||
When CAS bit is set the port should be reset by warm reset. This
|
||||
was not supported by xhci driver.
|
||||
|
||||
The issue was found when pendrive was connected to suspended
|
||||
platform. The link state of "Compliance Mode" was reported together
|
||||
with CAS bit. This link state was also not supported by xhci and
|
||||
core/hub.c.
|
||||
|
||||
The CAS bit is defined only for xhci root hub port and it is
|
||||
not supported on regular hubs. The link status is used to force
|
||||
warm reset on port. Make the USB core issue a warm reset when port
|
||||
is in either the 'inactive' or 'compliance mode'. Change the xHCI driver
|
||||
to report 'compliance mode' when the CAS is set. This forces a warm reset
|
||||
on the root hub port.
|
||||
|
||||
This patch should be backported to stable kernels as old as 3.2, that
|
||||
contain the commit 10d674a82e553cb8a1f41027bb3c3e309b3f6804 "USB: When
|
||||
hot reset for USB3 fails, try warm reset."
|
||||
|
||||
Signed-off-by: Stanislaw Ledwon <staszek.ledwon@linux.intel.com>
|
||||
Signed-off-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
|
||||
Acked-by: Andiry Xu <andiry.xu@amd.com>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
drivers/usb/core/hub.c | 18 +++++++++-------
|
||||
drivers/usb/host/xhci-hub.c | 44 +++++++++++++++++++++++++++++++++++++-----
|
||||
drivers/usb/host/xhci.h | 6 ++++-
|
||||
3 files changed, 53 insertions(+), 15 deletions(-)
|
||||
|
||||
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
|
||||
index 50cf41a..2bc736f 100644
|
||||
--- a/drivers/usb/core/hub.c
|
||||
+++ b/drivers/usb/core/hub.c
|
||||
@@ -2039,12 +2039,16 @@ static unsigned hub_is_wusb(struct usb_hub *hub)
|
||||
static int hub_port_reset(struct usb_hub *hub, int port1,
|
||||
struct usb_device *udev, unsigned int delay, bool warm);
|
||||
|
||||
-/* Is a USB 3.0 port in the Inactive state? */
|
||||
-static bool hub_port_inactive(struct usb_hub *hub, u16 portstatus)
|
||||
+/* Is a USB 3.0 port in the Inactive or Complinance Mode state?
|
||||
+ * Port worm reset is required to recover
|
||||
+ */
|
||||
+static bool hub_port_warm_reset_required(struct usb_hub *hub, u16 portstatus)
|
||||
{
|
||||
return hub_is_superspeed(hub->hdev) &&
|
||||
- (portstatus & USB_PORT_STAT_LINK_STATE) ==
|
||||
- USB_SS_PORT_LS_SS_INACTIVE;
|
||||
+ (((portstatus & USB_PORT_STAT_LINK_STATE) ==
|
||||
+ USB_SS_PORT_LS_SS_INACTIVE) ||
|
||||
+ ((portstatus & USB_PORT_STAT_LINK_STATE) ==
|
||||
+ USB_SS_PORT_LS_COMP_MOD)) ;
|
||||
}
|
||||
|
||||
static int hub_port_wait_reset(struct usb_hub *hub, int port1,
|
||||
@@ -2080,7 +2084,7 @@ static int hub_port_wait_reset(struct usb_hub *hub, int port1,
|
||||
*
|
||||
* See https://bugzilla.kernel.org/show_bug.cgi?id=41752
|
||||
*/
|
||||
- if (hub_port_inactive(hub, portstatus)) {
|
||||
+ if (hub_port_warm_reset_required(hub, portstatus)) {
|
||||
int ret;
|
||||
|
||||
if ((portchange & USB_PORT_STAT_C_CONNECTION))
|
||||
@@ -3646,9 +3650,7 @@ static void hub_events(void)
|
||||
/* Warm reset a USB3 protocol port if it's in
|
||||
* SS.Inactive state.
|
||||
*/
|
||||
- if (hub_is_superspeed(hub->hdev) &&
|
||||
- (portstatus & USB_PORT_STAT_LINK_STATE)
|
||||
- == USB_SS_PORT_LS_SS_INACTIVE) {
|
||||
+ if (hub_port_warm_reset_required(hub, portstatus)) {
|
||||
dev_dbg(hub_dev, "warm reset port %d\n", i);
|
||||
hub_port_reset(hub, i, NULL,
|
||||
HUB_BH_RESET_TIME, true);
|
||||
diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c
|
||||
index a8b2980..fd8a2c2 100644
|
||||
--- a/drivers/usb/host/xhci-hub.c
|
||||
+++ b/drivers/usb/host/xhci-hub.c
|
||||
@@ -438,6 +438,42 @@ void xhci_test_and_clear_bit(struct xhci_hcd *xhci, __le32 __iomem **port_array,
|
||||
}
|
||||
}
|
||||
|
||||
+/* Updates Link Status for super Speed port */
|
||||
+static void xhci_hub_report_link_state(u32 *status, u32 status_reg)
|
||||
+{
|
||||
+ u32 pls = status_reg & PORT_PLS_MASK;
|
||||
+
|
||||
+ /* resume state is a xHCI internal state.
|
||||
+ * Do not report it to usb core.
|
||||
+ */
|
||||
+ if (pls == XDEV_RESUME)
|
||||
+ return;
|
||||
+
|
||||
+ /* When the CAS bit is set then warm reset
|
||||
+ * should be performed on port
|
||||
+ */
|
||||
+ if (status_reg & PORT_CAS) {
|
||||
+ /* The CAS bit can be set while the port is
|
||||
+ * in any link state.
|
||||
+ * Only roothubs have CAS bit, so we
|
||||
+ * pretend to be in compliance mode
|
||||
+ * unless we're already in compliance
|
||||
+ * or the inactive state.
|
||||
+ */
|
||||
+ if (pls != USB_SS_PORT_LS_COMP_MOD &&
|
||||
+ pls != USB_SS_PORT_LS_SS_INACTIVE) {
|
||||
+ pls = USB_SS_PORT_LS_COMP_MOD;
|
||||
+ }
|
||||
+ /* Return also connection bit -
|
||||
+ * hub state machine resets port
|
||||
+ * when this bit is set.
|
||||
+ */
|
||||
+ pls |= USB_PORT_STAT_CONNECTION;
|
||||
+ }
|
||||
+ /* update status field */
|
||||
+ *status |= pls;
|
||||
+}
|
||||
+
|
||||
int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue,
|
||||
u16 wIndex, char *buf, u16 wLength)
|
||||
{
|
||||
@@ -579,13 +615,9 @@ int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue,
|
||||
else
|
||||
status |= USB_PORT_STAT_POWER;
|
||||
}
|
||||
- /* Port Link State */
|
||||
+ /* Update Port Link State for super speed ports*/
|
||||
if (hcd->speed == HCD_USB3) {
|
||||
- /* resume state is a xHCI internal state.
|
||||
- * Do not report it to usb core.
|
||||
- */
|
||||
- if ((temp & PORT_PLS_MASK) != XDEV_RESUME)
|
||||
- status |= (temp & PORT_PLS_MASK);
|
||||
+ xhci_hub_report_link_state(&status, temp);
|
||||
}
|
||||
if (bus_state->port_c_suspend & (1 << wIndex))
|
||||
status |= 1 << USB_PORT_FEAT_C_SUSPEND;
|
||||
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
|
||||
index 363b141..7a56805 100644
|
||||
--- a/drivers/usb/host/xhci.h
|
||||
+++ b/drivers/usb/host/xhci.h
|
||||
@@ -341,7 +341,11 @@ struct xhci_op_regs {
|
||||
#define PORT_PLC (1 << 22)
|
||||
/* port configure error change - port failed to configure its link partner */
|
||||
#define PORT_CEC (1 << 23)
|
||||
-/* bit 24 reserved */
|
||||
+/* Cold Attach Status - xHC can set this bit to report device attached during
|
||||
+ * Sx state. Warm port reset should be perfomed to clear this bit and move port
|
||||
+ * to connected state.
|
||||
+ */
|
||||
+#define PORT_CAS (1 << 24)
|
||||
/* wake on connect (enable) */
|
||||
#define PORT_WKCONN_E (1 << 25)
|
||||
/* wake on disconnect (enable) */
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+40
@@ -0,0 +1,40 @@
|
||||
From 5cdc2897a0bb7b11585d5b14eb3f2faa1505348c Mon Sep 17 00:00:00 2001
|
||||
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
|
||||
Date: Sat, 9 Jun 2012 11:07:56 +0800
|
||||
Subject: [PATCH 027/109] gpiolib: wm8994: Pay attention to the value set when
|
||||
enabling as output
|
||||
|
||||
commit 8cd578b6e28693f357867a77598a88ef3deb6b39 upstream.
|
||||
|
||||
Not paying attention to the value being set is a bad thing because it
|
||||
means that we'll not set the hardware up to reflect what was requested.
|
||||
Not setting the hardware up to reflect what was requested means that the
|
||||
caller won't get the results they wanted.
|
||||
|
||||
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
|
||||
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
drivers/gpio/gpio-wm8994.c | 5 ++++-
|
||||
1 files changed, 4 insertions(+), 1 deletions(-)
|
||||
|
||||
diff --git a/drivers/gpio/gpio-wm8994.c b/drivers/gpio/gpio-wm8994.c
|
||||
index 96198f3..a2da8f2 100644
|
||||
--- a/drivers/gpio/gpio-wm8994.c
|
||||
+++ b/drivers/gpio/gpio-wm8994.c
|
||||
@@ -89,8 +89,11 @@ static int wm8994_gpio_direction_out(struct gpio_chip *chip,
|
||||
struct wm8994_gpio *wm8994_gpio = to_wm8994_gpio(chip);
|
||||
struct wm8994 *wm8994 = wm8994_gpio->wm8994;
|
||||
|
||||
+ if (value)
|
||||
+ value = WM8994_GPN_LVL;
|
||||
+
|
||||
return wm8994_set_bits(wm8994, WM8994_GPIO_1 + offset,
|
||||
- WM8994_GPN_DIR, 0);
|
||||
+ WM8994_GPN_DIR | WM8994_GPN_LVL, value);
|
||||
}
|
||||
|
||||
static void wm8994_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+462
@@ -0,0 +1,462 @@
|
||||
From a7d3f237430003ca8d32d1703770f04d32a02b27 Mon Sep 17 00:00:00 2001
|
||||
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
|
||||
Date: Fri, 22 Jun 2012 15:52:09 +0200
|
||||
Subject: [PATCH 028/109] sched/nohz: Rewrite and fix load-avg computation --
|
||||
again
|
||||
|
||||
commit 5167e8d5417bf5c322a703d2927daec727ea40dd upstream.
|
||||
|
||||
Thanks to Charles Wang for spotting the defects in the current code:
|
||||
|
||||
- If we go idle during the sample window -- after sampling, we get a
|
||||
negative bias because we can negate our own sample.
|
||||
|
||||
- If we wake up during the sample window we get a positive bias
|
||||
because we push the sample to a known active period.
|
||||
|
||||
So rewrite the entire nohz load-avg muck once again, now adding
|
||||
copious documentation to the code.
|
||||
|
||||
Reported-and-tested-by: Doug Smythies <dsmythies@telus.net>
|
||||
Reported-and-tested-by: Charles Wang <muming.wq@gmail.com>
|
||||
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
|
||||
Cc: Linus Torvalds <torvalds@linux-foundation.org>
|
||||
Cc: Andrew Morton <akpm@linux-foundation.org>
|
||||
Link: http://lkml.kernel.org/r/1340373782.18025.74.camel@twins
|
||||
[ minor edits ]
|
||||
Signed-off-by: Ingo Molnar <mingo@kernel.org>
|
||||
[bwh: Backported to 3.2: adjust filenames, context]
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
include/linux/sched.h | 8 ++
|
||||
kernel/sched.c | 276 ++++++++++++++++++++++++++++++++++------------
|
||||
kernel/sched_idletask.c | 1 -
|
||||
kernel/time/tick-sched.c | 2 +
|
||||
4 files changed, 213 insertions(+), 74 deletions(-)
|
||||
|
||||
diff --git a/include/linux/sched.h b/include/linux/sched.h
|
||||
index 1c4f3e9..5afa2a3 100644
|
||||
--- a/include/linux/sched.h
|
||||
+++ b/include/linux/sched.h
|
||||
@@ -1892,6 +1892,14 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p,
|
||||
}
|
||||
#endif
|
||||
|
||||
+#ifdef CONFIG_NO_HZ
|
||||
+void calc_load_enter_idle(void);
|
||||
+void calc_load_exit_idle(void);
|
||||
+#else
|
||||
+static inline void calc_load_enter_idle(void) { }
|
||||
+static inline void calc_load_exit_idle(void) { }
|
||||
+#endif /* CONFIG_NO_HZ */
|
||||
+
|
||||
#ifndef CONFIG_CPUMASK_OFFSTACK
|
||||
static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask)
|
||||
{
|
||||
diff --git a/kernel/sched.c b/kernel/sched.c
|
||||
index 576a27f..52ac69b 100644
|
||||
--- a/kernel/sched.c
|
||||
+++ b/kernel/sched.c
|
||||
@@ -1885,7 +1885,6 @@ static void double_rq_unlock(struct rq *rq1, struct rq *rq2)
|
||||
|
||||
#endif
|
||||
|
||||
-static void calc_load_account_idle(struct rq *this_rq);
|
||||
static void update_sysctl(void);
|
||||
static int get_update_sysctl_factor(void);
|
||||
static void update_cpu_load(struct rq *this_rq);
|
||||
@@ -3401,11 +3400,73 @@ unsigned long this_cpu_load(void)
|
||||
}
|
||||
|
||||
|
||||
+/*
|
||||
+ * Global load-average calculations
|
||||
+ *
|
||||
+ * We take a distributed and async approach to calculating the global load-avg
|
||||
+ * in order to minimize overhead.
|
||||
+ *
|
||||
+ * The global load average is an exponentially decaying average of nr_running +
|
||||
+ * nr_uninterruptible.
|
||||
+ *
|
||||
+ * Once every LOAD_FREQ:
|
||||
+ *
|
||||
+ * nr_active = 0;
|
||||
+ * for_each_possible_cpu(cpu)
|
||||
+ * nr_active += cpu_of(cpu)->nr_running + cpu_of(cpu)->nr_uninterruptible;
|
||||
+ *
|
||||
+ * avenrun[n] = avenrun[0] * exp_n + nr_active * (1 - exp_n)
|
||||
+ *
|
||||
+ * Due to a number of reasons the above turns in the mess below:
|
||||
+ *
|
||||
+ * - for_each_possible_cpu() is prohibitively expensive on machines with
|
||||
+ * serious number of cpus, therefore we need to take a distributed approach
|
||||
+ * to calculating nr_active.
|
||||
+ *
|
||||
+ * \Sum_i x_i(t) = \Sum_i x_i(t) - x_i(t_0) | x_i(t_0) := 0
|
||||
+ * = \Sum_i { \Sum_j=1 x_i(t_j) - x_i(t_j-1) }
|
||||
+ *
|
||||
+ * So assuming nr_active := 0 when we start out -- true per definition, we
|
||||
+ * can simply take per-cpu deltas and fold those into a global accumulate
|
||||
+ * to obtain the same result. See calc_load_fold_active().
|
||||
+ *
|
||||
+ * Furthermore, in order to avoid synchronizing all per-cpu delta folding
|
||||
+ * across the machine, we assume 10 ticks is sufficient time for every
|
||||
+ * cpu to have completed this task.
|
||||
+ *
|
||||
+ * This places an upper-bound on the IRQ-off latency of the machine. Then
|
||||
+ * again, being late doesn't loose the delta, just wrecks the sample.
|
||||
+ *
|
||||
+ * - cpu_rq()->nr_uninterruptible isn't accurately tracked per-cpu because
|
||||
+ * this would add another cross-cpu cacheline miss and atomic operation
|
||||
+ * to the wakeup path. Instead we increment on whatever cpu the task ran
|
||||
+ * when it went into uninterruptible state and decrement on whatever cpu
|
||||
+ * did the wakeup. This means that only the sum of nr_uninterruptible over
|
||||
+ * all cpus yields the correct result.
|
||||
+ *
|
||||
+ * This covers the NO_HZ=n code, for extra head-aches, see the comment below.
|
||||
+ */
|
||||
+
|
||||
/* Variables and functions for calc_load */
|
||||
static atomic_long_t calc_load_tasks;
|
||||
static unsigned long calc_load_update;
|
||||
unsigned long avenrun[3];
|
||||
-EXPORT_SYMBOL(avenrun);
|
||||
+EXPORT_SYMBOL(avenrun); /* should be removed */
|
||||
+
|
||||
+/**
|
||||
+ * get_avenrun - get the load average array
|
||||
+ * @loads: pointer to dest load array
|
||||
+ * @offset: offset to add
|
||||
+ * @shift: shift count to shift the result left
|
||||
+ *
|
||||
+ * These values are estimates at best, so no need for locking.
|
||||
+ */
|
||||
+void get_avenrun(unsigned long *loads, unsigned long offset, int shift)
|
||||
+{
|
||||
+ loads[0] = (avenrun[0] + offset) << shift;
|
||||
+ loads[1] = (avenrun[1] + offset) << shift;
|
||||
+ loads[2] = (avenrun[2] + offset) << shift;
|
||||
+}
|
||||
|
||||
static long calc_load_fold_active(struct rq *this_rq)
|
||||
{
|
||||
@@ -3422,6 +3483,9 @@ static long calc_load_fold_active(struct rq *this_rq)
|
||||
return delta;
|
||||
}
|
||||
|
||||
+/*
|
||||
+ * a1 = a0 * e + a * (1 - e)
|
||||
+ */
|
||||
static unsigned long
|
||||
calc_load(unsigned long load, unsigned long exp, unsigned long active)
|
||||
{
|
||||
@@ -3433,30 +3497,118 @@ calc_load(unsigned long load, unsigned long exp, unsigned long active)
|
||||
|
||||
#ifdef CONFIG_NO_HZ
|
||||
/*
|
||||
- * For NO_HZ we delay the active fold to the next LOAD_FREQ update.
|
||||
+ * Handle NO_HZ for the global load-average.
|
||||
+ *
|
||||
+ * Since the above described distributed algorithm to compute the global
|
||||
+ * load-average relies on per-cpu sampling from the tick, it is affected by
|
||||
+ * NO_HZ.
|
||||
+ *
|
||||
+ * The basic idea is to fold the nr_active delta into a global idle-delta upon
|
||||
+ * entering NO_HZ state such that we can include this as an 'extra' cpu delta
|
||||
+ * when we read the global state.
|
||||
+ *
|
||||
+ * Obviously reality has to ruin such a delightfully simple scheme:
|
||||
+ *
|
||||
+ * - When we go NO_HZ idle during the window, we can negate our sample
|
||||
+ * contribution, causing under-accounting.
|
||||
+ *
|
||||
+ * We avoid this by keeping two idle-delta counters and flipping them
|
||||
+ * when the window starts, thus separating old and new NO_HZ load.
|
||||
+ *
|
||||
+ * The only trick is the slight shift in index flip for read vs write.
|
||||
+ *
|
||||
+ * 0s 5s 10s 15s
|
||||
+ * +10 +10 +10 +10
|
||||
+ * |-|-----------|-|-----------|-|-----------|-|
|
||||
+ * r:0 0 1 1 0 0 1 1 0
|
||||
+ * w:0 1 1 0 0 1 1 0 0
|
||||
+ *
|
||||
+ * This ensures we'll fold the old idle contribution in this window while
|
||||
+ * accumulating the new one.
|
||||
+ *
|
||||
+ * - When we wake up from NO_HZ idle during the window, we push up our
|
||||
+ * contribution, since we effectively move our sample point to a known
|
||||
+ * busy state.
|
||||
+ *
|
||||
+ * This is solved by pushing the window forward, and thus skipping the
|
||||
+ * sample, for this cpu (effectively using the idle-delta for this cpu which
|
||||
+ * was in effect at the time the window opened). This also solves the issue
|
||||
+ * of having to deal with a cpu having been in NOHZ idle for multiple
|
||||
+ * LOAD_FREQ intervals.
|
||||
*
|
||||
* When making the ILB scale, we should try to pull this in as well.
|
||||
*/
|
||||
-static atomic_long_t calc_load_tasks_idle;
|
||||
+static atomic_long_t calc_load_idle[2];
|
||||
+static int calc_load_idx;
|
||||
|
||||
-static void calc_load_account_idle(struct rq *this_rq)
|
||||
+static inline int calc_load_write_idx(void)
|
||||
{
|
||||
+ int idx = calc_load_idx;
|
||||
+
|
||||
+ /*
|
||||
+ * See calc_global_nohz(), if we observe the new index, we also
|
||||
+ * need to observe the new update time.
|
||||
+ */
|
||||
+ smp_rmb();
|
||||
+
|
||||
+ /*
|
||||
+ * If the folding window started, make sure we start writing in the
|
||||
+ * next idle-delta.
|
||||
+ */
|
||||
+ if (!time_before(jiffies, calc_load_update))
|
||||
+ idx++;
|
||||
+
|
||||
+ return idx & 1;
|
||||
+}
|
||||
+
|
||||
+static inline int calc_load_read_idx(void)
|
||||
+{
|
||||
+ return calc_load_idx & 1;
|
||||
+}
|
||||
+
|
||||
+void calc_load_enter_idle(void)
|
||||
+{
|
||||
+ struct rq *this_rq = this_rq();
|
||||
long delta;
|
||||
|
||||
+ /*
|
||||
+ * We're going into NOHZ mode, if there's any pending delta, fold it
|
||||
+ * into the pending idle delta.
|
||||
+ */
|
||||
delta = calc_load_fold_active(this_rq);
|
||||
- if (delta)
|
||||
- atomic_long_add(delta, &calc_load_tasks_idle);
|
||||
+ if (delta) {
|
||||
+ int idx = calc_load_write_idx();
|
||||
+ atomic_long_add(delta, &calc_load_idle[idx]);
|
||||
+ }
|
||||
}
|
||||
|
||||
-static long calc_load_fold_idle(void)
|
||||
+void calc_load_exit_idle(void)
|
||||
{
|
||||
- long delta = 0;
|
||||
+ struct rq *this_rq = this_rq();
|
||||
+
|
||||
+ /*
|
||||
+ * If we're still before the sample window, we're done.
|
||||
+ */
|
||||
+ if (time_before(jiffies, this_rq->calc_load_update))
|
||||
+ return;
|
||||
|
||||
/*
|
||||
- * Its got a race, we don't care...
|
||||
+ * We woke inside or after the sample window, this means we're already
|
||||
+ * accounted through the nohz accounting, so skip the entire deal and
|
||||
+ * sync up for the next window.
|
||||
*/
|
||||
- if (atomic_long_read(&calc_load_tasks_idle))
|
||||
- delta = atomic_long_xchg(&calc_load_tasks_idle, 0);
|
||||
+ this_rq->calc_load_update = calc_load_update;
|
||||
+ if (time_before(jiffies, this_rq->calc_load_update + 10))
|
||||
+ this_rq->calc_load_update += LOAD_FREQ;
|
||||
+}
|
||||
+
|
||||
+static long calc_load_fold_idle(void)
|
||||
+{
|
||||
+ int idx = calc_load_read_idx();
|
||||
+ long delta = 0;
|
||||
+
|
||||
+ if (atomic_long_read(&calc_load_idle[idx]))
|
||||
+ delta = atomic_long_xchg(&calc_load_idle[idx], 0);
|
||||
|
||||
return delta;
|
||||
}
|
||||
@@ -3542,66 +3694,39 @@ static void calc_global_nohz(void)
|
||||
{
|
||||
long delta, active, n;
|
||||
|
||||
- /*
|
||||
- * If we crossed a calc_load_update boundary, make sure to fold
|
||||
- * any pending idle changes, the respective CPUs might have
|
||||
- * missed the tick driven calc_load_account_active() update
|
||||
- * due to NO_HZ.
|
||||
- */
|
||||
- delta = calc_load_fold_idle();
|
||||
- if (delta)
|
||||
- atomic_long_add(delta, &calc_load_tasks);
|
||||
-
|
||||
- /*
|
||||
- * It could be the one fold was all it took, we done!
|
||||
- */
|
||||
- if (time_before(jiffies, calc_load_update + 10))
|
||||
- return;
|
||||
-
|
||||
- /*
|
||||
- * Catch-up, fold however many we are behind still
|
||||
- */
|
||||
- delta = jiffies - calc_load_update - 10;
|
||||
- n = 1 + (delta / LOAD_FREQ);
|
||||
+ if (!time_before(jiffies, calc_load_update + 10)) {
|
||||
+ /*
|
||||
+ * Catch-up, fold however many we are behind still
|
||||
+ */
|
||||
+ delta = jiffies - calc_load_update - 10;
|
||||
+ n = 1 + (delta / LOAD_FREQ);
|
||||
|
||||
- active = atomic_long_read(&calc_load_tasks);
|
||||
- active = active > 0 ? active * FIXED_1 : 0;
|
||||
+ active = atomic_long_read(&calc_load_tasks);
|
||||
+ active = active > 0 ? active * FIXED_1 : 0;
|
||||
|
||||
- avenrun[0] = calc_load_n(avenrun[0], EXP_1, active, n);
|
||||
- avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n);
|
||||
- avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n);
|
||||
+ avenrun[0] = calc_load_n(avenrun[0], EXP_1, active, n);
|
||||
+ avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n);
|
||||
+ avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n);
|
||||
|
||||
- calc_load_update += n * LOAD_FREQ;
|
||||
-}
|
||||
-#else
|
||||
-static void calc_load_account_idle(struct rq *this_rq)
|
||||
-{
|
||||
-}
|
||||
+ calc_load_update += n * LOAD_FREQ;
|
||||
+ }
|
||||
|
||||
-static inline long calc_load_fold_idle(void)
|
||||
-{
|
||||
- return 0;
|
||||
+ /*
|
||||
+ * Flip the idle index...
|
||||
+ *
|
||||
+ * Make sure we first write the new time then flip the index, so that
|
||||
+ * calc_load_write_idx() will see the new time when it reads the new
|
||||
+ * index, this avoids a double flip messing things up.
|
||||
+ */
|
||||
+ smp_wmb();
|
||||
+ calc_load_idx++;
|
||||
}
|
||||
+#else /* !CONFIG_NO_HZ */
|
||||
|
||||
-static void calc_global_nohz(void)
|
||||
-{
|
||||
-}
|
||||
-#endif
|
||||
+static inline long calc_load_fold_idle(void) { return 0; }
|
||||
+static inline void calc_global_nohz(void) { }
|
||||
|
||||
-/**
|
||||
- * get_avenrun - get the load average array
|
||||
- * @loads: pointer to dest load array
|
||||
- * @offset: offset to add
|
||||
- * @shift: shift count to shift the result left
|
||||
- *
|
||||
- * These values are estimates at best, so no need for locking.
|
||||
- */
|
||||
-void get_avenrun(unsigned long *loads, unsigned long offset, int shift)
|
||||
-{
|
||||
- loads[0] = (avenrun[0] + offset) << shift;
|
||||
- loads[1] = (avenrun[1] + offset) << shift;
|
||||
- loads[2] = (avenrun[2] + offset) << shift;
|
||||
-}
|
||||
+#endif /* CONFIG_NO_HZ */
|
||||
|
||||
/*
|
||||
* calc_load - update the avenrun load estimates 10 ticks after the
|
||||
@@ -3609,11 +3734,18 @@ void get_avenrun(unsigned long *loads, unsigned long offset, int shift)
|
||||
*/
|
||||
void calc_global_load(unsigned long ticks)
|
||||
{
|
||||
- long active;
|
||||
+ long active, delta;
|
||||
|
||||
if (time_before(jiffies, calc_load_update + 10))
|
||||
return;
|
||||
|
||||
+ /*
|
||||
+ * Fold the 'old' idle-delta to include all NO_HZ cpus.
|
||||
+ */
|
||||
+ delta = calc_load_fold_idle();
|
||||
+ if (delta)
|
||||
+ atomic_long_add(delta, &calc_load_tasks);
|
||||
+
|
||||
active = atomic_long_read(&calc_load_tasks);
|
||||
active = active > 0 ? active * FIXED_1 : 0;
|
||||
|
||||
@@ -3624,12 +3756,7 @@ void calc_global_load(unsigned long ticks)
|
||||
calc_load_update += LOAD_FREQ;
|
||||
|
||||
/*
|
||||
- * Account one period with whatever state we found before
|
||||
- * folding in the nohz state and ageing the entire idle period.
|
||||
- *
|
||||
- * This avoids loosing a sample when we go idle between
|
||||
- * calc_load_account_active() (10 ticks ago) and now and thus
|
||||
- * under-accounting.
|
||||
+ * In case we idled for multiple LOAD_FREQ intervals, catch up in bulk.
|
||||
*/
|
||||
calc_global_nohz();
|
||||
}
|
||||
@@ -3646,7 +3773,6 @@ static void calc_load_account_active(struct rq *this_rq)
|
||||
return;
|
||||
|
||||
delta = calc_load_fold_active(this_rq);
|
||||
- delta += calc_load_fold_idle();
|
||||
if (delta)
|
||||
atomic_long_add(delta, &calc_load_tasks);
|
||||
|
||||
@@ -3654,6 +3780,10 @@ static void calc_load_account_active(struct rq *this_rq)
|
||||
}
|
||||
|
||||
/*
|
||||
+ * End of global load-average stuff
|
||||
+ */
|
||||
+
|
||||
+/*
|
||||
* The exact cpuload at various idx values, calculated at every tick would be
|
||||
* load = (2^idx - 1) / 2^idx * load + 1 / 2^idx * cur_load
|
||||
*
|
||||
diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c
|
||||
index 0a51882..be92bfe 100644
|
||||
--- a/kernel/sched_idletask.c
|
||||
+++ b/kernel/sched_idletask.c
|
||||
@@ -23,7 +23,6 @@ static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int fl
|
||||
static struct task_struct *pick_next_task_idle(struct rq *rq)
|
||||
{
|
||||
schedstat_inc(rq, sched_goidle);
|
||||
- calc_load_account_idle(rq);
|
||||
return rq->idle;
|
||||
}
|
||||
|
||||
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
|
||||
index c923640..9955ebd 100644
|
||||
--- a/kernel/time/tick-sched.c
|
||||
+++ b/kernel/time/tick-sched.c
|
||||
@@ -430,6 +430,7 @@ void tick_nohz_stop_sched_tick(int inidle)
|
||||
*/
|
||||
if (!ts->tick_stopped) {
|
||||
select_nohz_load_balancer(1);
|
||||
+ calc_load_enter_idle();
|
||||
|
||||
ts->idle_tick = hrtimer_get_expires(&ts->sched_timer);
|
||||
ts->tick_stopped = 1;
|
||||
@@ -563,6 +564,7 @@ void tick_nohz_restart_sched_tick(void)
|
||||
account_idle_ticks(ticks);
|
||||
#endif
|
||||
|
||||
+ calc_load_exit_idle();
|
||||
touch_softlockup_watchdog();
|
||||
/*
|
||||
* Cancel the scheduled timer and restore the tick
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
@@ -0,0 +1,70 @@
|
||||
From 4090ab847de2c528ae152e864a7ce604ef300837 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= <bjorn@mork.no>
|
||||
Date: Mon, 2 Jul 2012 19:53:55 +0200
|
||||
Subject: [PATCH 029/109] USB: option: add ZTE MF60
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
commit 8e16e33c168a6efd0c9f7fa9dd4c1e1db9a74553 upstream.
|
||||
|
||||
Switches into a composite device by ejecting the initial
|
||||
driver CD. The four interfaces are: QCDM, AT, QMI/wwan
|
||||
and mass storage. Let this driver manage the two serial
|
||||
interfaces:
|
||||
|
||||
T: Bus=02 Lev=01 Prnt=01 Port=01 Cnt=01 Dev#= 28 Spd=480 MxCh= 0
|
||||
D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1
|
||||
P: Vendor=19d2 ProdID=1402 Rev= 0.00
|
||||
S: Manufacturer=ZTE,Incorporated
|
||||
S: Product=ZTE WCDMA Technologies MSM
|
||||
S: SerialNumber=xxxxx
|
||||
C:* #Ifs= 4 Cfg#= 1 Atr=c0 MxPwr=500mA
|
||||
I:* If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option
|
||||
E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms
|
||||
E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=4ms
|
||||
I:* If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option
|
||||
E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms
|
||||
E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=4ms
|
||||
I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan
|
||||
E: Ad=83(I) Atr=03(Int.) MxPS= 64 Ivl=2ms
|
||||
E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms
|
||||
E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=4ms
|
||||
I:* If#= 3 Alt= 0 #EPs= 2 Cls=08(stor.) Sub=06 Prot=50 Driver=usb-storage
|
||||
E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms
|
||||
E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms
|
||||
|
||||
Signed-off-by: Bjørn Mork <bjorn@mork.no>
|
||||
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
drivers/usb/serial/option.c | 6 ++++++
|
||||
1 files changed, 6 insertions(+), 0 deletions(-)
|
||||
|
||||
diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
|
||||
index 21a4734..5960c7b 100644
|
||||
--- a/drivers/usb/serial/option.c
|
||||
+++ b/drivers/usb/serial/option.c
|
||||
@@ -553,6 +553,10 @@ static const struct option_blacklist_info net_intf1_blacklist = {
|
||||
.reserved = BIT(1),
|
||||
};
|
||||
|
||||
+static const struct option_blacklist_info net_intf2_blacklist = {
|
||||
+ .reserved = BIT(2),
|
||||
+};
|
||||
+
|
||||
static const struct option_blacklist_info net_intf3_blacklist = {
|
||||
.reserved = BIT(3),
|
||||
};
|
||||
@@ -1093,6 +1097,8 @@ static const struct usb_device_id option_ids[] = {
|
||||
{ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1298, 0xff, 0xff, 0xff) },
|
||||
{ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1299, 0xff, 0xff, 0xff) },
|
||||
{ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1300, 0xff, 0xff, 0xff) },
|
||||
+ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1402, 0xff, 0xff, 0xff),
|
||||
+ .driver_info = (kernel_ulong_t)&net_intf2_blacklist },
|
||||
{ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x2002, 0xff,
|
||||
0xff, 0xff), .driver_info = (kernel_ulong_t)&zte_k3765_z_blacklist },
|
||||
{ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x2003, 0xff, 0xff, 0xff) },
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+55
@@ -0,0 +1,55 @@
|
||||
From 26b05210d9f77b9b92fb12a73da5b9f6cb1b3f07 Mon Sep 17 00:00:00 2001
|
||||
From: Gaosen Zhang <gaosen.zhang@mediatek.com>
|
||||
Date: Thu, 5 Jul 2012 21:49:00 +0800
|
||||
Subject: [PATCH 030/109] USB: option: Add MEDIATEK product ids
|
||||
|
||||
commit aacef9c561a693341566a6850c451ce3df68cb9a upstream.
|
||||
|
||||
Signed-off-by: Gaosen Zhang <gaosen.zhang@mediatek.com>
|
||||
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
drivers/usb/serial/option.c | 20 ++++++++++++++++++++
|
||||
1 files changed, 20 insertions(+), 0 deletions(-)
|
||||
|
||||
diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
|
||||
index 5960c7b..5971c95 100644
|
||||
--- a/drivers/usb/serial/option.c
|
||||
+++ b/drivers/usb/serial/option.c
|
||||
@@ -496,6 +496,15 @@ static void option_instat_callback(struct urb *urb);
|
||||
|
||||
/* MediaTek products */
|
||||
#define MEDIATEK_VENDOR_ID 0x0e8d
|
||||
+#define MEDIATEK_PRODUCT_DC_1COM 0x00a0
|
||||
+#define MEDIATEK_PRODUCT_DC_4COM 0x00a5
|
||||
+#define MEDIATEK_PRODUCT_DC_5COM 0x00a4
|
||||
+#define MEDIATEK_PRODUCT_7208_1COM 0x7101
|
||||
+#define MEDIATEK_PRODUCT_7208_2COM 0x7102
|
||||
+#define MEDIATEK_PRODUCT_FP_1COM 0x0003
|
||||
+#define MEDIATEK_PRODUCT_FP_2COM 0x0023
|
||||
+#define MEDIATEK_PRODUCT_FPDC_1COM 0x0043
|
||||
+#define MEDIATEK_PRODUCT_FPDC_2COM 0x0033
|
||||
|
||||
/* Cellient products */
|
||||
#define CELLIENT_VENDOR_ID 0x2692
|
||||
@@ -1240,6 +1249,17 @@ static const struct usb_device_id option_ids[] = {
|
||||
{ USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, 0x00a1, 0xff, 0x02, 0x01) },
|
||||
{ USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, 0x00a2, 0xff, 0x00, 0x00) },
|
||||
{ USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, 0x00a2, 0xff, 0x02, 0x01) }, /* MediaTek MT6276M modem & app port */
|
||||
+ { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_DC_1COM, 0x0a, 0x00, 0x00) },
|
||||
+ { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_DC_5COM, 0xff, 0x02, 0x01) },
|
||||
+ { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_DC_5COM, 0xff, 0x00, 0x00) },
|
||||
+ { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_DC_4COM, 0xff, 0x02, 0x01) },
|
||||
+ { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_DC_4COM, 0xff, 0x00, 0x00) },
|
||||
+ { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_7208_1COM, 0x02, 0x00, 0x00) },
|
||||
+ { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_7208_2COM, 0x02, 0x02, 0x01) },
|
||||
+ { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_FP_1COM, 0x0a, 0x00, 0x00) },
|
||||
+ { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_FP_2COM, 0x0a, 0x00, 0x00) },
|
||||
+ { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_FPDC_1COM, 0x0a, 0x00, 0x00) },
|
||||
+ { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_FPDC_2COM, 0x0a, 0x00, 0x00) },
|
||||
{ USB_DEVICE(CELLIENT_VENDOR_ID, CELLIENT_PRODUCT_MEN200) },
|
||||
{ } /* Terminating entry */
|
||||
};
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+78
@@ -0,0 +1,78 @@
|
||||
From 6fb488dec8482c866a2c7cd4d1da06b85b8b28c7 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= <bjorn@mork.no>
|
||||
Date: Mon, 2 Jul 2012 10:33:14 +0200
|
||||
Subject: [PATCH 031/109] USB: cdc-wdm: fix lockup on error in wdm_read
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
commit b086b6b10d9f182cd8d2f0dcfd7fd11edba93fc9 upstream.
|
||||
|
||||
Clear the WDM_READ flag on empty reads to avoid running
|
||||
forever in an infinite tight loop, causing lockups:
|
||||
|
||||
Jul 1 21:58:11 nemi kernel: [ 3658.898647] qmi_wwan 2-1:1.2: Unexpected error -71
|
||||
Jul 1 21:58:36 nemi kernel: [ 3684.072021] BUG: soft lockup - CPU#0 stuck for 23s! [qmi.pl:12235]
|
||||
Jul 1 21:58:36 nemi kernel: [ 3684.072212] CPU 0
|
||||
Jul 1 21:58:36 nemi kernel: [ 3684.072355]
|
||||
Jul 1 21:58:36 nemi kernel: [ 3684.072367] Pid: 12235, comm: qmi.pl Tainted: P O 3.5.0-rc2+ #13 LENOVO 2776LEG/2776LEG
|
||||
Jul 1 21:58:36 nemi kernel: [ 3684.072383] RIP: 0010:[<ffffffffa0635008>] [<ffffffffa0635008>] spin_unlock_irq+0x8/0xc [cdc_wdm]
|
||||
Jul 1 21:58:36 nemi kernel: [ 3684.072388] RSP: 0018:ffff88022dca1e70 EFLAGS: 00000282
|
||||
Jul 1 21:58:36 nemi kernel: [ 3684.072393] RAX: ffff88022fc3f650 RBX: ffffffff811c56f7 RCX: 00000001000ce8c1
|
||||
Jul 1 21:58:36 nemi kernel: [ 3684.072398] RDX: 0000000000000010 RSI: 000000000267d810 RDI: ffff88022fc3f650
|
||||
Jul 1 21:58:36 nemi kernel: [ 3684.072403] RBP: ffff88022dca1eb0 R08: ffffffffa063578e R09: 0000000000000000
|
||||
Jul 1 21:58:36 nemi kernel: [ 3684.072407] R10: 0000000000000008 R11: 0000000000000246 R12: 0000000000000002
|
||||
Jul 1 21:58:36 nemi kernel: [ 3684.072412] R13: 0000000000000246 R14: ffffffff00000002 R15: ffff8802281d8c88
|
||||
Jul 1 21:58:36 nemi kernel: [ 3684.072418] FS: 00007f666a260700(0000) GS:ffff88023bc00000(0000) knlGS:0000000000000000
|
||||
Jul 1 21:58:36 nemi kernel: [ 3684.072423] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
|
||||
Jul 1 21:58:36 nemi kernel: [ 3684.072428] CR2: 000000000270d9d8 CR3: 000000022e865000 CR4: 00000000000007f0
|
||||
Jul 1 21:58:36 nemi kernel: [ 3684.072433] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
|
||||
Jul 1 21:58:36 nemi kernel: [ 3684.072438] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
|
||||
Jul 1 21:58:36 nemi kernel: [ 3684.072444] Process qmi.pl (pid: 12235, threadinfo ffff88022dca0000, task ffff88022ff76380)
|
||||
Jul 1 21:58:36 nemi kernel: [ 3684.072448] Stack:
|
||||
Jul 1 21:58:36 nemi kernel: [ 3684.072458] ffffffffa063592e 0000000100020000 ffff88022fc3f650 ffff88022fc3f6a8
|
||||
Jul 1 21:58:36 nemi kernel: [ 3684.072466] 0000000000000200 0000000100000000 000000000267d810 0000000000000000
|
||||
Jul 1 21:58:36 nemi kernel: [ 3684.072475] 0000000000000000 ffff880212cfb6d0 0000000000000200 ffff880212cfb6c0
|
||||
Jul 1 21:58:36 nemi kernel: [ 3684.072479] Call Trace:
|
||||
Jul 1 21:58:36 nemi kernel: [ 3684.072489] [<ffffffffa063592e>] ? wdm_read+0x1a0/0x263 [cdc_wdm]
|
||||
Jul 1 21:58:36 nemi kernel: [ 3684.072500] [<ffffffff8110adb7>] ? vfs_read+0xa1/0xfb
|
||||
Jul 1 21:58:36 nemi kernel: [ 3684.072509] [<ffffffff81040589>] ? alarm_setitimer+0x35/0x64
|
||||
Jul 1 21:58:36 nemi kernel: [ 3684.072517] [<ffffffff8110aec7>] ? sys_read+0x45/0x6e
|
||||
Jul 1 21:58:36 nemi kernel: [ 3684.072525] [<ffffffff813725f9>] ? system_call_fastpath+0x16/0x1b
|
||||
Jul 1 21:58:36 nemi kernel: [ 3684.072557] Code: <66> 66 90 c3 83 ff ed 89 f8 74 16 7f 06 83 ff a1 75 0a c3 83 ff f4
|
||||
|
||||
The WDM_READ flag is normally cleared by wdm_int_callback
|
||||
before resubmitting the read urb, and set by wdm_in_callback
|
||||
when this urb returns with data or an error. But a crashing
|
||||
device may cause both a read error and cancelling all urbs.
|
||||
Make sure that the flag is cleared by wdm_read if the buffer
|
||||
is empty.
|
||||
|
||||
We don't clear the flag on errors, as there may be pending
|
||||
data in the buffer which should be processed. The flag will
|
||||
instead be cleared on the next wdm_read call.
|
||||
|
||||
Signed-off-by: Bjørn Mork <bjorn@mork.no>
|
||||
Acked-by: Oliver Neukum <oneukum@suse.de>
|
||||
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
drivers/usb/class/cdc-wdm.c | 2 ++
|
||||
1 files changed, 2 insertions(+), 0 deletions(-)
|
||||
|
||||
diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c
|
||||
index 19fb5fa..9aaed0d 100644
|
||||
--- a/drivers/usb/class/cdc-wdm.c
|
||||
+++ b/drivers/usb/class/cdc-wdm.c
|
||||
@@ -473,6 +473,8 @@ retry:
|
||||
goto retry;
|
||||
}
|
||||
if (!desc->reslength) { /* zero length read */
|
||||
+ dev_dbg(&desc->intf->dev, "%s: zero length - clearing WDM_READ\n", __func__);
|
||||
+ clear_bit(WDM_READ, &desc->flags);
|
||||
spin_unlock_irq(&desc->iuspin);
|
||||
goto retry;
|
||||
}
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+94
@@ -0,0 +1,94 @@
|
||||
From bb5561cb838492a05e5dae25114d768828fe2dfe Mon Sep 17 00:00:00 2001
|
||||
From: Herton Ronaldo Krzesinski <herton.krzesinski@canonical.com>
|
||||
Date: Wed, 16 May 2012 16:21:52 -0300
|
||||
Subject: [PATCH 032/109] mtd: nandsim: don't open code a do_div helper
|
||||
|
||||
commit 596fd46268634082314b3af1ded4612e1b7f3f03 upstream.
|
||||
|
||||
We don't need to open code the divide function, just use div_u64 that
|
||||
already exists and does the same job. While this is a straightforward
|
||||
clean up, there is more to that, the real motivation for this.
|
||||
|
||||
While building on a cross compiling environment in armel, using gcc
|
||||
4.6.3 (Ubuntu/Linaro 4.6.3-1ubuntu5), I was getting the following build
|
||||
error:
|
||||
|
||||
ERROR: "__aeabi_uldivmod" [drivers/mtd/nand/nandsim.ko] undefined!
|
||||
|
||||
After investigating with objdump and hand built assembly version
|
||||
generated with the compiler, I narrowed __aeabi_uldivmod as being
|
||||
generated from the divide function. When nandsim.c is built with
|
||||
-fno-inline-functions-called-once, that happens when
|
||||
CONFIG_DEBUG_SECTION_MISMATCH is enabled, the do_div optimization in
|
||||
arch/arm/include/asm/div64.h doesn't work as expected with the open
|
||||
coded divide function: even if the do_div we are using doesn't have a
|
||||
constant divisor, the compiler still includes the else parts of the
|
||||
optimized do_div macro, and translates the divisions there to use
|
||||
__aeabi_uldivmod, instead of only calling __do_div_asm -> __do_div64 and
|
||||
optimizing/removing everything else out.
|
||||
|
||||
So to reproduce, gcc 4.6 plus CONFIG_DEBUG_SECTION_MISMATCH=y and
|
||||
CONFIG_MTD_NAND_NANDSIM=m should do it, building on armel.
|
||||
|
||||
After this change, the compiler does the intended thing even with
|
||||
-fno-inline-functions-called-once, and optimizes out as expected the
|
||||
constant handling in the optimized do_div on arm. As this also avoids a
|
||||
build issue, I'm marking for Stable, as I think it is applicable for this
|
||||
case.
|
||||
|
||||
Signed-off-by: Herton Ronaldo Krzesinski <herton.krzesinski@canonical.com>
|
||||
Acked-by: Nicolas Pitre <nico@linaro.org>
|
||||
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
|
||||
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
drivers/mtd/nand/nandsim.c | 12 +++---------
|
||||
1 files changed, 3 insertions(+), 9 deletions(-)
|
||||
|
||||
diff --git a/drivers/mtd/nand/nandsim.c b/drivers/mtd/nand/nandsim.c
|
||||
index 34c03be..83e8e1b 100644
|
||||
--- a/drivers/mtd/nand/nandsim.c
|
||||
+++ b/drivers/mtd/nand/nandsim.c
|
||||
@@ -28,7 +28,7 @@
|
||||
#include <linux/module.h>
|
||||
#include <linux/moduleparam.h>
|
||||
#include <linux/vmalloc.h>
|
||||
-#include <asm/div64.h>
|
||||
+#include <linux/math64.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/string.h>
|
||||
@@ -547,12 +547,6 @@ static char *get_partition_name(int i)
|
||||
return kstrdup(buf, GFP_KERNEL);
|
||||
}
|
||||
|
||||
-static uint64_t divide(uint64_t n, uint32_t d)
|
||||
-{
|
||||
- do_div(n, d);
|
||||
- return n;
|
||||
-}
|
||||
-
|
||||
/*
|
||||
* Initialize the nandsim structure.
|
||||
*
|
||||
@@ -581,7 +575,7 @@ static int init_nandsim(struct mtd_info *mtd)
|
||||
ns->geom.oobsz = mtd->oobsize;
|
||||
ns->geom.secsz = mtd->erasesize;
|
||||
ns->geom.pgszoob = ns->geom.pgsz + ns->geom.oobsz;
|
||||
- ns->geom.pgnum = divide(ns->geom.totsz, ns->geom.pgsz);
|
||||
+ ns->geom.pgnum = div_u64(ns->geom.totsz, ns->geom.pgsz);
|
||||
ns->geom.totszoob = ns->geom.totsz + (uint64_t)ns->geom.pgnum * ns->geom.oobsz;
|
||||
ns->geom.secshift = ffs(ns->geom.secsz) - 1;
|
||||
ns->geom.pgshift = chip->page_shift;
|
||||
@@ -924,7 +918,7 @@ static int setup_wear_reporting(struct mtd_info *mtd)
|
||||
|
||||
if (!rptwear)
|
||||
return 0;
|
||||
- wear_eb_count = divide(mtd->size, mtd->erasesize);
|
||||
+ wear_eb_count = div_u64(mtd->size, mtd->erasesize);
|
||||
mem = wear_eb_count * sizeof(unsigned long);
|
||||
if (mem / sizeof(unsigned long) != wear_eb_count) {
|
||||
NS_ERR("Too many erase blocks for wear reporting\n");
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+33
@@ -0,0 +1,33 @@
|
||||
From 9b4b8dd705a1a7f9c4b7c2128663d2e31b1d0265 Mon Sep 17 00:00:00 2001
|
||||
From: Santosh Nayak <santoshprasadnayak@gmail.com>
|
||||
Date: Sat, 23 Jun 2012 07:59:54 -0300
|
||||
Subject: [PATCH 033/109] dvb-core: Release semaphore on error path
|
||||
dvb_register_device()
|
||||
|
||||
commit 82163edcdfa4eb3d74516cc8e9f38dd3d039b67d upstream.
|
||||
|
||||
There is a missing "up_write()" here. Semaphore should be released
|
||||
before returning error value.
|
||||
|
||||
Signed-off-by: Santosh Nayak <santoshprasadnayak@gmail.com>
|
||||
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
drivers/media/dvb/dvb-core/dvbdev.c | 1 +
|
||||
1 files changed, 1 insertions(+), 0 deletions(-)
|
||||
|
||||
diff --git a/drivers/media/dvb/dvb-core/dvbdev.c b/drivers/media/dvb/dvb-core/dvbdev.c
|
||||
index f732877..d5cda35 100644
|
||||
--- a/drivers/media/dvb/dvb-core/dvbdev.c
|
||||
+++ b/drivers/media/dvb/dvb-core/dvbdev.c
|
||||
@@ -243,6 +243,7 @@ int dvb_register_device(struct dvb_adapter *adap, struct dvb_device **pdvbdev,
|
||||
if (minor == MAX_DVB_MINORS) {
|
||||
kfree(dvbdevfops);
|
||||
kfree(dvbdev);
|
||||
+ up_write(&minor_rwsem);
|
||||
mutex_unlock(&dvbdev_register_lock);
|
||||
return -EINVAL;
|
||||
}
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+52
@@ -0,0 +1,52 @@
|
||||
From 6c6190dbd1e0054c77445ed61dcbc70db441d4d2 Mon Sep 17 00:00:00 2001
|
||||
From: Shinya Kuribayashi <shinya.kuribayashi.px@renesas.com>
|
||||
Date: Sat, 7 Jul 2012 13:37:42 +0300
|
||||
Subject: [PATCH 034/109] hwspinlock/core: use global ID to register
|
||||
hwspinlocks on multiple devices
|
||||
|
||||
commit 476a7eeb60e70ddab138e7cb4bc44ef5ac20782e upstream.
|
||||
|
||||
Commit 300bab9770 (hwspinlock/core: register a bank of hwspinlocks in a
|
||||
single API call, 2011-09-06) introduced 'hwspin_lock_register_single()'
|
||||
to register numerous (a bank of) hwspinlock instances in a single API,
|
||||
'hwspin_lock_register()'.
|
||||
|
||||
At which time, 'hwspin_lock_register()' accidentally passes 'local IDs'
|
||||
to 'hwspin_lock_register_single()', despite that ..._single() requires
|
||||
'global IDs' to register hwspinlocks.
|
||||
|
||||
We have to convert into global IDs by supplying the missing 'base_id'.
|
||||
|
||||
Signed-off-by: Shinya Kuribayashi <shinya.kuribayashi.px@renesas.com>
|
||||
[ohad: fix error path of hwspin_lock_register, too]
|
||||
Signed-off-by: Ohad Ben-Cohen <ohad@wizery.com>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
drivers/hwspinlock/hwspinlock_core.c | 4 ++--
|
||||
1 files changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/drivers/hwspinlock/hwspinlock_core.c b/drivers/hwspinlock/hwspinlock_core.c
|
||||
index 61c9cf1..1201a15 100644
|
||||
--- a/drivers/hwspinlock/hwspinlock_core.c
|
||||
+++ b/drivers/hwspinlock/hwspinlock_core.c
|
||||
@@ -345,7 +345,7 @@ int hwspin_lock_register(struct hwspinlock_device *bank, struct device *dev,
|
||||
spin_lock_init(&hwlock->lock);
|
||||
hwlock->bank = bank;
|
||||
|
||||
- ret = hwspin_lock_register_single(hwlock, i);
|
||||
+ ret = hwspin_lock_register_single(hwlock, base_id + i);
|
||||
if (ret)
|
||||
goto reg_failed;
|
||||
}
|
||||
@@ -354,7 +354,7 @@ int hwspin_lock_register(struct hwspinlock_device *bank, struct device *dev,
|
||||
|
||||
reg_failed:
|
||||
while (--i >= 0)
|
||||
- hwspin_lock_unregister_single(i);
|
||||
+ hwspin_lock_unregister_single(base_id + i);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(hwspin_lock_register);
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+116
@@ -0,0 +1,116 @@
|
||||
From 65719aa5de077d1ccbfe535e9b934d6e91d11601 Mon Sep 17 00:00:00 2001
|
||||
From: Dan Williams <dan.j.williams@intel.com>
|
||||
Date: Fri, 22 Jun 2012 10:52:34 -0700
|
||||
Subject: [PATCH 035/109] libsas: fix taskfile corruption in
|
||||
sas_ata_qc_fill_rtf
|
||||
|
||||
commit 6ef1b512f4e6f936d89aa20be3d97a7ec7c290ac upstream.
|
||||
|
||||
fill_result_tf() grabs the taskfile flags from the originating qc which
|
||||
sas_ata_qc_fill_rtf() promptly overwrites. The presence of an
|
||||
ata_taskfile in the sata_device makes it tempting to just copy the full
|
||||
contents in sas_ata_qc_fill_rtf(). However, libata really only wants
|
||||
the fis contents and expects the other portions of the taskfile to not
|
||||
be touched by ->qc_fill_rtf. To that end store a fis buffer in the
|
||||
sata_device and use ata_tf_from_fis() like every other ->qc_fill_rtf()
|
||||
implementation.
|
||||
|
||||
Reported-by: Praveen Murali <pmurali@logicube.com>
|
||||
Tested-by: Praveen Murali <pmurali@logicube.com>
|
||||
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
|
||||
Signed-off-by: James Bottomley <JBottomley@Parallels.com>
|
||||
[bwh: Backported to 3.2: adjust context]
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
drivers/scsi/aic94xx/aic94xx_task.c | 2 +-
|
||||
drivers/scsi/libsas/sas_ata.c | 12 ++++++------
|
||||
include/scsi/libsas.h | 6 ++++--
|
||||
3 files changed, 11 insertions(+), 9 deletions(-)
|
||||
|
||||
diff --git a/drivers/scsi/aic94xx/aic94xx_task.c b/drivers/scsi/aic94xx/aic94xx_task.c
|
||||
index 532d212..393e7ce 100644
|
||||
--- a/drivers/scsi/aic94xx/aic94xx_task.c
|
||||
+++ b/drivers/scsi/aic94xx/aic94xx_task.c
|
||||
@@ -201,7 +201,7 @@ static void asd_get_response_tasklet(struct asd_ascb *ascb,
|
||||
|
||||
if (SAS_STATUS_BUF_SIZE >= sizeof(*resp)) {
|
||||
resp->frame_len = le16_to_cpu(*(__le16 *)(r+6));
|
||||
- memcpy(&resp->ending_fis[0], r+16, 24);
|
||||
+ memcpy(&resp->ending_fis[0], r+16, ATA_RESP_FIS_SIZE);
|
||||
ts->buf_valid_size = sizeof(*resp);
|
||||
}
|
||||
}
|
||||
diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c
|
||||
index db9238f..4868fc9 100644
|
||||
--- a/drivers/scsi/libsas/sas_ata.c
|
||||
+++ b/drivers/scsi/libsas/sas_ata.c
|
||||
@@ -112,12 +112,12 @@ static void sas_ata_task_done(struct sas_task *task)
|
||||
if (stat->stat == SAS_PROTO_RESPONSE || stat->stat == SAM_STAT_GOOD ||
|
||||
((stat->stat == SAM_STAT_CHECK_CONDITION &&
|
||||
dev->sata_dev.command_set == ATAPI_COMMAND_SET))) {
|
||||
- ata_tf_from_fis(resp->ending_fis, &dev->sata_dev.tf);
|
||||
+ memcpy(dev->sata_dev.fis, resp->ending_fis, ATA_RESP_FIS_SIZE);
|
||||
|
||||
if (!link->sactive) {
|
||||
- qc->err_mask |= ac_err_mask(dev->sata_dev.tf.command);
|
||||
+ qc->err_mask |= ac_err_mask(dev->sata_dev.fis[2]);
|
||||
} else {
|
||||
- link->eh_info.err_mask |= ac_err_mask(dev->sata_dev.tf.command);
|
||||
+ link->eh_info.err_mask |= ac_err_mask(dev->sata_dev.fis[2]);
|
||||
if (unlikely(link->eh_info.err_mask))
|
||||
qc->flags |= ATA_QCFLAG_FAILED;
|
||||
}
|
||||
@@ -138,8 +138,8 @@ static void sas_ata_task_done(struct sas_task *task)
|
||||
qc->flags |= ATA_QCFLAG_FAILED;
|
||||
}
|
||||
|
||||
- dev->sata_dev.tf.feature = 0x04; /* status err */
|
||||
- dev->sata_dev.tf.command = ATA_ERR;
|
||||
+ dev->sata_dev.fis[3] = 0x04; /* status err */
|
||||
+ dev->sata_dev.fis[2] = ATA_ERR;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -252,7 +252,7 @@ static bool sas_ata_qc_fill_rtf(struct ata_queued_cmd *qc)
|
||||
{
|
||||
struct domain_device *dev = qc->ap->private_data;
|
||||
|
||||
- memcpy(&qc->result_tf, &dev->sata_dev.tf, sizeof(qc->result_tf));
|
||||
+ ata_tf_from_fis(dev->sata_dev.fis, &qc->result_tf);
|
||||
return true;
|
||||
}
|
||||
|
||||
diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h
|
||||
index 6a308d4..1e100c6 100644
|
||||
--- a/include/scsi/libsas.h
|
||||
+++ b/include/scsi/libsas.h
|
||||
@@ -159,6 +159,8 @@ enum ata_command_set {
|
||||
ATAPI_COMMAND_SET = 1,
|
||||
};
|
||||
|
||||
+#define ATA_RESP_FIS_SIZE 24
|
||||
+
|
||||
struct sata_device {
|
||||
enum ata_command_set command_set;
|
||||
struct smp_resp rps_resp; /* report_phy_sata_resp */
|
||||
@@ -170,7 +172,7 @@ struct sata_device {
|
||||
|
||||
struct ata_port *ap;
|
||||
struct ata_host ata_host;
|
||||
- struct ata_taskfile tf;
|
||||
+ u8 fis[ATA_RESP_FIS_SIZE];
|
||||
u32 sstatus;
|
||||
u32 serror;
|
||||
u32 scontrol;
|
||||
@@ -486,7 +488,7 @@ enum exec_status {
|
||||
*/
|
||||
struct ata_task_resp {
|
||||
u16 frame_len;
|
||||
- u8 ending_fis[24]; /* dev to host or data-in */
|
||||
+ u8 ending_fis[ATA_RESP_FIS_SIZE]; /* dev to host or data-in */
|
||||
u32 sstatus;
|
||||
u32 serror;
|
||||
u32 scontrol;
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+58
@@ -0,0 +1,58 @@
|
||||
From 2710006f98cf587ce6b3108f543689de1bb6d60b Mon Sep 17 00:00:00 2001
|
||||
From: NeilBrown <neilb@suse.de>
|
||||
Date: Mon, 9 Jul 2012 11:34:13 +1000
|
||||
Subject: [PATCH 036/109] md/raid1: fix use-after-free bug in RAID1 data-check
|
||||
code.
|
||||
|
||||
commit 2d4f4f3384d4ef4f7c571448e803a1ce721113d5 upstream.
|
||||
|
||||
This bug has been present ever since data-check was introduced
|
||||
in 2.6.16. However it would only fire if a data-check were
|
||||
done on a degraded array, which was only possible if the array
|
||||
has 3 or more devices. This is certainly possible, but is quite
|
||||
uncommon.
|
||||
|
||||
Since hot-replace was added in 3.3 it can happen more often as
|
||||
the same condition can arise if not all possible replacements are
|
||||
present.
|
||||
|
||||
The problem is that as soon as we submit the last read request, the
|
||||
'r1_bio' structure could be freed at any time, so we really should
|
||||
stop looking at it. If the last device is being read from we will
|
||||
stop looking at it. However if the last device is not due to be read
|
||||
from, we will still check the bio pointer in the r1_bio, but the
|
||||
r1_bio might already be free.
|
||||
|
||||
So use the read_targets counter to make sure we stop looking for bios
|
||||
to submit as soon as we have submitted them all.
|
||||
|
||||
This fix is suitable for any -stable kernel since 2.6.16.
|
||||
|
||||
Reported-by: Arnold Schulz <arnysch@gmx.net>
|
||||
Signed-off-by: NeilBrown <neilb@suse.de>
|
||||
[bwh: Backported to 3.2: no doubling of conf->raid_disks; we don't have
|
||||
hot-replace support]
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
drivers/md/raid1.c | 3 ++-
|
||||
1 files changed, 2 insertions(+), 1 deletions(-)
|
||||
|
||||
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
|
||||
index 7af60ec..58f0055 100644
|
||||
--- a/drivers/md/raid1.c
|
||||
+++ b/drivers/md/raid1.c
|
||||
@@ -2378,9 +2378,10 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp
|
||||
*/
|
||||
if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
|
||||
atomic_set(&r1_bio->remaining, read_targets);
|
||||
- for (i=0; i<conf->raid_disks; i++) {
|
||||
+ for (i = 0; i < conf->raid_disks && read_targets; i++) {
|
||||
bio = r1_bio->bios[i];
|
||||
if (bio->bi_end_io == end_sync_read) {
|
||||
+ read_targets--;
|
||||
md_sync_acct(bio->bi_bdev, nr_sectors);
|
||||
generic_make_request(bio);
|
||||
}
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+166
@@ -0,0 +1,166 @@
|
||||
From 45cd1f6207fb66990e5f25e11fb4cd9486c31794 Mon Sep 17 00:00:00 2001
|
||||
From: Alan Stern <stern@rowland.harvard.edu>
|
||||
Date: Mon, 9 Jul 2012 11:09:21 -0400
|
||||
Subject: [PATCH 037/109] PCI: EHCI: fix crash during suspend on ASUS
|
||||
computers
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
commit dbf0e4c7257f8d684ec1a3c919853464293de66e upstream.
|
||||
|
||||
Quite a few ASUS computers experience a nasty problem, related to the
|
||||
EHCI controllers, when going into system suspend. It was observed
|
||||
that the problem didn't occur if the controllers were not put into the
|
||||
D3 power state before starting the suspend, and commit
|
||||
151b61284776be2d6f02d48c23c3625678960b97 (USB: EHCI: fix crash during
|
||||
suspend on ASUS computers) was created to do this.
|
||||
|
||||
It turned out this approach messed up other computers that didn't have
|
||||
the problem -- it prevented USB wakeup from working. Consequently
|
||||
commit c2fb8a3fa25513de8fedb38509b1f15a5bbee47b (USB: add
|
||||
NO_D3_DURING_SLEEP flag and revert 151b61284776be2) was merged; it
|
||||
reverted the earlier commit and added a whitelist of known good board
|
||||
names.
|
||||
|
||||
Now we know the actual cause of the problem. Thanks to AceLan Kao for
|
||||
tracking it down.
|
||||
|
||||
According to him, an engineer at ASUS explained that some of their
|
||||
BIOSes contain a bug that was added in an attempt to work around a
|
||||
problem in early versions of Windows. When the computer goes into S3
|
||||
suspend, the BIOS tries to verify that the EHCI controllers were first
|
||||
quiesced by the OS. Nothing's wrong with this, but the BIOS does it
|
||||
by checking that the PCI COMMAND registers contain 0 without checking
|
||||
the controllers' power state. If the register isn't 0, the BIOS
|
||||
assumes the controller needs to be quiesced and tries to do so. This
|
||||
involves making various MMIO accesses to the controller, which don't
|
||||
work very well if the controller is already in D3. The end result is
|
||||
a system hang or memory corruption.
|
||||
|
||||
Since the value in the PCI COMMAND register doesn't matter once the
|
||||
controller has been suspended, and since the value will be restored
|
||||
anyway when the controller is resumed, we can work around the BIOS bug
|
||||
simply by setting the register to 0 during system suspend. This patch
|
||||
(as1590) does so and also reverts the second commit mentioned above,
|
||||
which is now unnecessary.
|
||||
|
||||
In theory we could do this for every PCI device. However to avoid
|
||||
introducing new problems, the patch restricts itself to EHCI host
|
||||
controllers.
|
||||
|
||||
Finally the affected systems can suspend with USB wakeup working
|
||||
properly.
|
||||
|
||||
Reference: https://bugzilla.kernel.org/show_bug.cgi?id=37632
|
||||
Reference: https://bugzilla.kernel.org/show_bug.cgi?id=42728
|
||||
Based-on-patch-by: AceLan Kao <acelan.kao@canonical.com>
|
||||
Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
|
||||
Tested-by: Dâniel Fraga <fragabr@gmail.com>
|
||||
Tested-by: Javier Marcet <jmarcet@gmail.com>
|
||||
Tested-by: Andrey Rahmatullin <wrar@wrar.name>
|
||||
Tested-by: Oleksij Rempel <bug-track@fisher-privat.net>
|
||||
Tested-by: Pavel Pisa <pisa@cmp.felk.cvut.cz>
|
||||
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
|
||||
Acked-by: Rafael J. Wysocki <rjw@sisk.pl>
|
||||
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
drivers/pci/pci-driver.c | 12 ++++++++++++
|
||||
drivers/pci/pci.c | 5 -----
|
||||
drivers/pci/quirks.c | 26 --------------------------
|
||||
include/linux/pci.h | 2 --
|
||||
4 files changed, 12 insertions(+), 33 deletions(-)
|
||||
|
||||
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
|
||||
index 12d1e81..d024f83 100644
|
||||
--- a/drivers/pci/pci-driver.c
|
||||
+++ b/drivers/pci/pci-driver.c
|
||||
@@ -742,6 +742,18 @@ static int pci_pm_suspend_noirq(struct device *dev)
|
||||
|
||||
pci_pm_set_unknown_state(pci_dev);
|
||||
|
||||
+ /*
|
||||
+ * Some BIOSes from ASUS have a bug: If a USB EHCI host controller's
|
||||
+ * PCI COMMAND register isn't 0, the BIOS assumes that the controller
|
||||
+ * hasn't been quiesced and tries to turn it off. If the controller
|
||||
+ * is already in D3, this can hang or cause memory corruption.
|
||||
+ *
|
||||
+ * Since the value of the COMMAND register doesn't matter once the
|
||||
+ * device has been suspended, we can safely set it to 0 here.
|
||||
+ */
|
||||
+ if (pci_dev->class == PCI_CLASS_SERIAL_USB_EHCI)
|
||||
+ pci_write_config_word(pci_dev, PCI_COMMAND, 0);
|
||||
+
|
||||
return 0;
|
||||
}
|
||||
|
||||
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
|
||||
index e5b75eb..6d4a531 100644
|
||||
--- a/drivers/pci/pci.c
|
||||
+++ b/drivers/pci/pci.c
|
||||
@@ -1689,11 +1689,6 @@ int pci_prepare_to_sleep(struct pci_dev *dev)
|
||||
if (target_state == PCI_POWER_ERROR)
|
||||
return -EIO;
|
||||
|
||||
- /* Some devices mustn't be in D3 during system sleep */
|
||||
- if (target_state == PCI_D3hot &&
|
||||
- (dev->dev_flags & PCI_DEV_FLAGS_NO_D3_DURING_SLEEP))
|
||||
- return 0;
|
||||
-
|
||||
pci_enable_wake(dev, target_state, device_may_wakeup(&dev->dev));
|
||||
|
||||
error = pci_set_power_state(dev, target_state);
|
||||
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
|
||||
index 3c56fec..78fda9c 100644
|
||||
--- a/drivers/pci/quirks.c
|
||||
+++ b/drivers/pci/quirks.c
|
||||
@@ -2940,32 +2940,6 @@ static void __devinit disable_igfx_irq(struct pci_dev *dev)
|
||||
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0102, disable_igfx_irq);
|
||||
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x010a, disable_igfx_irq);
|
||||
|
||||
-/*
|
||||
- * The Intel 6 Series/C200 Series chipset's EHCI controllers on many
|
||||
- * ASUS motherboards will cause memory corruption or a system crash
|
||||
- * if they are in D3 while the system is put into S3 sleep.
|
||||
- */
|
||||
-static void __devinit asus_ehci_no_d3(struct pci_dev *dev)
|
||||
-{
|
||||
- const char *sys_info;
|
||||
- static const char good_Asus_board[] = "P8Z68-V";
|
||||
-
|
||||
- if (dev->dev_flags & PCI_DEV_FLAGS_NO_D3_DURING_SLEEP)
|
||||
- return;
|
||||
- if (dev->subsystem_vendor != PCI_VENDOR_ID_ASUSTEK)
|
||||
- return;
|
||||
- sys_info = dmi_get_system_info(DMI_BOARD_NAME);
|
||||
- if (sys_info && memcmp(sys_info, good_Asus_board,
|
||||
- sizeof(good_Asus_board) - 1) == 0)
|
||||
- return;
|
||||
-
|
||||
- dev_info(&dev->dev, "broken D3 during system sleep on ASUS\n");
|
||||
- dev->dev_flags |= PCI_DEV_FLAGS_NO_D3_DURING_SLEEP;
|
||||
- device_set_wakeup_capable(&dev->dev, false);
|
||||
-}
|
||||
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1c26, asus_ehci_no_d3);
|
||||
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1c2d, asus_ehci_no_d3);
|
||||
-
|
||||
static void pci_do_fixups(struct pci_dev *dev, struct pci_fixup *f,
|
||||
struct pci_fixup *end)
|
||||
{
|
||||
diff --git a/include/linux/pci.h b/include/linux/pci.h
|
||||
index c0cfa0d..7cda65b 100644
|
||||
--- a/include/linux/pci.h
|
||||
+++ b/include/linux/pci.h
|
||||
@@ -176,8 +176,6 @@ enum pci_dev_flags {
|
||||
PCI_DEV_FLAGS_NO_D3 = (__force pci_dev_flags_t) 2,
|
||||
/* Provide indication device is assigned by a Virtual Machine Manager */
|
||||
PCI_DEV_FLAGS_ASSIGNED = (__force pci_dev_flags_t) 4,
|
||||
- /* Device causes system crash if in D3 during S3 sleep */
|
||||
- PCI_DEV_FLAGS_NO_D3_DURING_SLEEP = (__force pci_dev_flags_t) 8,
|
||||
};
|
||||
|
||||
enum pci_irq_reroute_variant {
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+114
@@ -0,0 +1,114 @@
|
||||
From c9a4beeb70f62ec5976dcbb9086683fda56d6aec Mon Sep 17 00:00:00 2001
|
||||
From: Jiang Liu <jiang.liu@huawei.com>
|
||||
Date: Wed, 11 Jul 2012 14:01:52 -0700
|
||||
Subject: [PATCH 038/109] memory hotplug: fix invalid memory access caused by
|
||||
stale kswapd pointer
|
||||
|
||||
commit d8adde17e5f858427504725218c56aef90e90fc7 upstream.
|
||||
|
||||
kswapd_stop() is called to destroy the kswapd work thread when all memory
|
||||
of a NUMA node has been offlined. But kswapd_stop() only terminates the
|
||||
work thread without resetting NODE_DATA(nid)->kswapd to NULL. The stale
|
||||
pointer will prevent kswapd_run() from creating a new work thread when
|
||||
adding memory to the memory-less NUMA node again. Eventually the stale
|
||||
pointer may cause invalid memory access.
|
||||
|
||||
An example stack dump is below. It was reproduced with 2.6.32, but the latest
|
||||
kernel has the same issue.
|
||||
|
||||
BUG: unable to handle kernel NULL pointer dereference at (null)
|
||||
IP: [<ffffffff81051a94>] exit_creds+0x12/0x78
|
||||
PGD 0
|
||||
Oops: 0000 [#1] SMP
|
||||
last sysfs file: /sys/devices/system/memory/memory391/state
|
||||
CPU 11
|
||||
Modules linked in: cpufreq_conservative cpufreq_userspace cpufreq_powersave acpi_cpufreq microcode fuse loop dm_mod tpm_tis rtc_cmos i2c_i801 rtc_core tpm serio_raw pcspkr sg tpm_bios igb i2c_core iTCO_wdt rtc_lib mptctl iTCO_vendor_support button dca bnx2 usbhid hid uhci_hcd ehci_hcd usbcore sd_mod crc_t10dif edd ext3 mbcache jbd fan ide_pci_generic ide_core ata_generic ata_piix libata thermal processor thermal_sys hwmon mptsas mptscsih mptbase scsi_transport_sas scsi_mod
|
||||
Pid: 7949, comm: sh Not tainted 2.6.32.12-qiuxishi-5-default #92 Tecal RH2285
|
||||
RIP: 0010:exit_creds+0x12/0x78
|
||||
RSP: 0018:ffff8806044f1d78 EFLAGS: 00010202
|
||||
RAX: 0000000000000000 RBX: ffff880604f22140 RCX: 0000000000019502
|
||||
RDX: 0000000000000000 RSI: 0000000000000202 RDI: 0000000000000000
|
||||
RBP: ffff880604f22150 R08: 0000000000000000 R09: ffffffff81a4dc10
|
||||
R10: 00000000000032a0 R11: ffff880006202500 R12: 0000000000000000
|
||||
R13: 0000000000c40000 R14: 0000000000008000 R15: 0000000000000001
|
||||
FS: 00007fbc03d066f0(0000) GS:ffff8800282e0000(0000) knlGS:0000000000000000
|
||||
CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b
|
||||
CR2: 0000000000000000 CR3: 000000060f029000 CR4: 00000000000006e0
|
||||
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
|
||||
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
|
||||
Process sh (pid: 7949, threadinfo ffff8806044f0000, task ffff880603d7c600)
|
||||
Stack:
|
||||
ffff880604f22140 ffffffff8103aac5 ffff880604f22140 ffffffff8104d21e
|
||||
ffff880006202500 0000000000008000 0000000000c38000 ffffffff810bd5b1
|
||||
0000000000000000 ffff880603d7c600 00000000ffffdd29 0000000000000003
|
||||
Call Trace:
|
||||
__put_task_struct+0x5d/0x97
|
||||
kthread_stop+0x50/0x58
|
||||
offline_pages+0x324/0x3da
|
||||
memory_block_change_state+0x179/0x1db
|
||||
store_mem_state+0x9e/0xbb
|
||||
sysfs_write_file+0xd0/0x107
|
||||
vfs_write+0xad/0x169
|
||||
sys_write+0x45/0x6e
|
||||
system_call_fastpath+0x16/0x1b
|
||||
Code: ff 4d 00 0f 94 c0 84 c0 74 08 48 89 ef e8 1f fd ff ff 5b 5d 31 c0 41 5c c3 53 48 8b 87 20 06 00 00 48 89 fb 48 8b bf 18 06 00 00 <8b> 00 48 c7 83 18 06 00 00 00 00 00 00 f0 ff 0f 0f 94 c0 84 c0
|
||||
RIP exit_creds+0x12/0x78
|
||||
RSP <ffff8806044f1d78>
|
||||
CR2: 0000000000000000
|
||||
|
||||
[akpm@linux-foundation.org: add pglist_data.kswapd locking comments]
|
||||
Signed-off-by: Xishi Qiu <qiuxishi@huawei.com>
|
||||
Signed-off-by: Jiang Liu <jiang.liu@huawei.com>
|
||||
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
|
||||
Acked-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
|
||||
Acked-by: Mel Gorman <mgorman@suse.de>
|
||||
Acked-by: David Rientjes <rientjes@google.com>
|
||||
Reviewed-by: Minchan Kim <minchan@kernel.org>
|
||||
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
include/linux/mmzone.h | 2 +-
|
||||
mm/vmscan.c | 7 +++++--
|
||||
2 files changed, 6 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
|
||||
index 188cb2f..905b1e1 100644
|
||||
--- a/include/linux/mmzone.h
|
||||
+++ b/include/linux/mmzone.h
|
||||
@@ -652,7 +652,7 @@ typedef struct pglist_data {
|
||||
range, including holes */
|
||||
int node_id;
|
||||
wait_queue_head_t kswapd_wait;
|
||||
- struct task_struct *kswapd;
|
||||
+ struct task_struct *kswapd; /* Protected by lock_memory_hotplug() */
|
||||
int kswapd_max_order;
|
||||
enum zone_type classzone_idx;
|
||||
} pg_data_t;
|
||||
diff --git a/mm/vmscan.c b/mm/vmscan.c
|
||||
index fbe2d2c..72cf498 100644
|
||||
--- a/mm/vmscan.c
|
||||
+++ b/mm/vmscan.c
|
||||
@@ -3090,14 +3090,17 @@ int kswapd_run(int nid)
|
||||
}
|
||||
|
||||
/*
|
||||
- * Called by memory hotplug when all memory in a node is offlined.
|
||||
+ * Called by memory hotplug when all memory in a node is offlined. Caller must
|
||||
+ * hold lock_memory_hotplug().
|
||||
*/
|
||||
void kswapd_stop(int nid)
|
||||
{
|
||||
struct task_struct *kswapd = NODE_DATA(nid)->kswapd;
|
||||
|
||||
- if (kswapd)
|
||||
+ if (kswapd) {
|
||||
kthread_stop(kswapd);
|
||||
+ NODE_DATA(nid)->kswapd = NULL;
|
||||
+ }
|
||||
}
|
||||
|
||||
static int __init kswapd_init(void)
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+43
@@ -0,0 +1,43 @@
|
||||
From 827be17f16d9325db1c05f012ceab345139f7cad Mon Sep 17 00:00:00 2001
|
||||
From: Luis Henriques <luis.henriques@canonical.com>
|
||||
Date: Wed, 11 Jul 2012 14:02:10 -0700
|
||||
Subject: [PATCH 039/109] ocfs2: fix NULL pointer dereference in
|
||||
__ocfs2_change_file_space()
|
||||
|
||||
commit a4e08d001f2e50bb8b3c4eebadcf08e5535f02ee upstream.
|
||||
|
||||
As ocfs2_fallocate() will invoke __ocfs2_change_file_space() with a NULL
|
||||
as the first parameter (file), it may trigger a NULL pointer dereference
|
||||
due to a missing check.
|
||||
|
||||
Addresses http://bugs.launchpad.net/bugs/1006012
|
||||
|
||||
Signed-off-by: Luis Henriques <luis.henriques@canonical.com>
|
||||
Reported-by: Bret Towe <magnade@gmail.com>
|
||||
Tested-by: Bret Towe <magnade@gmail.com>
|
||||
Cc: Sunil Mushran <sunil.mushran@oracle.com>
|
||||
Acked-by: Joel Becker <jlbec@evilplan.org>
|
||||
Acked-by: Mark Fasheh <mfasheh@suse.com>
|
||||
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
fs/ocfs2/file.c | 2 +-
|
||||
1 files changed, 1 insertions(+), 1 deletions(-)
|
||||
|
||||
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
|
||||
index 07ee5b4..1c7d45e 100644
|
||||
--- a/fs/ocfs2/file.c
|
||||
+++ b/fs/ocfs2/file.c
|
||||
@@ -1950,7 +1950,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode,
|
||||
if (ret < 0)
|
||||
mlog_errno(ret);
|
||||
|
||||
- if (file->f_flags & O_SYNC)
|
||||
+ if (file && (file->f_flags & O_SYNC))
|
||||
handle->h_sync = 1;
|
||||
|
||||
ocfs2_commit_trans(osb, handle);
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+53
@@ -0,0 +1,53 @@
|
||||
From 6a918e81eb228757f20a244ee0d81c32ba7feedc Mon Sep 17 00:00:00 2001
|
||||
From: David Rientjes <rientjes@google.com>
|
||||
Date: Wed, 11 Jul 2012 14:02:13 -0700
|
||||
Subject: [PATCH 040/109] mm, thp: abort compaction if migration page cannot
|
||||
be charged to memcg
|
||||
|
||||
commit 4bf2bba3750f10aa9e62e6949bc7e8329990f01b upstream.
|
||||
|
||||
If page migration cannot charge the temporary page to the memcg,
|
||||
migrate_pages() will return -ENOMEM. This isn't considered in memory
|
||||
compaction however, and the loop continues to iterate over all
|
||||
pageblocks trying to isolate and migrate pages. If a small number of
|
||||
very large memcgs happen to be oom, however, these attempts will mostly
|
||||
be futile leading to an enormous amount of cpu consumption due to the
|
||||
page migration failures.
|
||||
|
||||
This patch will short circuit and fail memory compaction if
|
||||
migrate_pages() returns -ENOMEM. COMPACT_PARTIAL is returned in case
|
||||
some migrations were successful so that the page allocator will retry.
|
||||
|
||||
Signed-off-by: David Rientjes <rientjes@google.com>
|
||||
Acked-by: Mel Gorman <mgorman@suse.de>
|
||||
Cc: Minchan Kim <minchan@kernel.org>
|
||||
Cc: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
|
||||
Cc: Rik van Riel <riel@redhat.com>
|
||||
Cc: Andrea Arcangeli <aarcange@redhat.com>
|
||||
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
mm/compaction.c | 5 ++++-
|
||||
1 files changed, 4 insertions(+), 1 deletions(-)
|
||||
|
||||
diff --git a/mm/compaction.c b/mm/compaction.c
|
||||
index 8fb8a40..50f1c60 100644
|
||||
--- a/mm/compaction.c
|
||||
+++ b/mm/compaction.c
|
||||
@@ -592,8 +592,11 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
|
||||
if (err) {
|
||||
putback_lru_pages(&cc->migratepages);
|
||||
cc->nr_migratepages = 0;
|
||||
+ if (err == -ENOMEM) {
|
||||
+ ret = COMPACT_PARTIAL;
|
||||
+ goto out;
|
||||
+ }
|
||||
}
|
||||
-
|
||||
}
|
||||
|
||||
out:
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+74
@@ -0,0 +1,74 @@
|
||||
From 810c142eafb17318d32209a727060a756a57235d Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Beno=C3=AEt=20Th=C3=A9baudeau?=
|
||||
<benoit.thebaudeau@advansee.com>
|
||||
Date: Wed, 11 Jul 2012 14:02:32 -0700
|
||||
Subject: [PATCH 041/109] drivers/rtc/rtc-mxc.c: fix irq enabled interrupts
|
||||
warning
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
commit b59f6d1febd6cbe9fae4589bf72da0ed32bc69e0 upstream.
|
||||
|
||||
Fixes
|
||||
|
||||
WARNING: at irq/handle.c:146 handle_irq_event_percpu+0x19c/0x1b8()
|
||||
irq 25 handler mxc_rtc_interrupt+0x0/0xac enabled interrupts
|
||||
Modules linked in:
|
||||
(unwind_backtrace+0x0/0xf0) from (warn_slowpath_common+0x4c/0x64)
|
||||
(warn_slowpath_common+0x4c/0x64) from (warn_slowpath_fmt+0x30/0x40)
|
||||
(warn_slowpath_fmt+0x30/0x40) from (handle_irq_event_percpu+0x19c/0x1b8)
|
||||
(handle_irq_event_percpu+0x19c/0x1b8) from (handle_irq_event+0x28/0x38)
|
||||
(handle_irq_event+0x28/0x38) from (handle_level_irq+0x80/0xc4)
|
||||
(handle_level_irq+0x80/0xc4) from (generic_handle_irq+0x24/0x38)
|
||||
(generic_handle_irq+0x24/0x38) from (handle_IRQ+0x30/0x84)
|
||||
(handle_IRQ+0x30/0x84) from (avic_handle_irq+0x2c/0x4c)
|
||||
(avic_handle_irq+0x2c/0x4c) from (__irq_svc+0x40/0x60)
|
||||
Exception stack(0xc050bf60 to 0xc050bfa8)
|
||||
bf60: 00000001 00000000 003c4208 c0018e20 c050a000 c050a000 c054a4c8 c050a000
|
||||
bf80: c05157a8 4117b363 80503bb4 00000000 01000000 c050bfa8 c0018e2c c000e808
|
||||
bfa0: 60000013 ffffffff
|
||||
(__irq_svc+0x40/0x60) from (default_idle+0x1c/0x30)
|
||||
(default_idle+0x1c/0x30) from (cpu_idle+0x68/0xa8)
|
||||
(cpu_idle+0x68/0xa8) from (start_kernel+0x22c/0x26c)
|
||||
|
||||
Signed-off-by: Benoît Thébaudeau <benoit.thebaudeau@advansee.com>
|
||||
Cc: Alessandro Zummo <a.zummo@towertech.it>
|
||||
Cc: Sascha Hauer <kernel@pengutronix.de>
|
||||
Acked-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
|
||||
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
drivers/rtc/rtc-mxc.c | 5 +++--
|
||||
1 files changed, 3 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/drivers/rtc/rtc-mxc.c b/drivers/rtc/rtc-mxc.c
|
||||
index 39e41fb..5160354 100644
|
||||
--- a/drivers/rtc/rtc-mxc.c
|
||||
+++ b/drivers/rtc/rtc-mxc.c
|
||||
@@ -191,10 +191,11 @@ static irqreturn_t mxc_rtc_interrupt(int irq, void *dev_id)
|
||||
struct platform_device *pdev = dev_id;
|
||||
struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
|
||||
void __iomem *ioaddr = pdata->ioaddr;
|
||||
+ unsigned long flags;
|
||||
u32 status;
|
||||
u32 events = 0;
|
||||
|
||||
- spin_lock_irq(&pdata->rtc->irq_lock);
|
||||
+ spin_lock_irqsave(&pdata->rtc->irq_lock, flags);
|
||||
status = readw(ioaddr + RTC_RTCISR) & readw(ioaddr + RTC_RTCIENR);
|
||||
/* clear interrupt sources */
|
||||
writew(status, ioaddr + RTC_RTCISR);
|
||||
@@ -217,7 +218,7 @@ static irqreturn_t mxc_rtc_interrupt(int irq, void *dev_id)
|
||||
rtc_update_alarm(&pdev->dev, &pdata->g_rtc_alarm);
|
||||
|
||||
rtc_update_irq(pdata->rtc, 1, events);
|
||||
- spin_unlock_irq(&pdata->rtc->irq_lock);
|
||||
+ spin_unlock_irqrestore(&pdata->rtc->irq_lock, flags);
|
||||
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+49
@@ -0,0 +1,49 @@
|
||||
From e3b50d463ce69af9fb5b2d4a54e6c37637c2ecef Mon Sep 17 00:00:00 2001
|
||||
From: Bob Liu <lliubbo@gmail.com>
|
||||
Date: Wed, 11 Jul 2012 14:02:35 -0700
|
||||
Subject: [PATCH 042/109] fs: ramfs: file-nommu: add SetPageUptodate()
|
||||
|
||||
commit fea9f718b3d68147f162ed2d870183ce5e0ad8d8 upstream.
|
||||
|
||||
There is a bug in the below scenario for !CONFIG_MMU:
|
||||
|
||||
1. create a new file
|
||||
2. mmap the file and write to it
|
||||
3. reading the file does not return the correct value
|
||||
|
||||
Because
|
||||
|
||||
sys_read() -> generic_file_aio_read() -> simple_readpage() -> clear_page()
|
||||
|
||||
which causes the page to be zeroed.
|
||||
|
||||
Add SetPageUptodate() to ramfs_nommu_expand_for_mapping() so that
|
||||
generic_file_aio_read() does not call simple_readpage().
|
||||
|
||||
Signed-off-by: Bob Liu <lliubbo@gmail.com>
|
||||
Cc: Hugh Dickins <hughd@google.com>
|
||||
Cc: David Howells <dhowells@redhat.com>
|
||||
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
|
||||
Cc: Greg Ungerer <gerg@uclinux.org>
|
||||
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
fs/ramfs/file-nommu.c | 1 +
|
||||
1 files changed, 1 insertions(+), 0 deletions(-)
|
||||
|
||||
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
|
||||
index fbb0b47..d5378d0 100644
|
||||
--- a/fs/ramfs/file-nommu.c
|
||||
+++ b/fs/ramfs/file-nommu.c
|
||||
@@ -110,6 +110,7 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
|
||||
|
||||
/* prevent the page from being discarded on memory pressure */
|
||||
SetPageDirty(page);
|
||||
+ SetPageUptodate(page);
|
||||
|
||||
unlock_page(page);
|
||||
put_page(page);
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+53
@@ -0,0 +1,53 @@
|
||||
From 8d63c0484f5ed79f498c5bf3e5b90de29555bcb7 Mon Sep 17 00:00:00 2001
|
||||
From: Thomas Renninger <trenn@suse.de>
|
||||
Date: Thu, 12 Jul 2012 12:24:33 +0200
|
||||
Subject: [PATCH 043/109] cpufreq / ACPI: Fix not loading acpi-cpufreq driver
|
||||
regression
|
||||
|
||||
commit c4686c71a9183f76e3ef59098da5c098748672f6 upstream.
|
||||
|
||||
Commit d640113fe80e45ebd4a5b420b introduced a regression on SMP
|
||||
systems where the processor core with ACPI id zero is disabled
|
||||
(typically should be the case because of hyperthreading).
|
||||
The regression got spread through stable kernels.
|
||||
On 3.0.X it got introduced via 3.0.18.
|
||||
|
||||
Such platforms may be rare, but do exist.
|
||||
Look out for a disabled processor with acpi_id 0 in dmesg:
|
||||
ACPI: LAPIC (acpi_id[0x00] lapic_id[0x10] disabled)
|
||||
|
||||
This problem has been observed on a:
|
||||
HP Proliant BL280c G6 blade
|
||||
|
||||
This patch restricts the introduced workaround to platforms
|
||||
with nr_cpu_ids <= 1.
|
||||
|
||||
Signed-off-by: Thomas Renninger <trenn@suse.de>
|
||||
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
drivers/acpi/processor_core.c | 6 ++++--
|
||||
1 files changed, 4 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c
|
||||
index c850de4..eff7222 100644
|
||||
--- a/drivers/acpi/processor_core.c
|
||||
+++ b/drivers/acpi/processor_core.c
|
||||
@@ -189,10 +189,12 @@ int acpi_get_cpuid(acpi_handle handle, int type, u32 acpi_id)
|
||||
* Processor (CPU3, 0x03, 0x00000410, 0x06) {}
|
||||
* }
|
||||
*
|
||||
- * Ignores apic_id and always return 0 for CPU0's handle.
|
||||
+ * Ignores apic_id and always returns 0 for the processor
|
||||
+ * handle with acpi id 0 if nr_cpu_ids is 1.
|
||||
+ * This should be the case if SMP tables are not found.
|
||||
* Return -1 for other CPU's handle.
|
||||
*/
|
||||
- if (acpi_id == 0)
|
||||
+ if (nr_cpu_ids <= 1 && acpi_id == 0)
|
||||
return acpi_id;
|
||||
else
|
||||
return apic_id;
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+37
@@ -0,0 +1,37 @@
|
||||
From 9b12ab6f8a8a5859e0165b3510dbecae16ca98e3 Mon Sep 17 00:00:00 2001
|
||||
From: Jean Delvare <khali@linux-fr.org>
|
||||
Date: Thu, 12 Jul 2012 22:47:37 +0200
|
||||
Subject: [PATCH 044/109] hwmon: (it87) Preserve configuration register bits
|
||||
on init
|
||||
|
||||
commit 41002f8dd5938d5ad1d008ce5bfdbfe47fa7b4e8 upstream.
|
||||
|
||||
We were accidentally losing one bit in the configuration register on
|
||||
device initialization. It was reported to freeze one specific system
|
||||
right away. Properly preserve all bits we don't explicitly want to
|
||||
change in order to prevent that.
|
||||
|
||||
Reported-by: Stevie Trujillo <stevie.trujillo@gmail.com>
|
||||
Signed-off-by: Jean Delvare <khali@linux-fr.org>
|
||||
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
drivers/hwmon/it87.c | 2 +-
|
||||
1 files changed, 1 insertions(+), 1 deletions(-)
|
||||
|
||||
diff --git a/drivers/hwmon/it87.c b/drivers/hwmon/it87.c
|
||||
index d912649..1ba7af2 100644
|
||||
--- a/drivers/hwmon/it87.c
|
||||
+++ b/drivers/hwmon/it87.c
|
||||
@@ -2086,7 +2086,7 @@ static void __devinit it87_init_device(struct platform_device *pdev)
|
||||
|
||||
/* Start monitoring */
|
||||
it87_write_value(data, IT87_REG_CONFIG,
|
||||
- (it87_read_value(data, IT87_REG_CONFIG) & 0x36)
|
||||
+ (it87_read_value(data, IT87_REG_CONFIG) & 0x3e)
|
||||
| (update_vbat ? 0x41 : 0x01));
|
||||
}
|
||||
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+42
@@ -0,0 +1,42 @@
|
||||
From 81b7824449f04aec76681f7723b0f7911ad66f11 Mon Sep 17 00:00:00 2001
|
||||
From: Todd Poynor <toddpoynor@google.com>
|
||||
Date: Fri, 13 Jul 2012 15:30:48 +0900
|
||||
Subject: [PATCH 045/109] ARM: SAMSUNG: fix race in s3c_adc_start for ADC
|
||||
|
||||
commit 8265981bb439f3ecc5356fb877a6c2a6636ac88a upstream.
|
||||
|
||||
Checking for adc->ts_pend already claimed should be done with the
|
||||
lock held.
|
||||
|
||||
Signed-off-by: Todd Poynor <toddpoynor@google.com>
|
||||
Acked-by: Ben Dooks <ben-linux@fluff.org>
|
||||
Signed-off-by: Kukjin Kim <kgene.kim@samsung.com>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
arch/arm/plat-samsung/adc.c | 8 +++++---
|
||||
1 files changed, 5 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/arch/arm/plat-samsung/adc.c b/arch/arm/plat-samsung/adc.c
|
||||
index 33ecd0c..b1e05cc 100644
|
||||
--- a/arch/arm/plat-samsung/adc.c
|
||||
+++ b/arch/arm/plat-samsung/adc.c
|
||||
@@ -157,11 +157,13 @@ int s3c_adc_start(struct s3c_adc_client *client,
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
- if (client->is_ts && adc->ts_pend)
|
||||
- return -EAGAIN;
|
||||
-
|
||||
spin_lock_irqsave(&adc->lock, flags);
|
||||
|
||||
+ if (client->is_ts && adc->ts_pend) {
|
||||
+ spin_unlock_irqrestore(&adc->lock, flags);
|
||||
+ return -EAGAIN;
|
||||
+ }
|
||||
+
|
||||
client->channel = channel;
|
||||
client->nr_samples = nr_samples;
|
||||
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+116
@@ -0,0 +1,116 @@
|
||||
From 898f4d272514d19aafee8cd66b796c0553fca080 Mon Sep 17 00:00:00 2001
|
||||
From: Jeff Moyer <jmoyer@redhat.com>
|
||||
Date: Thu, 12 Jul 2012 09:43:14 -0400
|
||||
Subject: [PATCH 046/109] block: fix infinite loop in __getblk_slow
|
||||
|
||||
commit 91f68c89d8f35fe98ea04159b9a3b42d0149478f upstream.
|
||||
|
||||
Commit 080399aaaf35 ("block: don't mark buffers beyond end of disk as
|
||||
mapped") exposed a bug in __getblk_slow that causes mount to hang as it
|
||||
loops infinitely waiting for a buffer that lies beyond the end of the
|
||||
disk to become uptodate.
|
||||
|
||||
The problem was initially reported by Torsten Hilbrich here:
|
||||
|
||||
https://lkml.org/lkml/2012/6/18/54
|
||||
|
||||
and also reported independently here:
|
||||
|
||||
http://www.sysresccd.org/forums/viewtopic.php?f=13&t=4511
|
||||
|
||||
and then Richard W.M. Jones and Marcos Mello noted a few separate
|
||||
bugzillas also associated with the same issue. This patch has been
|
||||
confirmed to fix:
|
||||
|
||||
https://bugzilla.redhat.com/show_bug.cgi?id=835019
|
||||
|
||||
The main problem is here, in __getblk_slow:
|
||||
|
||||
for (;;) {
|
||||
struct buffer_head * bh;
|
||||
int ret;
|
||||
|
||||
bh = __find_get_block(bdev, block, size);
|
||||
if (bh)
|
||||
return bh;
|
||||
|
||||
ret = grow_buffers(bdev, block, size);
|
||||
if (ret < 0)
|
||||
return NULL;
|
||||
if (ret == 0)
|
||||
free_more_memory();
|
||||
}
|
||||
|
||||
__find_get_block does not find the block, since it will not be marked as
|
||||
mapped, and so grow_buffers is called to fill in the buffers for the
|
||||
associated page. I believe the for (;;) loop is there primarily to
|
||||
retry in the case of memory pressure keeping grow_buffers from
|
||||
succeeding. However, we also continue to loop for other cases, like the
|
||||
block lying beyond the end of the disk. So, the fix I came up with is to
|
||||
only loop when grow_buffers fails due to memory allocation issues
|
||||
(return value of 0).
|
||||
|
||||
The attached patch was tested by myself, Torsten, and Rich, and was
|
||||
found to resolve the problem in all cases.
|
||||
|
||||
Signed-off-by: Jeff Moyer <jmoyer@redhat.com>
|
||||
Reported-and-Tested-by: Torsten Hilbrich <torsten.hilbrich@secunet.com>
|
||||
Tested-by: Richard W.M. Jones <rjones@redhat.com>
|
||||
Reviewed-by: Josh Boyer <jwboyer@redhat.com>
|
||||
[ Jens is on vacation, taking this directly - Linus ]
|
||||
--
|
||||
Stable Notes: this patch requires backport to 3.0, 3.2 and 3.3.
|
||||
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
fs/buffer.c | 22 +++++++++++++---------
|
||||
1 files changed, 13 insertions(+), 9 deletions(-)
|
||||
|
||||
diff --git a/fs/buffer.c b/fs/buffer.c
|
||||
index c807931..4115eca 100644
|
||||
--- a/fs/buffer.c
|
||||
+++ b/fs/buffer.c
|
||||
@@ -1087,6 +1087,9 @@ grow_buffers(struct block_device *bdev, sector_t block, int size)
|
||||
static struct buffer_head *
|
||||
__getblk_slow(struct block_device *bdev, sector_t block, int size)
|
||||
{
|
||||
+ int ret;
|
||||
+ struct buffer_head *bh;
|
||||
+
|
||||
/* Size must be multiple of hard sectorsize */
|
||||
if (unlikely(size & (bdev_logical_block_size(bdev)-1) ||
|
||||
(size < 512 || size > PAGE_SIZE))) {
|
||||
@@ -1099,20 +1102,21 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
- for (;;) {
|
||||
- struct buffer_head * bh;
|
||||
- int ret;
|
||||
+retry:
|
||||
+ bh = __find_get_block(bdev, block, size);
|
||||
+ if (bh)
|
||||
+ return bh;
|
||||
|
||||
+ ret = grow_buffers(bdev, block, size);
|
||||
+ if (ret == 0) {
|
||||
+ free_more_memory();
|
||||
+ goto retry;
|
||||
+ } else if (ret > 0) {
|
||||
bh = __find_get_block(bdev, block, size);
|
||||
if (bh)
|
||||
return bh;
|
||||
-
|
||||
- ret = grow_buffers(bdev, block, size);
|
||||
- if (ret < 0)
|
||||
- return NULL;
|
||||
- if (ret == 0)
|
||||
- free_more_memory();
|
||||
}
|
||||
+ return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+42
@@ -0,0 +1,42 @@
|
||||
From 51b23c5c8a3aacf16acf8b723c35a23c07c37115 Mon Sep 17 00:00:00 2001
|
||||
From: Dave Jones <davej@redhat.com>
|
||||
Date: Fri, 13 Jul 2012 13:35:36 -0400
|
||||
Subject: [PATCH 047/109] Remove easily user-triggerable BUG from
|
||||
generic_setlease
|
||||
|
||||
commit 8d657eb3b43861064d36241e88d9d61c709f33f0 upstream.
|
||||
|
||||
This can be trivially triggered from userspace by passing in something unexpected.
|
||||
|
||||
kernel BUG at fs/locks.c:1468!
|
||||
invalid opcode: 0000 [#1] SMP
|
||||
RIP: 0010:generic_setlease+0xc2/0x100
|
||||
Call Trace:
|
||||
__vfs_setlease+0x35/0x40
|
||||
fcntl_setlease+0x76/0x150
|
||||
sys_fcntl+0x1c6/0x810
|
||||
system_call_fastpath+0x1a/0x1f
|
||||
|
||||
Signed-off-by: Dave Jones <davej@redhat.com>
|
||||
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
fs/locks.c | 2 +-
|
||||
1 files changed, 1 insertions(+), 1 deletions(-)
|
||||
|
||||
diff --git a/fs/locks.c b/fs/locks.c
|
||||
index 0d68f1f..6a64f15 100644
|
||||
--- a/fs/locks.c
|
||||
+++ b/fs/locks.c
|
||||
@@ -1465,7 +1465,7 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
|
||||
case F_WRLCK:
|
||||
return generic_add_lease(filp, arg, flp);
|
||||
default:
|
||||
- BUG();
|
||||
+ return -EINVAL;
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL(generic_setlease);
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+32
@@ -0,0 +1,32 @@
|
||||
From 8750544d6522f38e7f5722ee263d0f95941c9bd8 Mon Sep 17 00:00:00 2001
|
||||
From: Samuel Ortiz <sameo@linux.intel.com>
|
||||
Date: Thu, 10 May 2012 19:45:51 +0200
|
||||
Subject: [PATCH 048/109] NFC: Export nfc.h to userland
|
||||
|
||||
commit dbd4fcaf8d664fab4163b1f8682e41ad8bff3444 upstream.
|
||||
|
||||
The netlink commands and attributes, along with the socket structure
|
||||
definitions need to be exported.
|
||||
|
||||
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
|
||||
Signed-off-by: John W. Linville <linville@tuxdriver.com>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
include/linux/Kbuild | 1 +
|
||||
1 files changed, 1 insertions(+), 0 deletions(-)
|
||||
|
||||
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
|
||||
index bd21ecd..a3ce901 100644
|
||||
--- a/include/linux/Kbuild
|
||||
+++ b/include/linux/Kbuild
|
||||
@@ -268,6 +268,7 @@ header-y += netfilter_ipv4.h
|
||||
header-y += netfilter_ipv6.h
|
||||
header-y += netlink.h
|
||||
header-y += netrom.h
|
||||
+header-y += nfc.h
|
||||
header-y += nfs.h
|
||||
header-y += nfs2.h
|
||||
header-y += nfs3.h
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+178
@@ -0,0 +1,178 @@
|
||||
From b6807062ada796cdfde2c0f5ca59390b0c916aae Mon Sep 17 00:00:00 2001
|
||||
From: Bojan Smojver <bojan@rexursive.com>
|
||||
Date: Sun, 29 Apr 2012 22:42:06 +0200
|
||||
Subject: [PATCH 049/109] PM / Hibernate: Hibernate/thaw fixes/improvements
|
||||
|
||||
commit 5a21d489fd9541a4a66b9a500659abaca1b19a51 upstream.
|
||||
|
||||
1. Do not allocate memory for buffers from emergency pools, unless
|
||||
absolutely required. Do not warn about and do not retry non-essential
|
||||
failed allocations.
|
||||
|
||||
2. Do not check the amount of free pages left on every single page
|
||||
write, but wait until one map is completely populated and then check.
|
||||
|
||||
3. Set maximum number of pages for read buffering consistently, instead
|
||||
of inadvertently depending on the size of the sector type.
|
||||
|
||||
4. Fix copyright line, which I missed when I submitted the hibernation
|
||||
threading patch.
|
||||
|
||||
5. Dispense with bit shifting arithmetic to improve readability.
|
||||
|
||||
6. Really recalculate the number of pages required to be free after all
|
||||
allocations have been done.
|
||||
|
||||
7. Fix calculation of pages required for read buffering. Only count in
|
||||
pages that do not belong to high memory.
|
||||
|
||||
Signed-off-by: Bojan Smojver <bojan@rexursive.com>
|
||||
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
kernel/power/swap.c | 62 ++++++++++++++++++++++++++++++++-------------------
|
||||
1 files changed, 39 insertions(+), 23 deletions(-)
|
||||
|
||||
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
|
||||
index b313086..64f8f97 100644
|
||||
--- a/kernel/power/swap.c
|
||||
+++ b/kernel/power/swap.c
|
||||
@@ -6,7 +6,7 @@
|
||||
*
|
||||
* Copyright (C) 1998,2001-2005 Pavel Machek <pavel@ucw.cz>
|
||||
* Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
|
||||
- * Copyright (C) 2010 Bojan Smojver <bojan@rexursive.com>
|
||||
+ * Copyright (C) 2010-2012 Bojan Smojver <bojan@rexursive.com>
|
||||
*
|
||||
* This file is released under the GPLv2.
|
||||
*
|
||||
@@ -283,14 +283,17 @@ static int write_page(void *buf, sector_t offset, struct bio **bio_chain)
|
||||
return -ENOSPC;
|
||||
|
||||
if (bio_chain) {
|
||||
- src = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH);
|
||||
+ src = (void *)__get_free_page(__GFP_WAIT | __GFP_NOWARN |
|
||||
+ __GFP_NORETRY);
|
||||
if (src) {
|
||||
copy_page(src, buf);
|
||||
} else {
|
||||
ret = hib_wait_on_bio_chain(bio_chain); /* Free pages */
|
||||
if (ret)
|
||||
return ret;
|
||||
- src = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH);
|
||||
+ src = (void *)__get_free_page(__GFP_WAIT |
|
||||
+ __GFP_NOWARN |
|
||||
+ __GFP_NORETRY);
|
||||
if (src) {
|
||||
copy_page(src, buf);
|
||||
} else {
|
||||
@@ -368,12 +371,17 @@ static int swap_write_page(struct swap_map_handle *handle, void *buf,
|
||||
clear_page(handle->cur);
|
||||
handle->cur_swap = offset;
|
||||
handle->k = 0;
|
||||
- }
|
||||
- if (bio_chain && low_free_pages() <= handle->reqd_free_pages) {
|
||||
- error = hib_wait_on_bio_chain(bio_chain);
|
||||
- if (error)
|
||||
- goto out;
|
||||
- handle->reqd_free_pages = reqd_free_pages();
|
||||
+
|
||||
+ if (bio_chain && low_free_pages() <= handle->reqd_free_pages) {
|
||||
+ error = hib_wait_on_bio_chain(bio_chain);
|
||||
+ if (error)
|
||||
+ goto out;
|
||||
+ /*
|
||||
+ * Recalculate the number of required free pages, to
|
||||
+ * make sure we never take more than half.
|
||||
+ */
|
||||
+ handle->reqd_free_pages = reqd_free_pages();
|
||||
+ }
|
||||
}
|
||||
out:
|
||||
return error;
|
||||
@@ -420,8 +428,9 @@ static int swap_writer_finish(struct swap_map_handle *handle,
|
||||
/* Maximum number of threads for compression/decompression. */
|
||||
#define LZO_THREADS 3
|
||||
|
||||
-/* Maximum number of pages for read buffering. */
|
||||
-#define LZO_READ_PAGES (MAP_PAGE_ENTRIES * 8)
|
||||
+/* Minimum/maximum number of pages for read buffering. */
|
||||
+#define LZO_MIN_RD_PAGES 1024
|
||||
+#define LZO_MAX_RD_PAGES 8192
|
||||
|
||||
|
||||
/**
|
||||
@@ -632,12 +641,6 @@ static int save_image_lzo(struct swap_map_handle *handle,
|
||||
}
|
||||
|
||||
/*
|
||||
- * Adjust number of free pages after all allocations have been done.
|
||||
- * We don't want to run out of pages when writing.
|
||||
- */
|
||||
- handle->reqd_free_pages = reqd_free_pages();
|
||||
-
|
||||
- /*
|
||||
* Start the CRC32 thread.
|
||||
*/
|
||||
init_waitqueue_head(&crc->go);
|
||||
@@ -658,6 +661,12 @@ static int save_image_lzo(struct swap_map_handle *handle,
|
||||
goto out_clean;
|
||||
}
|
||||
|
||||
+ /*
|
||||
+ * Adjust the number of required free pages after all allocations have
|
||||
+ * been done. We don't want to run out of pages when writing.
|
||||
+ */
|
||||
+ handle->reqd_free_pages = reqd_free_pages();
|
||||
+
|
||||
printk(KERN_INFO
|
||||
"PM: Using %u thread(s) for compression.\n"
|
||||
"PM: Compressing and saving image data (%u pages) ... ",
|
||||
@@ -1067,7 +1076,7 @@ static int load_image_lzo(struct swap_map_handle *handle,
|
||||
unsigned i, thr, run_threads, nr_threads;
|
||||
unsigned ring = 0, pg = 0, ring_size = 0,
|
||||
have = 0, want, need, asked = 0;
|
||||
- unsigned long read_pages;
|
||||
+ unsigned long read_pages = 0;
|
||||
unsigned char **page = NULL;
|
||||
struct dec_data *data = NULL;
|
||||
struct crc_data *crc = NULL;
|
||||
@@ -1079,7 +1088,7 @@ static int load_image_lzo(struct swap_map_handle *handle,
|
||||
nr_threads = num_online_cpus() - 1;
|
||||
nr_threads = clamp_val(nr_threads, 1, LZO_THREADS);
|
||||
|
||||
- page = vmalloc(sizeof(*page) * LZO_READ_PAGES);
|
||||
+ page = vmalloc(sizeof(*page) * LZO_MAX_RD_PAGES);
|
||||
if (!page) {
|
||||
printk(KERN_ERR "PM: Failed to allocate LZO page\n");
|
||||
ret = -ENOMEM;
|
||||
@@ -1144,15 +1153,22 @@ static int load_image_lzo(struct swap_map_handle *handle,
|
||||
}
|
||||
|
||||
/*
|
||||
- * Adjust number of pages for read buffering, in case we are short.
|
||||
+ * Set the number of pages for read buffering.
|
||||
+ * This is complete guesswork, because we'll only know the real
|
||||
+ * picture once prepare_image() is called, which is much later on
|
||||
+ * during the image load phase. We'll assume the worst case and
|
||||
+ * say that none of the image pages are from high memory.
|
||||
*/
|
||||
- read_pages = (nr_free_pages() - snapshot_get_image_size()) >> 1;
|
||||
- read_pages = clamp_val(read_pages, LZO_CMP_PAGES, LZO_READ_PAGES);
|
||||
+ if (low_free_pages() > snapshot_get_image_size())
|
||||
+ read_pages = (low_free_pages() - snapshot_get_image_size()) / 2;
|
||||
+ read_pages = clamp_val(read_pages, LZO_MIN_RD_PAGES, LZO_MAX_RD_PAGES);
|
||||
|
||||
for (i = 0; i < read_pages; i++) {
|
||||
page[i] = (void *)__get_free_page(i < LZO_CMP_PAGES ?
|
||||
__GFP_WAIT | __GFP_HIGH :
|
||||
- __GFP_WAIT);
|
||||
+ __GFP_WAIT | __GFP_NOWARN |
|
||||
+ __GFP_NORETRY);
|
||||
+
|
||||
if (!page[i]) {
|
||||
if (i < LZO_CMP_PAGES) {
|
||||
ring_size = i;
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+40
@@ -0,0 +1,40 @@
|
||||
From c83354ed38bbb4ebfa25f954d825594453b160c1 Mon Sep 17 00:00:00 2001
|
||||
From: Michal Kazior <michal.kazior@tieto.com>
|
||||
Date: Fri, 8 Jun 2012 10:55:44 +0200
|
||||
Subject: [PATCH 050/109] cfg80211: check iface combinations only when iface
|
||||
is running
|
||||
|
||||
commit f8cdddb8d61d16a156229f0910f7ecfc7a82c003 upstream.
|
||||
|
||||
Don't validate interface combinations on a stopped
|
||||
interface. Otherwise we might end up being able to
|
||||
create a new interface with a certain type, but
|
||||
won't be able to change an existing interface
|
||||
into that type.
|
||||
|
||||
This also skips some other functions when
|
||||
interface is stopped and changing interface type.
|
||||
|
||||
Signed-off-by: Michal Kazior <michal.kazior@tieto.com>
|
||||
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
net/wireless/util.c | 2 +-
|
||||
1 files changed, 1 insertions(+), 1 deletions(-)
|
||||
|
||||
diff --git a/net/wireless/util.c b/net/wireless/util.c
|
||||
index d38815d..74d5292 100644
|
||||
--- a/net/wireless/util.c
|
||||
+++ b/net/wireless/util.c
|
||||
@@ -813,7 +813,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
|
||||
ntype == NL80211_IFTYPE_P2P_CLIENT))
|
||||
return -EBUSY;
|
||||
|
||||
- if (ntype != otype) {
|
||||
+ if (ntype != otype && netif_running(dev)) {
|
||||
err = cfg80211_can_change_interface(rdev, dev->ieee80211_ptr,
|
||||
ntype);
|
||||
if (err)
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+70
@@ -0,0 +1,70 @@
|
||||
From ea2ca0ebd427d4a745043e6e030619221fe9a55b Mon Sep 17 00:00:00 2001
|
||||
From: Takashi Iwai <tiwai@suse.de>
|
||||
Date: Mon, 25 Jun 2012 15:07:17 +0200
|
||||
Subject: [PATCH 051/109] intel_ips: blacklist HP ProBook laptops
|
||||
|
||||
commit 88ca518b0bb4161e5f20f8a1d9cc477cae294e54 upstream.
|
||||
|
||||
intel_ips driver spews the warning message
|
||||
"ME failed to update for more than 1s, likely hung"
|
||||
at each second endlessly on HP ProBook laptops with IronLake.
|
||||
|
||||
As this has never worked, better to blacklist the driver for now.
|
||||
|
||||
Signed-off-by: Takashi Iwai <tiwai@suse.de>
|
||||
Signed-off-by: Matthew Garrett <mjg@redhat.com>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
drivers/platform/x86/intel_ips.c | 22 ++++++++++++++++++++++
|
||||
1 files changed, 22 insertions(+), 0 deletions(-)
|
||||
|
||||
diff --git a/drivers/platform/x86/intel_ips.c b/drivers/platform/x86/intel_ips.c
|
||||
index 809a3ae..b46ec11 100644
|
||||
--- a/drivers/platform/x86/intel_ips.c
|
||||
+++ b/drivers/platform/x86/intel_ips.c
|
||||
@@ -72,6 +72,7 @@
|
||||
#include <linux/string.h>
|
||||
#include <linux/tick.h>
|
||||
#include <linux/timer.h>
|
||||
+#include <linux/dmi.h>
|
||||
#include <drm/i915_drm.h>
|
||||
#include <asm/msr.h>
|
||||
#include <asm/processor.h>
|
||||
@@ -1505,6 +1506,24 @@ static DEFINE_PCI_DEVICE_TABLE(ips_id_table) = {
|
||||
|
||||
MODULE_DEVICE_TABLE(pci, ips_id_table);
|
||||
|
||||
+static int ips_blacklist_callback(const struct dmi_system_id *id)
|
||||
+{
|
||||
+ pr_info("Blacklisted intel_ips for %s\n", id->ident);
|
||||
+ return 1;
|
||||
+}
|
||||
+
|
||||
+static const struct dmi_system_id ips_blacklist[] = {
|
||||
+ {
|
||||
+ .callback = ips_blacklist_callback,
|
||||
+ .ident = "HP ProBook",
|
||||
+ .matches = {
|
||||
+ DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
|
||||
+ DMI_MATCH(DMI_PRODUCT_NAME, "HP ProBook"),
|
||||
+ },
|
||||
+ },
|
||||
+ { } /* terminating entry */
|
||||
+};
|
||||
+
|
||||
static int ips_probe(struct pci_dev *dev, const struct pci_device_id *id)
|
||||
{
|
||||
u64 platform_info;
|
||||
@@ -1514,6 +1533,9 @@ static int ips_probe(struct pci_dev *dev, const struct pci_device_id *id)
|
||||
u16 htshi, trc, trc_required_mask;
|
||||
u8 tse;
|
||||
|
||||
+ if (dmi_check_system(ips_blacklist))
|
||||
+ return -ENODEV;
|
||||
+
|
||||
ips = kzalloc(sizeof(struct ips_driver), GFP_KERNEL);
|
||||
if (!ips)
|
||||
return -ENOMEM;
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+42
@@ -0,0 +1,42 @@
|
||||
From 0e6bee2eb164145946ea6ca49c4fd1e02c7177fa Mon Sep 17 00:00:00 2001
|
||||
From: Cloud Ren <cjren@qca.qualcomm.com>
|
||||
Date: Tue, 3 Jul 2012 16:51:48 +0000
|
||||
Subject: [PATCH 052/109] atl1c: fix issue of transmit queue 0 timed out
|
||||
|
||||
commit b94e52f62683dc0b00c6d1b58b80929a078c0fd5 upstream.
|
||||
|
||||
Some people report that atl1c could cause a system hang with the following
|
||||
kernel trace info:
|
||||
---------------------------------------
|
||||
WARNING: at.../net/sched/sch_generic.c:258 dev_watchdog+0x1db/0x1d0()
|
||||
...
|
||||
NETDEV WATCHDOG: eth0 (atl1c): transmit queue 0 timed out
|
||||
...
|
||||
---------------------------------------
|
||||
This is caused by calling netif_stop_queue when the cable link is down.
|
||||
So remove netif_stop_queue, because link_watch will take it over.
|
||||
|
||||
Signed-off-by: xiong <xiong@qca.qualcomm.com>
|
||||
Signed-off-by: Cloud Ren <cjren@qca.qualcomm.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
[bwh: Backported to 3.2: adjust context]
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
drivers/net/ethernet/atheros/atl1c/atl1c_main.c | 1 -
|
||||
1 files changed, 0 insertions(+), 1 deletions(-)
|
||||
|
||||
diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
|
||||
index eccdcff..5ae7df7 100644
|
||||
--- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
|
||||
+++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
|
||||
@@ -267,7 +267,6 @@ static void atl1c_check_link_status(struct atl1c_adapter *adapter)
|
||||
dev_warn(&pdev->dev, "stop mac failed\n");
|
||||
atl1c_set_aspm(hw, false);
|
||||
netif_carrier_off(netdev);
|
||||
- netif_stop_queue(netdev);
|
||||
atl1c_phy_reset(hw);
|
||||
atl1c_phy_init(&adapter->hw);
|
||||
} else {
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+51
@@ -0,0 +1,51 @@
|
||||
From 6b52d1306665e9da06ac76126a97888849dbf290 Mon Sep 17 00:00:00 2001
|
||||
From: Stanislaw Gruszka <sgruszka@redhat.com>
|
||||
Date: Wed, 4 Jul 2012 13:10:02 +0200
|
||||
Subject: [PATCH 053/109] rt2x00usb: fix indexes ordering on RX queue kick
|
||||
|
||||
commit efd821182cec8c92babef6e00a95066d3252fda4 upstream.
|
||||
|
||||
On rt2x00_dmastart() we increase index specified by Q_INDEX and on
|
||||
rt2x00_dmadone() we increase index specified by Q_INDEX_DONE. So entries
|
||||
between Q_INDEX_DONE and Q_INDEX are those we currently process in the
|
||||
hardware. Entries between Q_INDEX and Q_INDEX_DONE are those we can
|
||||
submit to the hardware.
|
||||
|
||||
According to that fix rt2x00usb_kick_queue(), as we need to submit RX
|
||||
entries that are not processed by the hardware. It worked before only
|
||||
for empty queue, otherwise was broken.
|
||||
|
||||
Note that for TX queues the index ordering is OK. We need to kick entries
|
||||
that have a filled skb but were not submitted to the hardware, i.e.
|
||||
started from Q_INDEX_DONE and have ENTRY_DATA_PENDING bit set.
|
||||
|
||||
From practical standpoint this fixes RX queue stall, usually reproducible
|
||||
in AP mode, like for example reported here:
|
||||
https://bugzilla.redhat.com/show_bug.cgi?id=828824
|
||||
|
||||
Reported-and-tested-by: Franco Miceli <fmiceli@plan.ceibal.edu.uy>
|
||||
Reported-and-tested-by: Tom Horsley <horsley1953@gmail.com>
|
||||
Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>
|
||||
Signed-off-by: John W. Linville <linville@tuxdriver.com>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
drivers/net/wireless/rt2x00/rt2x00usb.c | 2 +-
|
||||
1 files changed, 1 insertions(+), 1 deletions(-)
|
||||
|
||||
diff --git a/drivers/net/wireless/rt2x00/rt2x00usb.c b/drivers/net/wireless/rt2x00/rt2x00usb.c
|
||||
index 1e31050..ba28807 100644
|
||||
--- a/drivers/net/wireless/rt2x00/rt2x00usb.c
|
||||
+++ b/drivers/net/wireless/rt2x00/rt2x00usb.c
|
||||
@@ -426,8 +426,8 @@ void rt2x00usb_kick_queue(struct data_queue *queue)
|
||||
case QID_RX:
|
||||
if (!rt2x00queue_full(queue))
|
||||
rt2x00queue_for_each_entry(queue,
|
||||
- Q_INDEX_DONE,
|
||||
Q_INDEX,
|
||||
+ Q_INDEX_DONE,
|
||||
NULL,
|
||||
rt2x00usb_kick_rx_entry);
|
||||
break;
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+65
@@ -0,0 +1,65 @@
|
||||
From b7d2c1e70d2c94585ac5839e38b861bdc6d469d2 Mon Sep 17 00:00:00 2001
|
||||
From: Stanislaw Gruszka <sgruszka@redhat.com>
|
||||
Date: Wed, 4 Jul 2012 13:20:20 +0200
|
||||
Subject: [PATCH 054/109] iwlegacy: always monitor for stuck queues
|
||||
|
||||
commit c2ca7d92ed4bbd779516beb6eb226e19f7f7ab0f upstream.
|
||||
|
||||
This is iwlegacy version of:
|
||||
|
||||
commit 342bbf3fee2fa9a18147e74b2e3c4229a4564912
|
||||
Author: Johannes Berg <johannes.berg@intel.com>
|
||||
Date: Sun Mar 4 08:50:46 2012 -0800
|
||||
|
||||
iwlwifi: always monitor for stuck queues
|
||||
|
||||
If we only monitor while associated, the following
|
||||
can happen:
|
||||
- we're associated, and the queue stuck check
|
||||
runs, setting the queue "touch" time to X
|
||||
- we disassociate, stopping the monitoring,
|
||||
which leaves the time set to X
|
||||
- almost 2s later, we associate, and enqueue
|
||||
a frame
|
||||
- before the frame is transmitted, we monitor
|
||||
for stuck queues, and find the time set to
|
||||
X, although it is now later than X + 2000ms,
|
||||
so we decide that the queue is stuck and
|
||||
erroneously restart the device
|
||||
|
||||
Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>
|
||||
Signed-off-by: John W. Linville <linville@tuxdriver.com>
|
||||
[bwh: Backported to 3.2: adjust filename, function and variable names]
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
drivers/net/wireless/iwlegacy/iwl-core.c | 14 ++++++--------
|
||||
1 files changed, 6 insertions(+), 8 deletions(-)
|
||||
|
||||
diff --git a/drivers/net/wireless/iwlegacy/iwl-core.c b/drivers/net/wireless/iwlegacy/iwl-core.c
|
||||
index 2bd5659..1bb64c9 100644
|
||||
--- a/drivers/net/wireless/iwlegacy/iwl-core.c
|
||||
+++ b/drivers/net/wireless/iwlegacy/iwl-core.c
|
||||
@@ -1884,14 +1884,12 @@ void iwl_legacy_bg_watchdog(unsigned long data)
|
||||
return;
|
||||
|
||||
/* monitor and check for other stuck queues */
|
||||
- if (iwl_legacy_is_any_associated(priv)) {
|
||||
- for (cnt = 0; cnt < priv->hw_params.max_txq_num; cnt++) {
|
||||
- /* skip as we already checked the command queue */
|
||||
- if (cnt == priv->cmd_queue)
|
||||
- continue;
|
||||
- if (iwl_legacy_check_stuck_queue(priv, cnt))
|
||||
- return;
|
||||
- }
|
||||
+ for (cnt = 0; cnt < priv->hw_params.max_txq_num; cnt++) {
|
||||
+ /* skip as we already checked the command queue */
|
||||
+ if (cnt == priv->cmd_queue)
|
||||
+ continue;
|
||||
+ if (iwl_legacy_check_stuck_queue(priv, cnt))
|
||||
+ return;
|
||||
}
|
||||
|
||||
mod_timer(&priv->watchdog, jiffies +
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+51
@@ -0,0 +1,51 @@
|
||||
From 910c9012a7e02b93cc1f877aa8ef245dd1d99fbe Mon Sep 17 00:00:00 2001
|
||||
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
|
||||
Date: Wed, 4 Jul 2012 13:59:08 +0200
|
||||
Subject: [PATCH 055/109] iwlegacy: don't mess up the SCD when removing a key
|
||||
|
||||
commit b48d96652626b315229b1b82c6270eead6a77a6d upstream.
|
||||
|
||||
When we remove a key, we put a key index which was supposed
|
||||
to tell the fw that we are actually removing the key. But
|
||||
instead the fw took that index as a valid index and messed
|
||||
up the SRAM of the device.
|
||||
|
||||
This memory corruption on the device mangled the data of
|
||||
the SCD. The impact on the user is that SCD queue 2 got
|
||||
stuck after having removed keys.
|
||||
|
||||
Reported-by: Paul Bolle <pebolle@tiscali.nl>
|
||||
Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
|
||||
Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>
|
||||
Signed-off-by: John W. Linville <linville@tuxdriver.com>
|
||||
[bwh: Backported to 3.2: adjust filename, context and variable name]
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
drivers/net/wireless/iwlegacy/iwl-4965-sta.c | 4 ++--
|
||||
1 files changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/drivers/net/wireless/iwlegacy/iwl-4965-sta.c b/drivers/net/wireless/iwlegacy/iwl-4965-sta.c
|
||||
index a262c23..0116ca8 100644
|
||||
--- a/drivers/net/wireless/iwlegacy/iwl-4965-sta.c
|
||||
+++ b/drivers/net/wireless/iwlegacy/iwl-4965-sta.c
|
||||
@@ -466,7 +466,7 @@ int iwl4965_remove_dynamic_key(struct iwl_priv *priv,
|
||||
return 0;
|
||||
}
|
||||
|
||||
- if (priv->stations[sta_id].sta.key.key_offset == WEP_INVALID_OFFSET) {
|
||||
+ if (priv->stations[sta_id].sta.key.key_flags & STA_KEY_FLG_INVALID) {
|
||||
IWL_WARN(priv, "Removing wrong key %d 0x%x\n",
|
||||
keyconf->keyidx, key_flags);
|
||||
spin_unlock_irqrestore(&priv->sta_lock, flags);
|
||||
@@ -483,7 +483,7 @@ int iwl4965_remove_dynamic_key(struct iwl_priv *priv,
|
||||
sizeof(struct iwl4965_keyinfo));
|
||||
priv->stations[sta_id].sta.key.key_flags =
|
||||
STA_KEY_FLG_NO_ENC | STA_KEY_FLG_INVALID;
|
||||
- priv->stations[sta_id].sta.key.key_offset = WEP_INVALID_OFFSET;
|
||||
+ priv->stations[sta_id].sta.key.key_offset = keyconf->hw_key_idx;
|
||||
priv->stations[sta_id].sta.sta.modify_mask = STA_MODIFY_KEY_MASK;
|
||||
priv->stations[sta_id].sta.mode = STA_CONTROL_MODIFY_MSK;
|
||||
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+39
@@ -0,0 +1,39 @@
|
||||
From c82dafb3ba87352cb605641f7d709ec76dc64168 Mon Sep 17 00:00:00 2001
|
||||
From: Tushar Dave <tushar.n.dave@intel.com>
|
||||
Date: Thu, 12 Jul 2012 08:56:56 +0000
|
||||
Subject: [PATCH 056/109] e1000e: Correct link check logic for 82571 serdes
|
||||
|
||||
commit d0efa8f23a644f7cb7d1f8e78dd9a223efa412a3 upstream.
|
||||
|
||||
SYNCH bit and IV bit of RXCW register are sticky. Before examining these bits,
|
||||
RXCW should be read twice to filter out one-time false events and have correct
|
||||
values for these bits. Incorrect values of these bits in link check logic can
|
||||
cause weird link stability issues if auto-negotiation fails.
|
||||
|
||||
Reported-by: Dean Nelson <dnelson@redhat.com>
|
||||
Signed-off-by: Tushar Dave <tushar.n.dave@intel.com>
|
||||
Reviewed-by: Bruce Allan <bruce.w.allan@intel.com>
|
||||
Tested-by: Jeff Pieper <jeffrey.e.pieper@intel.com>
|
||||
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
drivers/net/ethernet/intel/e1000e/82571.c | 3 +++
|
||||
1 files changed, 3 insertions(+), 0 deletions(-)
|
||||
|
||||
diff --git a/drivers/net/ethernet/intel/e1000e/82571.c b/drivers/net/ethernet/intel/e1000e/82571.c
|
||||
index e556fc3..3072d35 100644
|
||||
--- a/drivers/net/ethernet/intel/e1000e/82571.c
|
||||
+++ b/drivers/net/ethernet/intel/e1000e/82571.c
|
||||
@@ -1571,6 +1571,9 @@ static s32 e1000_check_for_serdes_link_82571(struct e1000_hw *hw)
|
||||
ctrl = er32(CTRL);
|
||||
status = er32(STATUS);
|
||||
rxcw = er32(RXCW);
|
||||
+ /* SYNCH bit and IV bit are sticky */
|
||||
+ udelay(10);
|
||||
+ rxcw = er32(RXCW);
|
||||
|
||||
if ((rxcw & E1000_RXCW_SYNCH) && !(rxcw & E1000_RXCW_IV)) {
|
||||
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+39
@@ -0,0 +1,39 @@
|
||||
From f846f3528f3dcd02646a919a50696d026e0864ae Mon Sep 17 00:00:00 2001
|
||||
From: Mark Rustad <mark.d.rustad@intel.com>
|
||||
Date: Fri, 13 Jul 2012 18:18:04 -0700
|
||||
Subject: [PATCH 057/109] tcm_fc: Fix crash seen with aborts and large reads
|
||||
|
||||
commit 3cc5d2a6b9a2fd1bf024aa5e52dd22961eecaf13 upstream.
|
||||
|
||||
This patch fixes a crash seen when large reads have their exchange
|
||||
aborted by either timing out or being reset. Because the exchange
|
||||
abort results in the seq pointer being set to NULL, because the
|
||||
sequence is no longer valid, it must not be dereferenced. This
|
||||
patch changes the function ft_get_task_tag to return ~0 if it is
|
||||
unable to get the tag for this reason. Because the get_task_tag
|
||||
interface provides no means of returning an error, this seems
|
||||
like the best way to fix this issue at the moment.
|
||||
|
||||
Signed-off-by: Mark Rustad <mark.d.rustad@intel.com>
|
||||
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
drivers/target/tcm_fc/tfc_cmd.c | 2 ++
|
||||
1 files changed, 2 insertions(+), 0 deletions(-)
|
||||
|
||||
diff --git a/drivers/target/tcm_fc/tfc_cmd.c b/drivers/target/tcm_fc/tfc_cmd.c
|
||||
index d95cfe2..278819c 100644
|
||||
--- a/drivers/target/tcm_fc/tfc_cmd.c
|
||||
+++ b/drivers/target/tcm_fc/tfc_cmd.c
|
||||
@@ -249,6 +249,8 @@ u32 ft_get_task_tag(struct se_cmd *se_cmd)
|
||||
{
|
||||
struct ft_cmd *cmd = container_of(se_cmd, struct ft_cmd, se_cmd);
|
||||
|
||||
+ if (cmd->aborted)
|
||||
+ return ~0;
|
||||
return fc_seq_exch(cmd->seq)->rxid;
|
||||
}
|
||||
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+115
@@ -0,0 +1,115 @@
|
||||
From 13d0304203a528b1c1c76b5c9b6f5b8dc093f996 Mon Sep 17 00:00:00 2001
|
||||
From: Anders Kaseorg <andersk@MIT.EDU>
|
||||
Date: Sun, 15 Jul 2012 17:14:25 -0400
|
||||
Subject: [PATCH 058/109] fifo: Do not restart open() if it already found a
|
||||
partner
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
commit 05d290d66be6ef77a0b962ebecf01911bd984a78 upstream.
|
||||
|
||||
If a parent and child process open the two ends of a fifo, and the
|
||||
child immediately exits, the parent may receive a SIGCHLD before its
|
||||
open() returns. In that case, we need to make sure that open() will
|
||||
return successfully after the SIGCHLD handler returns, instead of
|
||||
throwing EINTR or being restarted. Otherwise, the restarted open()
|
||||
would incorrectly wait for a second partner on the other end.
|
||||
|
||||
The following test demonstrates the EINTR that was wrongly thrown from
|
||||
the parent’s open(). Change .sa_flags = 0 to .sa_flags = SA_RESTART
|
||||
to see a deadlock instead, in which the restarted open() waits for a
|
||||
second reader that will never come. (On my systems, this happens
|
||||
pretty reliably within about 5 to 500 iterations. Others report that
|
||||
it manages to loop ~forever sometimes; YMMV.)
|
||||
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/wait.h>
|
||||
#include <fcntl.h>
|
||||
#include <signal.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#define CHECK(x) do if ((x) == -1) {perror(#x); abort();} while(0)
|
||||
|
||||
void handler(int signum) {}
|
||||
|
||||
int main()
|
||||
{
|
||||
struct sigaction act = {.sa_handler = handler, .sa_flags = 0};
|
||||
CHECK(sigaction(SIGCHLD, &act, NULL));
|
||||
CHECK(mknod("fifo", S_IFIFO | S_IRWXU, 0));
|
||||
for (;;) {
|
||||
int fd;
|
||||
pid_t pid;
|
||||
putc('.', stderr);
|
||||
CHECK(pid = fork());
|
||||
if (pid == 0) {
|
||||
CHECK(fd = open("fifo", O_RDONLY));
|
||||
_exit(0);
|
||||
}
|
||||
CHECK(fd = open("fifo", O_WRONLY));
|
||||
CHECK(close(fd));
|
||||
CHECK(waitpid(pid, NULL, 0));
|
||||
}
|
||||
}
|
||||
|
||||
This is what I suspect was causing the Git test suite to fail in
|
||||
t9010-svn-fe.sh:
|
||||
|
||||
http://bugs.debian.org/678852
|
||||
|
||||
Signed-off-by: Anders Kaseorg <andersk@mit.edu>
|
||||
Reviewed-by: Jonathan Nieder <jrnieder@gmail.com>
|
||||
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
fs/fifo.c | 9 ++++-----
|
||||
1 files changed, 4 insertions(+), 5 deletions(-)
|
||||
|
||||
diff --git a/fs/fifo.c b/fs/fifo.c
|
||||
index b1a524d..cf6f434 100644
|
||||
--- a/fs/fifo.c
|
||||
+++ b/fs/fifo.c
|
||||
@@ -14,7 +14,7 @@
|
||||
#include <linux/sched.h>
|
||||
#include <linux/pipe_fs_i.h>
|
||||
|
||||
-static void wait_for_partner(struct inode* inode, unsigned int *cnt)
|
||||
+static int wait_for_partner(struct inode* inode, unsigned int *cnt)
|
||||
{
|
||||
int cur = *cnt;
|
||||
|
||||
@@ -23,6 +23,7 @@ static void wait_for_partner(struct inode* inode, unsigned int *cnt)
|
||||
if (signal_pending(current))
|
||||
break;
|
||||
}
|
||||
+ return cur == *cnt ? -ERESTARTSYS : 0;
|
||||
}
|
||||
|
||||
static void wake_up_partner(struct inode* inode)
|
||||
@@ -67,8 +68,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
|
||||
* seen a writer */
|
||||
filp->f_version = pipe->w_counter;
|
||||
} else {
|
||||
- wait_for_partner(inode, &pipe->w_counter);
|
||||
- if(signal_pending(current))
|
||||
+ if (wait_for_partner(inode, &pipe->w_counter))
|
||||
goto err_rd;
|
||||
}
|
||||
}
|
||||
@@ -90,8 +90,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
|
||||
wake_up_partner(inode);
|
||||
|
||||
if (!pipe->readers) {
|
||||
- wait_for_partner(inode, &pipe->r_counter);
|
||||
- if (signal_pending(current))
|
||||
+ if (wait_for_partner(inode, &pipe->r_counter))
|
||||
goto err_wr;
|
||||
}
|
||||
break;
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+55
@@ -0,0 +1,55 @@
|
||||
From ab68c7b575aff70124f83d2ec207d06c60eea003 Mon Sep 17 00:00:00 2001
|
||||
From: Roland Dreier <roland@purestorage.com>
|
||||
Date: Mon, 16 Jul 2012 15:17:10 -0700
|
||||
Subject: [PATCH 059/109] target: Clean up returning errors in PR handling
|
||||
code
|
||||
|
||||
commit d35212f3ca3bf4fb49d15e37f530c9931e2d2183 upstream.
|
||||
|
||||
- instead of (PTR_ERR(file) < 0) just use IS_ERR(file)
|
||||
- return -EINVAL instead of EINVAL
|
||||
- all other error returns in target_scsi3_emulate_pr_out() use
|
||||
"goto out" -- get rid of the one remaining straight "return."
|
||||
|
||||
Signed-off-by: Roland Dreier <roland@purestorage.com>
|
||||
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
drivers/target/target_core_pr.c | 7 ++++---
|
||||
1 files changed, 4 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c
|
||||
index b75bc92..9145141 100644
|
||||
--- a/drivers/target/target_core_pr.c
|
||||
+++ b/drivers/target/target_core_pr.c
|
||||
@@ -2042,7 +2042,7 @@ static int __core_scsi3_write_aptpl_to_file(
|
||||
if (IS_ERR(file) || !file || !file->f_dentry) {
|
||||
pr_err("filp_open(%s) for APTPL metadata"
|
||||
" failed\n", path);
|
||||
- return (PTR_ERR(file) < 0 ? PTR_ERR(file) : -ENOENT);
|
||||
+ return IS_ERR(file) ? PTR_ERR(file) : -ENOENT;
|
||||
}
|
||||
|
||||
iov[0].iov_base = &buf[0];
|
||||
@@ -3853,7 +3853,7 @@ int target_scsi3_emulate_pr_out(struct se_task *task)
|
||||
" SPC-2 reservation is held, returning"
|
||||
" RESERVATION_CONFLICT\n");
|
||||
cmd->scsi_sense_reason = TCM_RESERVATION_CONFLICT;
|
||||
- ret = EINVAL;
|
||||
+ ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
@@ -3863,7 +3863,8 @@ int target_scsi3_emulate_pr_out(struct se_task *task)
|
||||
*/
|
||||
if (!cmd->se_sess) {
|
||||
cmd->scsi_sense_reason = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
|
||||
- return -EINVAL;
|
||||
+ ret = -EINVAL;
|
||||
+ goto out;
|
||||
}
|
||||
|
||||
if (cmd->data_length < 24) {
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+41
@@ -0,0 +1,41 @@
|
||||
From 63a96e329f2c66af145a93d6f42067e54ef932af Mon Sep 17 00:00:00 2001
|
||||
From: Roland Dreier <roland@purestorage.com>
|
||||
Date: Mon, 16 Jul 2012 17:10:17 -0700
|
||||
Subject: [PATCH 060/109] target: Fix range calculation in WRITE SAME
|
||||
emulation when num blocks == 0
|
||||
|
||||
commit 1765fe5edcb83f53fc67edeb559fcf4bc82c6460 upstream.
|
||||
|
||||
When NUMBER OF LOGICAL BLOCKS is 0, WRITE SAME is supposed to write
|
||||
all the blocks from the specified LBA through the end of the device.
|
||||
However, dev->transport->get_blocks(dev) (perhaps confusingly) returns
|
||||
the last valid LBA rather than the number of blocks, so the correct
|
||||
number of blocks to write starting with lba is
|
||||
|
||||
dev->transport->get_blocks(dev) - lba + 1
|
||||
|
||||
(nab: Backport roland's for-3.6 patch to for-3.5)
|
||||
|
||||
Signed-off-by: Roland Dreier <roland@purestorage.com>
|
||||
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
drivers/target/target_core_cdb.c | 2 +-
|
||||
1 files changed, 1 insertions(+), 1 deletions(-)
|
||||
|
||||
diff --git a/drivers/target/target_core_cdb.c b/drivers/target/target_core_cdb.c
|
||||
index 65ea65a..93b9406 100644
|
||||
--- a/drivers/target/target_core_cdb.c
|
||||
+++ b/drivers/target/target_core_cdb.c
|
||||
@@ -1199,7 +1199,7 @@ int target_emulate_write_same(struct se_task *task)
|
||||
if (num_blocks != 0)
|
||||
range = num_blocks;
|
||||
else
|
||||
- range = (dev->transport->get_blocks(dev) - lba);
|
||||
+ range = (dev->transport->get_blocks(dev) - lba) + 1;
|
||||
|
||||
pr_debug("WRITE_SAME UNMAP: LBA: %llu Range: %llu\n",
|
||||
(unsigned long long)lba, (unsigned long long)range);
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+82
@@ -0,0 +1,82 @@
|
||||
From 0028855e0b717cadb5fc6b05934af9bd9d2cc4c1 Mon Sep 17 00:00:00 2001
|
||||
From: Jeff Layton <jlayton@redhat.com>
|
||||
Date: Wed, 11 Jul 2012 09:09:35 -0400
|
||||
Subject: [PATCH 061/109] cifs: on CONFIG_HIGHMEM machines, limit the
|
||||
rsize/wsize to the kmap space
|
||||
|
||||
commit 3ae629d98bd5ed77585a878566f04f310adbc591 upstream.
|
||||
|
||||
We currently rely on being able to kmap all of the pages in an async
|
||||
read or write request. If you're on a machine that has CONFIG_HIGHMEM
|
||||
set then that kmap space is limited, sometimes to as low as 512 slots.
|
||||
|
||||
With 512 slots, we can only support up to a 2M r/wsize, and that's
|
||||
assuming that we can get our greedy little hands on all of them. There
|
||||
are other users however, so it's possible we'll end up stuck with a
|
||||
size that large.
|
||||
|
||||
Since we can't handle a rsize or wsize larger than that currently, cap
|
||||
those options at the number of kmap slots we have. We could consider
|
||||
capping it even lower, but we currently default to a max of 1M. Might as
|
||||
well allow those luddites on 32 bit arches enough rope to hang
|
||||
themselves.
|
||||
|
||||
A more robust fix would be to teach the send and receive routines how
|
||||
to contend with an array of pages so we don't need to marshal up a kvec
|
||||
array at all. That's a fairly significant overhaul though, so we'll need
|
||||
this limit in place until that's ready.
|
||||
|
||||
Reported-by: Jian Li <jiali@redhat.com>
|
||||
Signed-off-by: Jeff Layton <jlayton@redhat.com>
|
||||
Signed-off-by: Steve French <smfrench@gmail.com>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
fs/cifs/connect.c | 18 ++++++++++++++++++
|
||||
1 files changed, 18 insertions(+), 0 deletions(-)
|
||||
|
||||
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
|
||||
index b21670c..56c152d 100644
|
||||
--- a/fs/cifs/connect.c
|
||||
+++ b/fs/cifs/connect.c
|
||||
@@ -2925,6 +2925,18 @@ void cifs_setup_cifs_sb(struct smb_vol *pvolume_info,
|
||||
#define CIFS_DEFAULT_NON_POSIX_RSIZE (60 * 1024)
|
||||
#define CIFS_DEFAULT_NON_POSIX_WSIZE (65536)
|
||||
|
||||
+/*
|
||||
+ * On hosts with high memory, we can't currently support wsize/rsize that are
|
||||
+ * larger than we can kmap at once. Cap the rsize/wsize at
|
||||
+ * LAST_PKMAP * PAGE_SIZE. We'll never be able to fill a read or write request
|
||||
+ * larger than that anyway.
|
||||
+ */
|
||||
+#ifdef CONFIG_HIGHMEM
|
||||
+#define CIFS_KMAP_SIZE_LIMIT (LAST_PKMAP * PAGE_CACHE_SIZE)
|
||||
+#else /* CONFIG_HIGHMEM */
|
||||
+#define CIFS_KMAP_SIZE_LIMIT (1<<24)
|
||||
+#endif /* CONFIG_HIGHMEM */
|
||||
+
|
||||
static unsigned int
|
||||
cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info)
|
||||
{
|
||||
@@ -2955,6 +2967,9 @@ cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info)
|
||||
wsize = min_t(unsigned int, wsize,
|
||||
server->maxBuf - sizeof(WRITE_REQ) + 4);
|
||||
|
||||
+ /* limit to the amount that we can kmap at once */
|
||||
+ wsize = min_t(unsigned int, wsize, CIFS_KMAP_SIZE_LIMIT);
|
||||
+
|
||||
/* hard limit of CIFS_MAX_WSIZE */
|
||||
wsize = min_t(unsigned int, wsize, CIFS_MAX_WSIZE);
|
||||
|
||||
@@ -2996,6 +3011,9 @@ cifs_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info)
|
||||
if (!(server->capabilities & CAP_LARGE_READ_X))
|
||||
rsize = min_t(unsigned int, CIFSMaxBufSize, rsize);
|
||||
|
||||
+ /* limit to the amount that we can kmap at once */
|
||||
+ rsize = min_t(unsigned int, rsize, CIFS_KMAP_SIZE_LIMIT);
|
||||
+
|
||||
/* hard limit of CIFS_MAX_RSIZE */
|
||||
rsize = min_t(unsigned int, rsize, CIFS_MAX_RSIZE);
|
||||
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+47
@@ -0,0 +1,47 @@
|
||||
From 3d7e548a161a109e404e1068901f834c69eeb0ea Mon Sep 17 00:00:00 2001
|
||||
From: Jeff Layton <jlayton@redhat.com>
|
||||
Date: Fri, 6 Jul 2012 07:09:42 -0400
|
||||
Subject: [PATCH 062/109] cifs: always update the inode cache with the results
|
||||
from a FIND_*
|
||||
|
||||
commit cd60042cc1392e79410dc8de9e9c1abb38a29e57 upstream.
|
||||
|
||||
When we get back a FIND_FIRST/NEXT result, we have some info about the
|
||||
dentry that we use to instantiate a new inode. We were ignoring and
|
||||
discarding that info when we had an existing dentry in the cache.
|
||||
|
||||
Fix this by updating the inode in place when we find an existing dentry
|
||||
and the uniqueid is the same.
|
||||
|
||||
Reported-and-Tested-by: Andrew Bartlett <abartlet@samba.org>
|
||||
Reported-by: Bill Robertson <bill_robertson@debortoli.com.au>
|
||||
Reported-by: Dion Edwards <dion_edwards@debortoli.com.au>
|
||||
Signed-off-by: Jeff Layton <jlayton@redhat.com>
|
||||
Signed-off-by: Steve French <smfrench@gmail.com>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
fs/cifs/readdir.c | 7 +++++--
|
||||
1 files changed, 5 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
|
||||
index db4a138..4c37ed4 100644
|
||||
--- a/fs/cifs/readdir.c
|
||||
+++ b/fs/cifs/readdir.c
|
||||
@@ -86,9 +86,12 @@ cifs_readdir_lookup(struct dentry *parent, struct qstr *name,
|
||||
|
||||
dentry = d_lookup(parent, name);
|
||||
if (dentry) {
|
||||
- /* FIXME: check for inode number changes? */
|
||||
- if (dentry->d_inode != NULL)
|
||||
+ inode = dentry->d_inode;
|
||||
+ /* update inode in place if i_ino didn't change */
|
||||
+ if (inode && CIFS_I(inode)->uniqueid == fattr->cf_uniqueid) {
|
||||
+ cifs_fattr_to_inode(inode, fattr);
|
||||
return dentry;
|
||||
+ }
|
||||
d_drop(dentry);
|
||||
dput(dentry);
|
||||
}
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+84
@@ -0,0 +1,84 @@
|
||||
From 6ece4e48bfa223f77eff8fc4d2fcc4808214f42e Mon Sep 17 00:00:00 2001
|
||||
From: Aaditya Kumar <aaditya.kumar.30@gmail.com>
|
||||
Date: Tue, 17 Jul 2012 15:48:07 -0700
|
||||
Subject: [PATCH 063/109] mm: fix lost kswapd wakeup in kswapd_stop()
|
||||
|
||||
commit 1c7e7f6c0703d03af6bcd5ccc11fc15d23e5ecbe upstream.
|
||||
|
||||
Offlining memory may block forever, waiting for kswapd() to wake up
|
||||
because kswapd() does not check the event kthread->should_stop before
|
||||
sleeping.
|
||||
|
||||
The proper pattern, from Documentation/memory-barriers.txt, is:
|
||||
|
||||
--- waker ---
|
||||
event_indicated = 1;
|
||||
wake_up_process(event_daemon);
|
||||
|
||||
--- sleeper ---
|
||||
for (;;) {
|
||||
set_current_state(TASK_UNINTERRUPTIBLE);
|
||||
if (event_indicated)
|
||||
break;
|
||||
schedule();
|
||||
}
|
||||
|
||||
set_current_state() may be wrapped by:
|
||||
prepare_to_wait();
|
||||
|
||||
In the kswapd() case, event_indicated is kthread->should_stop.
|
||||
|
||||
=== offlining memory (waker) ===
|
||||
kswapd_stop()
|
||||
kthread_stop()
|
||||
kthread->should_stop = 1
|
||||
wake_up_process()
|
||||
wait_for_completion()
|
||||
|
||||
=== kswapd_try_to_sleep (sleeper) ===
|
||||
kswapd_try_to_sleep()
|
||||
prepare_to_wait()
|
||||
.
|
||||
.
|
||||
schedule()
|
||||
.
|
||||
.
|
||||
finish_wait()
|
||||
|
||||
The schedule() needs to be protected by a test of kthread->should_stop,
|
||||
which is wrapped by kthread_should_stop().
|
||||
|
||||
Reproducer:
|
||||
Do heavy file I/O in background.
|
||||
Do a memory offline/online in a tight loop
|
||||
|
||||
Signed-off-by: Aaditya Kumar <aaditya.kumar@ap.sony.com>
|
||||
Acked-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
|
||||
Reviewed-by: Minchan Kim <minchan@kernel.org>
|
||||
Acked-by: Mel Gorman <mel@csn.ul.ie>
|
||||
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
mm/vmscan.c | 5 ++++-
|
||||
1 files changed, 4 insertions(+), 1 deletions(-)
|
||||
|
||||
diff --git a/mm/vmscan.c b/mm/vmscan.c
|
||||
index 72cf498..8342119 100644
|
||||
--- a/mm/vmscan.c
|
||||
+++ b/mm/vmscan.c
|
||||
@@ -2824,7 +2824,10 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int order, int classzone_idx)
|
||||
* them before going back to sleep.
|
||||
*/
|
||||
set_pgdat_percpu_threshold(pgdat, calculate_normal_threshold);
|
||||
- schedule();
|
||||
+
|
||||
+ if (!kthread_should_stop())
|
||||
+ schedule();
|
||||
+
|
||||
set_pgdat_percpu_threshold(pgdat, calculate_pressure_threshold);
|
||||
} else {
|
||||
if (remaining)
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+164
@@ -0,0 +1,164 @@
|
||||
From 22c2c30192d85ffa042433e89e929b4ea08ab528 Mon Sep 17 00:00:00 2001
|
||||
From: NeilBrown <neilb@suse.de>
|
||||
Date: Thu, 19 Jul 2012 15:59:18 +1000
|
||||
Subject: [PATCH 064/109] md: avoid crash when stopping md array races with
|
||||
closing other open fds.
|
||||
|
||||
commit a05b7ea03d72f36edb0cec05e8893803335c61a0 upstream.
|
||||
|
||||
md will refuse to stop an array if any other fd (or mounted fs) is
|
||||
using it.
|
||||
When any fs is unmounted or when the last open fd is closed all
|
||||
pending IO will be flushed (e.g. sync_blockdev call in __blkdev_put)
|
||||
so there will be no pending IO to worry about when the array is
|
||||
stopped.
|
||||
|
||||
However in order to send the STOP_ARRAY ioctl to stop the array one
|
||||
must first get an open fd on the block device.
|
||||
If some fd is being used to write to the block device and it is closed
|
||||
after mdadm opens the block device, but before mdadm issues the
|
||||
STOP_ARRAY ioctl, then there will be no last-close on the md device so
|
||||
__blkdev_put will not call sync_blockdev.
|
||||
|
||||
If this happens, then IO can still be in-flight while md tears down
|
||||
the array and bad things can happen (use-after-free and subsequent
|
||||
havoc).
|
||||
|
||||
So in the case where do_md_stop is being called from an open file
|
||||
descriptor, call sync_blockdev after taking the mutex to ensure there
|
||||
will be no new openers.
|
||||
|
||||
This is needed when setting a read-write device to read-only too.
|
||||
|
||||
Reported-by: majianpeng <majianpeng@gmail.com>
|
||||
Signed-off-by: NeilBrown <neilb@suse.de>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
drivers/md/md.c | 36 +++++++++++++++++++++++-------------
|
||||
1 files changed, 23 insertions(+), 13 deletions(-)
|
||||
|
||||
diff --git a/drivers/md/md.c b/drivers/md/md.c
|
||||
index 700ecae..d8646d7 100644
|
||||
--- a/drivers/md/md.c
|
||||
+++ b/drivers/md/md.c
|
||||
@@ -3700,8 +3700,8 @@ array_state_show(struct mddev *mddev, char *page)
|
||||
return sprintf(page, "%s\n", array_states[st]);
|
||||
}
|
||||
|
||||
-static int do_md_stop(struct mddev * mddev, int ro, int is_open);
|
||||
-static int md_set_readonly(struct mddev * mddev, int is_open);
|
||||
+static int do_md_stop(struct mddev * mddev, int ro, struct block_device *bdev);
|
||||
+static int md_set_readonly(struct mddev * mddev, struct block_device *bdev);
|
||||
static int do_md_run(struct mddev * mddev);
|
||||
static int restart_array(struct mddev *mddev);
|
||||
|
||||
@@ -3717,14 +3717,14 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
|
||||
/* stopping an active array */
|
||||
if (atomic_read(&mddev->openers) > 0)
|
||||
return -EBUSY;
|
||||
- err = do_md_stop(mddev, 0, 0);
|
||||
+ err = do_md_stop(mddev, 0, NULL);
|
||||
break;
|
||||
case inactive:
|
||||
/* stopping an active array */
|
||||
if (mddev->pers) {
|
||||
if (atomic_read(&mddev->openers) > 0)
|
||||
return -EBUSY;
|
||||
- err = do_md_stop(mddev, 2, 0);
|
||||
+ err = do_md_stop(mddev, 2, NULL);
|
||||
} else
|
||||
err = 0; /* already inactive */
|
||||
break;
|
||||
@@ -3732,7 +3732,7 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
|
||||
break; /* not supported yet */
|
||||
case readonly:
|
||||
if (mddev->pers)
|
||||
- err = md_set_readonly(mddev, 0);
|
||||
+ err = md_set_readonly(mddev, NULL);
|
||||
else {
|
||||
mddev->ro = 1;
|
||||
set_disk_ro(mddev->gendisk, 1);
|
||||
@@ -3742,7 +3742,7 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
|
||||
case read_auto:
|
||||
if (mddev->pers) {
|
||||
if (mddev->ro == 0)
|
||||
- err = md_set_readonly(mddev, 0);
|
||||
+ err = md_set_readonly(mddev, NULL);
|
||||
else if (mddev->ro == 1)
|
||||
err = restart_array(mddev);
|
||||
if (err == 0) {
|
||||
@@ -5078,15 +5078,17 @@ void md_stop(struct mddev *mddev)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(md_stop);
|
||||
|
||||
-static int md_set_readonly(struct mddev *mddev, int is_open)
|
||||
+static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
|
||||
{
|
||||
int err = 0;
|
||||
mutex_lock(&mddev->open_mutex);
|
||||
- if (atomic_read(&mddev->openers) > is_open) {
|
||||
+ if (atomic_read(&mddev->openers) > !!bdev) {
|
||||
printk("md: %s still in use.\n",mdname(mddev));
|
||||
err = -EBUSY;
|
||||
goto out;
|
||||
}
|
||||
+ if (bdev)
|
||||
+ sync_blockdev(bdev);
|
||||
if (mddev->pers) {
|
||||
__md_stop_writes(mddev);
|
||||
|
||||
@@ -5108,18 +5110,26 @@ out:
|
||||
* 0 - completely stop and dis-assemble array
|
||||
* 2 - stop but do not disassemble array
|
||||
*/
|
||||
-static int do_md_stop(struct mddev * mddev, int mode, int is_open)
|
||||
+static int do_md_stop(struct mddev * mddev, int mode,
|
||||
+ struct block_device *bdev)
|
||||
{
|
||||
struct gendisk *disk = mddev->gendisk;
|
||||
struct md_rdev *rdev;
|
||||
|
||||
mutex_lock(&mddev->open_mutex);
|
||||
- if (atomic_read(&mddev->openers) > is_open ||
|
||||
+ if (atomic_read(&mddev->openers) > !!bdev ||
|
||||
mddev->sysfs_active) {
|
||||
printk("md: %s still in use.\n",mdname(mddev));
|
||||
mutex_unlock(&mddev->open_mutex);
|
||||
return -EBUSY;
|
||||
}
|
||||
+ if (bdev)
|
||||
+ /* It is possible IO was issued on some other
|
||||
+ * open file which was closed before we took ->open_mutex.
|
||||
+ * As that was not the last close __blkdev_put will not
|
||||
+ * have called sync_blockdev, so we must.
|
||||
+ */
|
||||
+ sync_blockdev(bdev);
|
||||
|
||||
if (mddev->pers) {
|
||||
if (mddev->ro)
|
||||
@@ -5193,7 +5203,7 @@ static void autorun_array(struct mddev *mddev)
|
||||
err = do_md_run(mddev);
|
||||
if (err) {
|
||||
printk(KERN_WARNING "md: do_md_run() returned %d\n", err);
|
||||
- do_md_stop(mddev, 0, 0);
|
||||
+ do_md_stop(mddev, 0, NULL);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6184,11 +6194,11 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
|
||||
goto done_unlock;
|
||||
|
||||
case STOP_ARRAY:
|
||||
- err = do_md_stop(mddev, 0, 1);
|
||||
+ err = do_md_stop(mddev, 0, bdev);
|
||||
goto done_unlock;
|
||||
|
||||
case STOP_ARRAY_RO:
|
||||
- err = md_set_readonly(mddev, 1);
|
||||
+ err = md_set_readonly(mddev, bdev);
|
||||
goto done_unlock;
|
||||
|
||||
case BLKROSET:
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+58
@@ -0,0 +1,58 @@
|
||||
From a5f676adf9ef247dd5363de5f0e26d0bdb6597bc Mon Sep 17 00:00:00 2001
|
||||
From: NeilBrown <neilb@suse.de>
|
||||
Date: Thu, 19 Jul 2012 15:59:18 +1000
|
||||
Subject: [PATCH 065/109] md/raid1: close some possible races on write errors
|
||||
during resync
|
||||
|
||||
commit 58e94ae18478c08229626daece2fc108a4a23261 upstream.
|
||||
|
||||
commit 4367af556133723d0f443e14ca8170d9447317cb
|
||||
md/raid1: clear bad-block record when write succeeds.
|
||||
|
||||
Added a 'reschedule_retry' call possibility at the end of
|
||||
end_sync_write, but didn't add matching code at the end of
|
||||
sync_request_write. So if the writes complete very quickly, or
|
||||
scheduling makes it seem that way, then we can miss rescheduling
|
||||
the request and the resync could hang.
|
||||
|
||||
Also commit 73d5c38a9536142e062c35997b044e89166e063b
|
||||
md: avoid races when stopping resync.
|
||||
|
||||
Fix a race condition in this same code in end_sync_write but didn't
|
||||
make the change in sync_request_write.
|
||||
|
||||
This patch updates sync_request_write to fix both of those.
|
||||
Patch is suitable for 3.1 and later kernels.
|
||||
|
||||
Reported-by: Alexander Lyakas <alex.bolshoy@gmail.com>
|
||||
Original-version-by: Alexander Lyakas <alex.bolshoy@gmail.com>
|
||||
Signed-off-by: NeilBrown <neilb@suse.de>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
drivers/md/raid1.c | 10 ++++++++--
|
||||
1 files changed, 8 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
|
||||
index 58f0055..2d97bf0 100644
|
||||
--- a/drivers/md/raid1.c
|
||||
+++ b/drivers/md/raid1.c
|
||||
@@ -1713,8 +1713,14 @@ static void sync_request_write(struct mddev *mddev, struct r1bio *r1_bio)
|
||||
|
||||
if (atomic_dec_and_test(&r1_bio->remaining)) {
|
||||
/* if we're here, all write(s) have completed, so clean up */
|
||||
- md_done_sync(mddev, r1_bio->sectors, 1);
|
||||
- put_buf(r1_bio);
|
||||
+ int s = r1_bio->sectors;
|
||||
+ if (test_bit(R1BIO_MadeGood, &r1_bio->state) ||
|
||||
+ test_bit(R1BIO_WriteError, &r1_bio->state))
|
||||
+ reschedule_retry(r1_bio);
|
||||
+ else {
|
||||
+ put_buf(r1_bio);
|
||||
+ md_done_sync(mddev, s, 1);
|
||||
+ }
|
||||
}
|
||||
}
|
||||
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+70
@@ -0,0 +1,70 @@
|
||||
From 892d35f24ea2801daef7e48e41d8ec4e9bac34e8 Mon Sep 17 00:00:00 2001
|
||||
From: David Daney <david.daney@cavium.com>
|
||||
Date: Thu, 19 Jul 2012 09:11:14 +0200
|
||||
Subject: [PATCH 066/109] MIPS: Properly align the .data..init_task section.
|
||||
|
||||
commit 7b1c0d26a8e272787f0f9fcc5f3e8531df3b3409 upstream.
|
||||
|
||||
Improper alignment can lead to unbootable systems and/or random
|
||||
crashes.
|
||||
|
||||
[ralf@linux-mips.org: This is a long-standing bug since
|
||||
6eb10bc9e2deab06630261cd05c4cb1e9a60e980 (kernel.org) rsp.
|
||||
c422a10917f75fd19fa7fe070aaaa23e384dae6f (lmo) [MIPS: Clean up linker script
|
||||
using new linker script macros.] so dates back to 2.6.32.]
|
||||
|
||||
Signed-off-by: David Daney <david.daney@cavium.com>
|
||||
Cc: linux-mips@linux-mips.org
|
||||
Patchwork: https://patchwork.linux-mips.org/patch/3881/
|
||||
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
arch/mips/include/asm/thread_info.h | 4 ++--
|
||||
arch/mips/kernel/vmlinux.lds.S | 3 ++-
|
||||
2 files changed, 4 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/arch/mips/include/asm/thread_info.h b/arch/mips/include/asm/thread_info.h
|
||||
index 97f8bf6..adda036 100644
|
||||
--- a/arch/mips/include/asm/thread_info.h
|
||||
+++ b/arch/mips/include/asm/thread_info.h
|
||||
@@ -60,6 +60,8 @@ struct thread_info {
|
||||
register struct thread_info *__current_thread_info __asm__("$28");
|
||||
#define current_thread_info() __current_thread_info
|
||||
|
||||
+#endif /* !__ASSEMBLY__ */
|
||||
+
|
||||
/* thread information allocation */
|
||||
#if defined(CONFIG_PAGE_SIZE_4KB) && defined(CONFIG_32BIT)
|
||||
#define THREAD_SIZE_ORDER (1)
|
||||
@@ -97,8 +99,6 @@ register struct thread_info *__current_thread_info __asm__("$28");
|
||||
|
||||
#define free_thread_info(info) kfree(info)
|
||||
|
||||
-#endif /* !__ASSEMBLY__ */
|
||||
-
|
||||
#define PREEMPT_ACTIVE 0x10000000
|
||||
|
||||
/*
|
||||
diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S
|
||||
index a81176f..be281c6 100644
|
||||
--- a/arch/mips/kernel/vmlinux.lds.S
|
||||
+++ b/arch/mips/kernel/vmlinux.lds.S
|
||||
@@ -1,5 +1,6 @@
|
||||
#include <asm/asm-offsets.h>
|
||||
#include <asm/page.h>
|
||||
+#include <asm/thread_info.h>
|
||||
#include <asm-generic/vmlinux.lds.h>
|
||||
|
||||
#undef mips
|
||||
@@ -73,7 +74,7 @@ SECTIONS
|
||||
.data : { /* Data */
|
||||
. = . + DATAOFFSET; /* for CONFIG_MAPPED_KERNEL */
|
||||
|
||||
- INIT_TASK_DATA(PAGE_SIZE)
|
||||
+ INIT_TASK_DATA(THREAD_SIZE)
|
||||
NOSAVE_DATA
|
||||
CACHELINE_ALIGNED_DATA(1 << CONFIG_MIPS_L1_CACHE_SHIFT)
|
||||
READ_MOSTLY_DATA(1 << CONFIG_MIPS_L1_CACHE_SHIFT)
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+67
@@ -0,0 +1,67 @@
|
||||
From f6ba94c29333fa6df9b3b553415e93bafbd3c831 Mon Sep 17 00:00:00 2001
|
||||
From: Artem Bityutskiy <Artem.Bityutskiy@linux.intel.com>
|
||||
Date: Sat, 14 Jul 2012 14:33:09 +0300
|
||||
Subject: [PATCH 067/109] UBIFS: fix a bug in empty space fix-up
|
||||
|
||||
commit c6727932cfdb13501108b16c38463c09d5ec7a74 upstream.
|
||||
|
||||
UBIFS has a feature called "empty space fix-up" which is a quirk to work-around
|
||||
limitations of dumb flasher programs. Namely, of those flashers that are unable
|
||||
to skip NAND pages full of 0xFFs while flashing, resulting in empty space at
|
||||
the end of half-filled eraseblocks to be unusable for UBIFS. This feature is
|
||||
relatively new (introduced in v3.0).
|
||||
|
||||
The fix-up routine (fixup_free_space()) is executed only once at the very first
|
||||
mount if the superblock has the 'space_fixup' flag set (can be done with -F
|
||||
option of mkfs.ubifs). It basically reads all the UBIFS data and metadata and
|
||||
writes it back to the same LEB. The routine assumes the image is pristine and
|
||||
does not have anything in the journal.
|
||||
|
||||
There was a bug in 'fixup_free_space()' where it fixed up the log incorrectly.
|
||||
All but one LEB of the log of a pristine file-system are empty. And one
|
||||
contains just a commit start node. And 'fixup_free_space()' just unmapped this
|
||||
LEB, which resulted in wiping the commit start node. As a result, some users
|
||||
were unable to mount the file-system next time with the following symptom:
|
||||
|
||||
UBIFS error (pid 1): replay_log_leb: first log node at LEB 3:0 is not CS node
|
||||
UBIFS error (pid 1): replay_log_leb: log error detected while replaying the log at LEB 3:0
|
||||
|
||||
The root-cause of this bug was that 'fixup_free_space()' wrongly assumed
|
||||
that the beginning of empty space in the log head (c->lhead_offs) was known
|
||||
on mount. However, it is not the case - it was always 0. UBIFS does not store
|
||||
it in the master node and finds it out by scanning the log on every mount.
|
||||
|
||||
The fix is simple - just pass commit start node size instead of 0 to
|
||||
'fixup_leb()'.
|
||||
|
||||
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@linux.intel.com>
|
||||
Reported-by: Iwo Mergler <Iwo.Mergler@netcommwireless.com>
|
||||
Tested-by: Iwo Mergler <Iwo.Mergler@netcommwireless.com>
|
||||
Reported-by: James Nute <newten82@gmail.com>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
fs/ubifs/sb.c | 8 ++++++--
|
||||
1 files changed, 6 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c
|
||||
index 6094c5a..b73ecd8 100644
|
||||
--- a/fs/ubifs/sb.c
|
||||
+++ b/fs/ubifs/sb.c
|
||||
@@ -715,8 +715,12 @@ static int fixup_free_space(struct ubifs_info *c)
|
||||
lnum = ubifs_next_log_lnum(c, lnum);
|
||||
}
|
||||
|
||||
- /* Fixup the current log head */
|
||||
- err = fixup_leb(c, c->lhead_lnum, c->lhead_offs);
|
||||
+ /*
|
||||
+ * Fixup the log head which contains the only a CS node at the
|
||||
+ * beginning.
|
||||
+ */
|
||||
+ err = fixup_leb(c, c->lhead_lnum,
|
||||
+ ALIGN(UBIFS_CS_NODE_SZ, c->min_io_size));
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+211
@@ -0,0 +1,211 @@
|
||||
From b4c39a3690fd0d723f50eba441fe567e8fee68f1 Mon Sep 17 00:00:00 2001
|
||||
From: Boaz Harrosh <bharrosh@panasas.com>
|
||||
Date: Fri, 8 Jun 2012 01:19:07 +0300
|
||||
Subject: [PATCH 068/109] ore: Fix NFS crash by supporting any unaligned RAID
|
||||
IO
|
||||
|
||||
commit 9ff19309a9623f2963ac5a136782ea4d8b5d67fb upstream.
|
||||
|
||||
In RAID_5/6 we used to not permit an IO whose end
|
||||
byte is not stripe_size aligned and spans more than one stripe.
|
||||
i.e. the caller must check if after submission the actual
|
||||
transferred bytes is shorter, and would need to resubmit
|
||||
a new IO with the remainder.
|
||||
|
||||
Exofs supports this, and NFS was supposed to support this
|
||||
as well with its short write mechanism. But late testing has
|
||||
exposed a CRASH when this is used with non-RPC layout-drivers.
|
||||
|
||||
The change at NFS is deep and risky, in its place the fix
|
||||
at ORE to lift the limitation is actually clean and simple.
|
||||
So here it is below.
|
||||
|
||||
The principle here is that in the case of unaligned IO on
|
||||
both ends, beginning and end, we will send two read requests
|
||||
one like old code, before the calculation of the first stripe,
|
||||
and also a new site, before the calculation of the last stripe.
|
||||
If any "boundary" is aligned or the complete IO is within a single
|
||||
stripe, we do a single read like before.
|
||||
|
||||
The code is clean and simple by splitting the old _read_4_write
|
||||
into 3 even parts:
|
||||
1._read_4_write_first_stripe
|
||||
2. _read_4_write_last_stripe
|
||||
3. _read_4_write_execute
|
||||
|
||||
And calling 1+3 at the same place as before. 2+3 before last
|
||||
stripe, and in the case of all in a single stripe then 1+2+3
|
||||
is performed additively.
|
||||
|
||||
Why did I not think of it before. Well I had a strike of
|
||||
genius because I have stared at this code for 2 years, and did
|
||||
not find this simple solution, til today. Not that I did not try.
|
||||
|
||||
This solution is much better for NFS than the previous supposedly
|
||||
solution because the short write was dealt with out-of-band after
|
||||
IO_done, which would cause a seeky IO pattern, whereas in here
|
||||
we execute in order. At both solutions we do 2 separate reads, only
|
||||
here we do it within a single IO request. (And actually combine two
|
||||
writes into a single submission)
|
||||
|
||||
NFS/exofs code need not change since the ORE API communicates the new
|
||||
shorter length on return, what will happen is that this case would not
|
||||
occur anymore.
|
||||
|
||||
hurray!!
|
||||
|
||||
[Stable this is an NFS bug since 3.2 Kernel should apply cleanly]
|
||||
Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
fs/exofs/ore_raid.c | 67 +++++++++++++++++++++++++++-----------------------
|
||||
1 files changed, 36 insertions(+), 31 deletions(-)
|
||||
|
||||
diff --git a/fs/exofs/ore_raid.c b/fs/exofs/ore_raid.c
|
||||
index d222c77..fff2070 100644
|
||||
--- a/fs/exofs/ore_raid.c
|
||||
+++ b/fs/exofs/ore_raid.c
|
||||
@@ -461,16 +461,12 @@ static void _mark_read4write_pages_uptodate(struct ore_io_state *ios, int ret)
|
||||
* ios->sp2d[p][*], xor is calculated the same way. These pages are
|
||||
* allocated/freed and don't go through cache
|
||||
*/
|
||||
-static int _read_4_write(struct ore_io_state *ios)
|
||||
+static int _read_4_write_first_stripe(struct ore_io_state *ios)
|
||||
{
|
||||
- struct ore_io_state *ios_read;
|
||||
struct ore_striping_info read_si;
|
||||
struct __stripe_pages_2d *sp2d = ios->sp2d;
|
||||
u64 offset = ios->si.first_stripe_start;
|
||||
- u64 last_stripe_end;
|
||||
- unsigned bytes_in_stripe = ios->si.bytes_in_stripe;
|
||||
- unsigned i, c, p, min_p = sp2d->pages_in_unit, max_p = -1;
|
||||
- int ret;
|
||||
+ unsigned c, p, min_p = sp2d->pages_in_unit, max_p = -1;
|
||||
|
||||
if (offset == ios->offset) /* Go to start collect $200 */
|
||||
goto read_last_stripe;
|
||||
@@ -478,6 +474,9 @@ static int _read_4_write(struct ore_io_state *ios)
|
||||
min_p = _sp2d_min_pg(sp2d);
|
||||
max_p = _sp2d_max_pg(sp2d);
|
||||
|
||||
+ ORE_DBGMSG("stripe_start=0x%llx ios->offset=0x%llx min_p=%d max_p=%d\n",
|
||||
+ offset, ios->offset, min_p, max_p);
|
||||
+
|
||||
for (c = 0; ; c++) {
|
||||
ore_calc_stripe_info(ios->layout, offset, 0, &read_si);
|
||||
read_si.obj_offset += min_p * PAGE_SIZE;
|
||||
@@ -512,6 +511,18 @@ static int _read_4_write(struct ore_io_state *ios)
|
||||
}
|
||||
|
||||
read_last_stripe:
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static int _read_4_write_last_stripe(struct ore_io_state *ios)
|
||||
+{
|
||||
+ struct ore_striping_info read_si;
|
||||
+ struct __stripe_pages_2d *sp2d = ios->sp2d;
|
||||
+ u64 offset;
|
||||
+ u64 last_stripe_end;
|
||||
+ unsigned bytes_in_stripe = ios->si.bytes_in_stripe;
|
||||
+ unsigned c, p, min_p = sp2d->pages_in_unit, max_p = -1;
|
||||
+
|
||||
offset = ios->offset + ios->length;
|
||||
if (offset % PAGE_SIZE)
|
||||
_add_to_r4w_last_page(ios, &offset);
|
||||
@@ -527,15 +538,15 @@ read_last_stripe:
|
||||
c = _dev_order(ios->layout->group_width * ios->layout->mirrors_p1,
|
||||
ios->layout->mirrors_p1, read_si.par_dev, read_si.dev);
|
||||
|
||||
- BUG_ON(ios->si.first_stripe_start + bytes_in_stripe != last_stripe_end);
|
||||
- /* unaligned IO must be within a single stripe */
|
||||
-
|
||||
if (min_p == sp2d->pages_in_unit) {
|
||||
/* Didn't do it yet */
|
||||
min_p = _sp2d_min_pg(sp2d);
|
||||
max_p = _sp2d_max_pg(sp2d);
|
||||
}
|
||||
|
||||
+ ORE_DBGMSG("offset=0x%llx stripe_end=0x%llx min_p=%d max_p=%d\n",
|
||||
+ offset, last_stripe_end, min_p, max_p);
|
||||
+
|
||||
while (offset < last_stripe_end) {
|
||||
struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p];
|
||||
|
||||
@@ -568,6 +579,15 @@ read_last_stripe:
|
||||
}
|
||||
|
||||
read_it:
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static int _read_4_write_execute(struct ore_io_state *ios)
|
||||
+{
|
||||
+ struct ore_io_state *ios_read;
|
||||
+ unsigned i;
|
||||
+ int ret;
|
||||
+
|
||||
ios_read = ios->ios_read_4_write;
|
||||
if (!ios_read)
|
||||
return 0;
|
||||
@@ -591,6 +611,8 @@ read_it:
|
||||
}
|
||||
|
||||
_mark_read4write_pages_uptodate(ios_read, ret);
|
||||
+ ore_put_io_state(ios_read);
|
||||
+ ios->ios_read_4_write = NULL; /* Might need a reuse at last stripe */
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -626,8 +648,11 @@ int _ore_add_parity_unit(struct ore_io_state *ios,
|
||||
/* If first stripe, Read in all read4write pages
|
||||
* (if needed) before we calculate the first parity.
|
||||
*/
|
||||
- _read_4_write(ios);
|
||||
+ _read_4_write_first_stripe(ios);
|
||||
}
|
||||
+ if (!cur_len) /* If last stripe r4w pages of last stripe */
|
||||
+ _read_4_write_last_stripe(ios);
|
||||
+ _read_4_write_execute(ios);
|
||||
|
||||
for (i = 0; i < num_pages; i++) {
|
||||
pages[i] = _raid_page_alloc();
|
||||
@@ -654,34 +679,14 @@ int _ore_add_parity_unit(struct ore_io_state *ios,
|
||||
|
||||
int _ore_post_alloc_raid_stuff(struct ore_io_state *ios)
|
||||
{
|
||||
- struct ore_layout *layout = ios->layout;
|
||||
-
|
||||
if (ios->parity_pages) {
|
||||
+ struct ore_layout *layout = ios->layout;
|
||||
unsigned pages_in_unit = layout->stripe_unit / PAGE_SIZE;
|
||||
- unsigned stripe_size = ios->si.bytes_in_stripe;
|
||||
- u64 last_stripe, first_stripe;
|
||||
|
||||
if (_sp2d_alloc(pages_in_unit, layout->group_width,
|
||||
layout->parity, &ios->sp2d)) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
-
|
||||
- /* Round io down to last full strip */
|
||||
- first_stripe = div_u64(ios->offset, stripe_size);
|
||||
- last_stripe = div_u64(ios->offset + ios->length, stripe_size);
|
||||
-
|
||||
- /* If an IO spans more then a single stripe it must end at
|
||||
- * a stripe boundary. The reminder at the end is pushed into the
|
||||
- * next IO.
|
||||
- */
|
||||
- if (last_stripe != first_stripe) {
|
||||
- ios->length = last_stripe * stripe_size - ios->offset;
|
||||
-
|
||||
- BUG_ON(!ios->length);
|
||||
- ios->nr_pages = (ios->length + PAGE_SIZE - 1) /
|
||||
- PAGE_SIZE;
|
||||
- ios->si.length = ios->length; /*make it consistent */
|
||||
- }
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+50
@@ -0,0 +1,50 @@
|
||||
From a2f43c94b074e0bf567ddc35e17504bbcd237ae1 Mon Sep 17 00:00:00 2001
|
||||
From: Boaz Harrosh <bharrosh@panasas.com>
|
||||
Date: Fri, 8 Jun 2012 04:30:40 +0300
|
||||
Subject: [PATCH 069/109] ore: Remove support of partial IO request (NFS
|
||||
crash)
|
||||
|
||||
commit 62b62ad873f2accad9222a4d7ffbe1e93f6714c1 upstream.
|
||||
|
||||
Due to OOM situations the ore might fail to allocate all resources
|
||||
needed for IO of the full request. If some progress was possible
|
||||
it would proceed with a partial/short request, for the sake of
|
||||
forward progress.
|
||||
|
||||
Since this crashes NFS-core and exofs is just fine without it just
|
||||
remove this contraption, and fail.
|
||||
|
||||
TODO:
|
||||
Support real forward progress with some reserved allocations
|
||||
of resources, such as mem pools and/or bio_sets
|
||||
|
||||
[Bug since 3.2 Kernel]
|
||||
CC: Benny Halevy <bhalevy@tonian.com>
|
||||
Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
fs/exofs/ore.c | 8 +-------
|
||||
1 files changed, 1 insertions(+), 7 deletions(-)
|
||||
|
||||
diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c
|
||||
index 49cf230..24a49d4 100644
|
||||
--- a/fs/exofs/ore.c
|
||||
+++ b/fs/exofs/ore.c
|
||||
@@ -735,13 +735,7 @@ static int _prepare_for_striping(struct ore_io_state *ios)
|
||||
out:
|
||||
ios->numdevs = devs_in_group;
|
||||
ios->pages_consumed = cur_pg;
|
||||
- if (unlikely(ret)) {
|
||||
- if (length == ios->length)
|
||||
- return ret;
|
||||
- else
|
||||
- ios->length -= length;
|
||||
- }
|
||||
- return 0;
|
||||
+ return ret;
|
||||
}
|
||||
|
||||
int ore_create(struct ore_io_state *ios)
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+46
@@ -0,0 +1,46 @@
|
||||
From 7b47a0e2114e8530614a25d7ec998fd52d069853 Mon Sep 17 00:00:00 2001
|
||||
From: Boaz Harrosh <bharrosh@panasas.com>
|
||||
Date: Fri, 8 Jun 2012 05:29:40 +0300
|
||||
Subject: [PATCH 070/109] pnfs-obj: don't leak objio_state if ore_write/read
|
||||
fails
|
||||
|
||||
commit 9909d45a8557455ca5f8ee7af0f253debc851f1a upstream.
|
||||
|
||||
[Bug since 3.2 Kernel]
|
||||
Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
fs/nfs/objlayout/objio_osd.c | 9 +++++++--
|
||||
1 files changed, 7 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
|
||||
index 55d0128..0e7b3fc 100644
|
||||
--- a/fs/nfs/objlayout/objio_osd.c
|
||||
+++ b/fs/nfs/objlayout/objio_osd.c
|
||||
@@ -433,7 +433,10 @@ int objio_read_pagelist(struct nfs_read_data *rdata)
|
||||
objios->ios->done = _read_done;
|
||||
dprintk("%s: offset=0x%llx length=0x%x\n", __func__,
|
||||
rdata->args.offset, rdata->args.count);
|
||||
- return ore_read(objios->ios);
|
||||
+ ret = ore_read(objios->ios);
|
||||
+ if (unlikely(ret))
|
||||
+ objio_free_result(&objios->oir);
|
||||
+ return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -517,8 +520,10 @@ int objio_write_pagelist(struct nfs_write_data *wdata, int how)
|
||||
dprintk("%s: offset=0x%llx length=0x%x\n", __func__,
|
||||
wdata->args.offset, wdata->args.count);
|
||||
ret = ore_write(objios->ios);
|
||||
- if (unlikely(ret))
|
||||
+ if (unlikely(ret)) {
|
||||
+ objio_free_result(&objios->oir);
|
||||
return ret;
|
||||
+ }
|
||||
|
||||
if (objios->sync)
|
||||
_write_done(objios->ios, objios);
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+71
@@ -0,0 +1,71 @@
|
||||
From e4750a0414e24bcd0106493a2f8f251dd02264bf Mon Sep 17 00:00:00 2001
|
||||
From: Boaz Harrosh <bharrosh@panasas.com>
|
||||
Date: Fri, 8 Jun 2012 02:02:30 +0300
|
||||
Subject: [PATCH 071/109] pnfs-obj: Fix __r4w_get_page when offset is beyond
|
||||
i_size
|
||||
|
||||
commit c999ff68029ebd0f56ccae75444f640f6d5a27d2 upstream.
|
||||
|
||||
It is very common for the end of the file to be unaligned on
|
||||
stripe size. But since we know it's beyond file's end then
|
||||
the XOR should be performed with all zeros.
|
||||
|
||||
Old code used to just read zeros out of the OSD devices, which is a great
|
||||
waste. But what scares me more about this situation is that we now have
|
||||
pages attached to the file's mapping that are beyond i_size. I don't
|
||||
like the kind of bugs this calls for.
|
||||
|
||||
Fix both birds, by returning a global zero_page, if offset is beyond
|
||||
i_size.
|
||||
|
||||
TODO:
|
||||
Change the API to ->__r4w_get_page() so a NULL can be
|
||||
returned without being considered as error, since XOR API
|
||||
treats NULL entries as zero_pages.
|
||||
|
||||
[Bug since 3.2. Should apply the same way to all Kernels since]
|
||||
Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
|
||||
[bwh: Backported to 3.2: adjust for lack of wdata->header]
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
fs/nfs/objlayout/objio_osd.c | 16 +++++++++++++---
|
||||
1 files changed, 13 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
|
||||
index 0e7b3fc..a03ee52 100644
|
||||
--- a/fs/nfs/objlayout/objio_osd.c
|
||||
+++ b/fs/nfs/objlayout/objio_osd.c
|
||||
@@ -467,8 +467,16 @@ static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)
|
||||
struct objio_state *objios = priv;
|
||||
struct nfs_write_data *wdata = objios->oir.rpcdata;
|
||||
pgoff_t index = offset / PAGE_SIZE;
|
||||
- struct page *page = find_get_page(wdata->inode->i_mapping, index);
|
||||
+ struct page *page;
|
||||
+ loff_t i_size = i_size_read(wdata->inode);
|
||||
|
||||
+ if (offset >= i_size) {
|
||||
+ *uptodate = true;
|
||||
+ dprintk("%s: g_zero_page index=0x%lx\n", __func__, index);
|
||||
+ return ZERO_PAGE(0);
|
||||
+ }
|
||||
+
|
||||
+ page = find_get_page(wdata->inode->i_mapping, index);
|
||||
if (!page) {
|
||||
page = find_or_create_page(wdata->inode->i_mapping,
|
||||
index, GFP_NOFS);
|
||||
@@ -489,8 +497,10 @@ static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)
|
||||
|
||||
static void __r4w_put_page(void *priv, struct page *page)
|
||||
{
|
||||
- dprintk("%s: index=0x%lx\n", __func__, page->index);
|
||||
- page_cache_release(page);
|
||||
+ dprintk("%s: index=0x%lx\n", __func__,
|
||||
+ (page == ZERO_PAGE(0)) ? -1UL : page->index);
|
||||
+ if (ZERO_PAGE(0) != page)
|
||||
+ page_cache_release(page);
|
||||
return;
|
||||
}
|
||||
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+114
@@ -0,0 +1,114 @@
|
||||
From 035afb0de8406d0f820abf43471d51a377add326 Mon Sep 17 00:00:00 2001
|
||||
From: Mikulas Patocka <mpatocka@redhat.com>
|
||||
Date: Fri, 20 Jul 2012 14:25:03 +0100
|
||||
Subject: [PATCH 072/109] dm raid1: fix crash with mirror recovery and discard
|
||||
|
||||
commit 751f188dd5ab95b3f2b5f2f467c38aae5a2877eb upstream.
|
||||
|
||||
This patch fixes a crash when a discard request is sent during mirror
|
||||
recovery.
|
||||
|
||||
Firstly, some background. Generally, the following sequence happens during
|
||||
mirror synchronization:
|
||||
- function do_recovery is called
|
||||
- do_recovery calls dm_rh_recovery_prepare
|
||||
- dm_rh_recovery_prepare uses a semaphore to limit the number of
|
||||
simultaneously recovered regions (by default the semaphore value is 1,
|
||||
so only one region at a time is recovered)
|
||||
- dm_rh_recovery_prepare calls __rh_recovery_prepare,
|
||||
__rh_recovery_prepare asks the log driver for the next region to
|
||||
recover. Then, it sets the region state to DM_RH_RECOVERING. If there
|
||||
are no pending I/Os on this region, the region is added to
|
||||
quiesced_regions list. If there are pending I/Os, the region is not
|
||||
added to any list. It is added to the quiesced_regions list later (by
|
||||
dm_rh_dec function) when all I/Os finish.
|
||||
- when the region is on quiesced_regions list, there are no I/Os in
|
||||
flight on this region. The region is popped from the list in
|
||||
dm_rh_recovery_start function. Then, a kcopyd job is started in the
|
||||
recover function.
|
||||
- when the kcopyd job finishes, recovery_complete is called. It calls
|
||||
dm_rh_recovery_end. dm_rh_recovery_end adds the region to
|
||||
recovered_regions or failed_recovered_regions list (depending on
|
||||
whether the copy operation was successful or not).
|
||||
|
||||
The above mechanism assumes that if the region is in DM_RH_RECOVERING
|
||||
state, no new I/Os are started on this region. When I/O is started,
|
||||
dm_rh_inc_pending is called, which increases reg->pending count. When
|
||||
I/O is finished, dm_rh_dec is called. It decreases reg->pending count.
|
||||
If the count is zero and the region was in DM_RH_RECOVERING state,
|
||||
dm_rh_dec adds it to the quiesced_regions list.
|
||||
|
||||
Consequently, if we call dm_rh_inc_pending/dm_rh_dec while the region is
|
||||
in DM_RH_RECOVERING state, it could be added to quiesced_regions list
|
||||
multiple times or it could be added to this list when kcopyd is copying
|
||||
data (it is assumed that the region is not on any list while kcopyd does
|
||||
its jobs). This results in memory corruption and crash.
|
||||
|
||||
There already exist bypasses for REQ_FLUSH requests: REQ_FLUSH requests
|
||||
do not belong to any region, so they are always added to the sync list
|
||||
in do_writes. dm_rh_inc_pending does not increase count for REQ_FLUSH
|
||||
requests. In mirror_end_io, dm_rh_dec is never called for REQ_FLUSH
|
||||
requests. These bypasses avoid the crash possibility described above.
|
||||
|
||||
These bypasses were improperly implemented for REQ_DISCARD when
|
||||
the mirror target gained discard support in commit
|
||||
5fc2ffeabb9ee0fc0e71ff16b49f34f0ed3d05b4 (dm raid1: support discard).
|
||||
|
||||
In do_writes, REQ_DISCARD requests are always added to the sync queue and
|
||||
immediately dispatched (even if the region is in DM_RH_RECOVERING). However,
|
||||
dm_rh_inc and dm_rh_dec are called for REQ_DISCARD requests. So it violates the
|
||||
rule that no I/Os are started on DM_RH_RECOVERING regions, and causes the list
|
||||
corruption described above.
|
||||
|
||||
This patch changes it so that REQ_DISCARD requests follow the same path
|
||||
as REQ_FLUSH. This avoids the crash.
|
||||
|
||||
Reference: https://bugzilla.redhat.com/837607
|
||||
|
||||
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
|
||||
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
drivers/md/dm-raid1.c | 2 +-
|
||||
drivers/md/dm-region-hash.c | 5 ++++-
|
||||
2 files changed, 5 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
|
||||
index 9bfd057..42ef54f 100644
|
||||
--- a/drivers/md/dm-raid1.c
|
||||
+++ b/drivers/md/dm-raid1.c
|
||||
@@ -1210,7 +1210,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio,
|
||||
* We need to dec pending if this was a write.
|
||||
*/
|
||||
if (rw == WRITE) {
|
||||
- if (!(bio->bi_rw & REQ_FLUSH))
|
||||
+ if (!(bio->bi_rw & (REQ_FLUSH | REQ_DISCARD)))
|
||||
dm_rh_dec(ms->rh, map_context->ll);
|
||||
return error;
|
||||
}
|
||||
diff --git a/drivers/md/dm-region-hash.c b/drivers/md/dm-region-hash.c
|
||||
index 7771ed2..69732e0 100644
|
||||
--- a/drivers/md/dm-region-hash.c
|
||||
+++ b/drivers/md/dm-region-hash.c
|
||||
@@ -404,6 +404,9 @@ void dm_rh_mark_nosync(struct dm_region_hash *rh, struct bio *bio)
|
||||
return;
|
||||
}
|
||||
|
||||
+ if (bio->bi_rw & REQ_DISCARD)
|
||||
+ return;
|
||||
+
|
||||
/* We must inform the log that the sync count has changed. */
|
||||
log->type->set_region_sync(log, region, 0);
|
||||
|
||||
@@ -524,7 +527,7 @@ void dm_rh_inc_pending(struct dm_region_hash *rh, struct bio_list *bios)
|
||||
struct bio *bio;
|
||||
|
||||
for (bio = bios->head; bio; bio = bio->bi_next) {
|
||||
- if (bio->bi_rw & REQ_FLUSH)
|
||||
+ if (bio->bi_rw & (REQ_FLUSH | REQ_DISCARD))
|
||||
continue;
|
||||
rh_inc(rh, dm_rh_bio_to_region(rh, bio));
|
||||
}
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+41
@@ -0,0 +1,41 @@
|
||||
From e8cf7231ce4f6464f8962ae6ef0421da40ddad15 Mon Sep 17 00:00:00 2001
|
||||
From: Mikulas Patocka <mpatocka@redhat.com>
|
||||
Date: Fri, 20 Jul 2012 14:25:07 +0100
|
||||
Subject: [PATCH 073/109] dm raid1: set discard_zeroes_data_unsupported
|
||||
|
||||
commit 7c8d3a42fe1c58a7e8fd3f6a013e7d7b474ff931 upstream.
|
||||
|
||||
We can't guarantee that REQ_DISCARD on dm-mirror zeroes the data even if
|
||||
the underlying disks support zero on discard. So this patch sets
|
||||
ti->discard_zeroes_data_unsupported.
|
||||
|
||||
For example, if the mirror is in the process of resynchronizing, it may
|
||||
happen that kcopyd reads a piece of data, then discard is sent on the
|
||||
same area and then kcopyd writes the piece of data to another leg.
|
||||
Consequently, the data is not zeroed.
|
||||
|
||||
The flag was made available by commit 983c7db347db8ce2d8453fd1d89b7a4bb6920d56
|
||||
(dm crypt: always disable discard_zeroes_data).
|
||||
|
||||
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
|
||||
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
drivers/md/dm-raid1.c | 1 +
|
||||
1 files changed, 1 insertions(+), 0 deletions(-)
|
||||
|
||||
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
|
||||
index 42ef54f..dae2b7a 100644
|
||||
--- a/drivers/md/dm-raid1.c
|
||||
+++ b/drivers/md/dm-raid1.c
|
||||
@@ -1080,6 +1080,7 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv)
|
||||
ti->split_io = dm_rh_get_region_size(ms->rh);
|
||||
ti->num_flush_requests = 1;
|
||||
ti->num_discard_requests = 1;
|
||||
+ ti->discard_zeroes_data_unsupported = 1;
|
||||
|
||||
ms->kmirrord_wq = alloc_workqueue("kmirrord",
|
||||
WQ_NON_REENTRANT | WQ_MEM_RECLAIM, 0);
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+349
@@ -0,0 +1,349 @@
|
||||
From 19aeba1469884ed9a789b143cf73ce047663c095 Mon Sep 17 00:00:00 2001
|
||||
From: John Stultz <john.stultz@linaro.org>
|
||||
Date: Tue, 17 Jul 2012 03:05:14 -0400
|
||||
Subject: [PATCH 074/109] ntp: Fix leap-second hrtimer livelock
|
||||
|
||||
This is a backport of 6b43ae8a619d17c4935c3320d2ef9e92bdeed05d
|
||||
|
||||
This should have been backported when it was committed, but I
|
||||
mistook the problem as requiring the ntp_lock changes
|
||||
that landed in 3.4 in order for it to occur.
|
||||
|
||||
Unfortunately the same issue can happen (with only one cpu)
|
||||
as follows:
|
||||
do_adjtimex()
|
||||
write_seqlock_irq(&xtime_lock);
|
||||
process_adjtimex_modes()
|
||||
process_adj_status()
|
||||
ntp_start_leap_timer()
|
||||
hrtimer_start()
|
||||
hrtimer_reprogram()
|
||||
tick_program_event()
|
||||
clockevents_program_event()
|
||||
ktime_get()
|
||||
seq = req_seqbegin(xtime_lock); [DEADLOCK]
|
||||
|
||||
This deadlock will not always occur, as it requires the
|
||||
leap_timer to force a hrtimer_reprogram which only happens
|
||||
if it's set and there's no sooner timer to expire.
|
||||
|
||||
NOTE: This patch, being faithful to the original commit,
|
||||
introduces a bug (we don't update wall_to_monotonic),
|
||||
which will be resolved by backporting a following fix.
|
||||
|
||||
Original commit message below:
|
||||
|
||||
Since commit 7dffa3c673fbcf835cd7be80bb4aec8ad3f51168 the ntp
|
||||
subsystem has used an hrtimer for triggering the leapsecond
|
||||
adjustment. However, this can cause a potential livelock.
|
||||
|
||||
Thomas diagnosed this as the following pattern:
|
||||
CPU 0 CPU 1
|
||||
do_adjtimex()
|
||||
spin_lock_irq(&ntp_lock);
|
||||
process_adjtimex_modes(); timer_interrupt()
|
||||
process_adj_status(); do_timer()
|
||||
ntp_start_leap_timer(); write_lock(&xtime_lock);
|
||||
hrtimer_start(); update_wall_time();
|
||||
hrtimer_reprogram(); ntp_tick_length()
|
||||
tick_program_event() spin_lock(&ntp_lock);
|
||||
clockevents_program_event()
|
||||
ktime_get()
|
||||
seq = req_seqbegin(xtime_lock);
|
||||
|
||||
This patch tries to avoid the problem by reverting back to not using
|
||||
an hrtimer to inject leapseconds, and instead we handle the leapsecond
|
||||
processing in the second_overflow() function.
|
||||
|
||||
The downside to this change is that on systems that support highres
|
||||
timers, the leap second processing will occur on a HZ tick boundary,
|
||||
(ie: ~1-10ms, depending on HZ) after the leap second instead of
|
||||
possibly sooner (~34us in my tests w/ x86_64 lapic).
|
||||
|
||||
This patch applies on top of tip/timers/core.
|
||||
|
||||
CC: Sasha Levin <levinsasha928@gmail.com>
|
||||
CC: Thomas Gleixner <tglx@linutronix.de>
|
||||
Reported-by: Sasha Levin <levinsasha928@gmail.com>
|
||||
Diagnoised-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
Tested-by: Sasha Levin <levinsasha928@gmail.com>
|
||||
Cc: Prarit Bhargava <prarit@redhat.com>
|
||||
Cc: Thomas Gleixner <tglx@linutronix.de>
|
||||
Cc: Linux Kernel <linux-kernel@vger.kernel.org>
|
||||
Signed-off-by: John Stultz <john.stultz@linaro.org>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
include/linux/timex.h | 2 +-
|
||||
kernel/time/ntp.c | 122 +++++++++++++++------------------------------
|
||||
kernel/time/timekeeping.c | 18 +++----
|
||||
3 files changed, 48 insertions(+), 94 deletions(-)
|
||||
|
||||
diff --git a/include/linux/timex.h b/include/linux/timex.h
|
||||
index aa60fe7..08e90fb 100644
|
||||
--- a/include/linux/timex.h
|
||||
+++ b/include/linux/timex.h
|
||||
@@ -266,7 +266,7 @@ static inline int ntp_synced(void)
|
||||
/* Returns how long ticks are at present, in ns / 2^NTP_SCALE_SHIFT. */
|
||||
extern u64 tick_length;
|
||||
|
||||
-extern void second_overflow(void);
|
||||
+extern int second_overflow(unsigned long secs);
|
||||
extern void update_ntp_one_tick(void);
|
||||
extern int do_adjtimex(struct timex *);
|
||||
extern void hardpps(const struct timespec *, const struct timespec *);
|
||||
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
|
||||
index 4b85a7a..4508f7f 100644
|
||||
--- a/kernel/time/ntp.c
|
||||
+++ b/kernel/time/ntp.c
|
||||
@@ -31,8 +31,6 @@ unsigned long tick_nsec;
|
||||
u64 tick_length;
|
||||
static u64 tick_length_base;
|
||||
|
||||
-static struct hrtimer leap_timer;
|
||||
-
|
||||
#define MAX_TICKADJ 500LL /* usecs */
|
||||
#define MAX_TICKADJ_SCALED \
|
||||
(((MAX_TICKADJ * NSEC_PER_USEC) << NTP_SCALE_SHIFT) / NTP_INTERVAL_FREQ)
|
||||
@@ -350,60 +348,60 @@ void ntp_clear(void)
|
||||
}
|
||||
|
||||
/*
|
||||
- * Leap second processing. If in leap-insert state at the end of the
|
||||
- * day, the system clock is set back one second; if in leap-delete
|
||||
- * state, the system clock is set ahead one second.
|
||||
+ * this routine handles the overflow of the microsecond field
|
||||
+ *
|
||||
+ * The tricky bits of code to handle the accurate clock support
|
||||
+ * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
|
||||
+ * They were originally developed for SUN and DEC kernels.
|
||||
+ * All the kudos should go to Dave for this stuff.
|
||||
+ *
|
||||
+ * Also handles leap second processing, and returns leap offset
|
||||
*/
|
||||
-static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer)
|
||||
+int second_overflow(unsigned long secs)
|
||||
{
|
||||
- enum hrtimer_restart res = HRTIMER_NORESTART;
|
||||
-
|
||||
- write_seqlock(&xtime_lock);
|
||||
+ int leap = 0;
|
||||
+ s64 delta;
|
||||
|
||||
+ /*
|
||||
+ * Leap second processing. If in leap-insert state at the end of the
|
||||
+ * day, the system clock is set back one second; if in leap-delete
|
||||
+ * state, the system clock is set ahead one second.
|
||||
+ */
|
||||
switch (time_state) {
|
||||
case TIME_OK:
|
||||
+ if (time_status & STA_INS)
|
||||
+ time_state = TIME_INS;
|
||||
+ else if (time_status & STA_DEL)
|
||||
+ time_state = TIME_DEL;
|
||||
break;
|
||||
case TIME_INS:
|
||||
- timekeeping_leap_insert(-1);
|
||||
- time_state = TIME_OOP;
|
||||
- printk(KERN_NOTICE
|
||||
- "Clock: inserting leap second 23:59:60 UTC\n");
|
||||
- hrtimer_add_expires_ns(&leap_timer, NSEC_PER_SEC);
|
||||
- res = HRTIMER_RESTART;
|
||||
+ if (secs % 86400 == 0) {
|
||||
+ leap = -1;
|
||||
+ time_state = TIME_OOP;
|
||||
+ printk(KERN_NOTICE
|
||||
+ "Clock: inserting leap second 23:59:60 UTC\n");
|
||||
+ }
|
||||
break;
|
||||
case TIME_DEL:
|
||||
- timekeeping_leap_insert(1);
|
||||
- time_tai--;
|
||||
- time_state = TIME_WAIT;
|
||||
- printk(KERN_NOTICE
|
||||
- "Clock: deleting leap second 23:59:59 UTC\n");
|
||||
+ if ((secs + 1) % 86400 == 0) {
|
||||
+ leap = 1;
|
||||
+ time_tai--;
|
||||
+ time_state = TIME_WAIT;
|
||||
+ printk(KERN_NOTICE
|
||||
+ "Clock: deleting leap second 23:59:59 UTC\n");
|
||||
+ }
|
||||
break;
|
||||
case TIME_OOP:
|
||||
time_tai++;
|
||||
time_state = TIME_WAIT;
|
||||
- /* fall through */
|
||||
+ break;
|
||||
+
|
||||
case TIME_WAIT:
|
||||
if (!(time_status & (STA_INS | STA_DEL)))
|
||||
time_state = TIME_OK;
|
||||
break;
|
||||
}
|
||||
|
||||
- write_sequnlock(&xtime_lock);
|
||||
-
|
||||
- return res;
|
||||
-}
|
||||
-
|
||||
-/*
|
||||
- * this routine handles the overflow of the microsecond field
|
||||
- *
|
||||
- * The tricky bits of code to handle the accurate clock support
|
||||
- * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
|
||||
- * They were originally developed for SUN and DEC kernels.
|
||||
- * All the kudos should go to Dave for this stuff.
|
||||
- */
|
||||
-void second_overflow(void)
|
||||
-{
|
||||
- s64 delta;
|
||||
|
||||
/* Bump the maxerror field */
|
||||
time_maxerror += MAXFREQ / NSEC_PER_USEC;
|
||||
@@ -423,23 +421,25 @@ void second_overflow(void)
|
||||
pps_dec_valid();
|
||||
|
||||
if (!time_adjust)
|
||||
- return;
|
||||
+ goto out;
|
||||
|
||||
if (time_adjust > MAX_TICKADJ) {
|
||||
time_adjust -= MAX_TICKADJ;
|
||||
tick_length += MAX_TICKADJ_SCALED;
|
||||
- return;
|
||||
+ goto out;
|
||||
}
|
||||
|
||||
if (time_adjust < -MAX_TICKADJ) {
|
||||
time_adjust += MAX_TICKADJ;
|
||||
tick_length -= MAX_TICKADJ_SCALED;
|
||||
- return;
|
||||
+ goto out;
|
||||
}
|
||||
|
||||
tick_length += (s64)(time_adjust * NSEC_PER_USEC / NTP_INTERVAL_FREQ)
|
||||
<< NTP_SCALE_SHIFT;
|
||||
time_adjust = 0;
|
||||
+out:
|
||||
+ return leap;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_GENERIC_CMOS_UPDATE
|
||||
@@ -501,27 +501,6 @@ static void notify_cmos_timer(void)
|
||||
static inline void notify_cmos_timer(void) { }
|
||||
#endif
|
||||
|
||||
-/*
|
||||
- * Start the leap seconds timer:
|
||||
- */
|
||||
-static inline void ntp_start_leap_timer(struct timespec *ts)
|
||||
-{
|
||||
- long now = ts->tv_sec;
|
||||
-
|
||||
- if (time_status & STA_INS) {
|
||||
- time_state = TIME_INS;
|
||||
- now += 86400 - now % 86400;
|
||||
- hrtimer_start(&leap_timer, ktime_set(now, 0), HRTIMER_MODE_ABS);
|
||||
-
|
||||
- return;
|
||||
- }
|
||||
-
|
||||
- if (time_status & STA_DEL) {
|
||||
- time_state = TIME_DEL;
|
||||
- now += 86400 - (now + 1) % 86400;
|
||||
- hrtimer_start(&leap_timer, ktime_set(now, 0), HRTIMER_MODE_ABS);
|
||||
- }
|
||||
-}
|
||||
|
||||
/*
|
||||
* Propagate a new txc->status value into the NTP state:
|
||||
@@ -546,22 +525,6 @@ static inline void process_adj_status(struct timex *txc, struct timespec *ts)
|
||||
time_status &= STA_RONLY;
|
||||
time_status |= txc->status & ~STA_RONLY;
|
||||
|
||||
- switch (time_state) {
|
||||
- case TIME_OK:
|
||||
- ntp_start_leap_timer(ts);
|
||||
- break;
|
||||
- case TIME_INS:
|
||||
- case TIME_DEL:
|
||||
- time_state = TIME_OK;
|
||||
- ntp_start_leap_timer(ts);
|
||||
- case TIME_WAIT:
|
||||
- if (!(time_status & (STA_INS | STA_DEL)))
|
||||
- time_state = TIME_OK;
|
||||
- break;
|
||||
- case TIME_OOP:
|
||||
- hrtimer_restart(&leap_timer);
|
||||
- break;
|
||||
- }
|
||||
}
|
||||
/*
|
||||
* Called with the xtime lock held, so we can access and modify
|
||||
@@ -643,9 +606,6 @@ int do_adjtimex(struct timex *txc)
|
||||
(txc->tick < 900000/USER_HZ ||
|
||||
txc->tick > 1100000/USER_HZ))
|
||||
return -EINVAL;
|
||||
-
|
||||
- if (txc->modes & ADJ_STATUS && time_state != TIME_OK)
|
||||
- hrtimer_cancel(&leap_timer);
|
||||
}
|
||||
|
||||
if (txc->modes & ADJ_SETOFFSET) {
|
||||
@@ -967,6 +927,4 @@ __setup("ntp_tick_adj=", ntp_tick_adj_setup);
|
||||
void __init ntp_init(void)
|
||||
{
|
||||
ntp_clear();
|
||||
- hrtimer_init(&leap_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
|
||||
- leap_timer.function = ntp_leap_second;
|
||||
}
|
||||
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
|
||||
index 2378413..4780a7d 100644
|
||||
--- a/kernel/time/timekeeping.c
|
||||
+++ b/kernel/time/timekeeping.c
|
||||
@@ -169,15 +169,6 @@ static struct timespec raw_time;
|
||||
/* flag for if timekeeping is suspended */
|
||||
int __read_mostly timekeeping_suspended;
|
||||
|
||||
-/* must hold xtime_lock */
|
||||
-void timekeeping_leap_insert(int leapsecond)
|
||||
-{
|
||||
- xtime.tv_sec += leapsecond;
|
||||
- wall_to_monotonic.tv_sec -= leapsecond;
|
||||
- update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock,
|
||||
- timekeeper.mult);
|
||||
-}
|
||||
-
|
||||
/**
|
||||
* timekeeping_forward_now - update clock to the current time
|
||||
*
|
||||
@@ -942,9 +933,11 @@ static cycle_t logarithmic_accumulation(cycle_t offset, int shift)
|
||||
|
||||
timekeeper.xtime_nsec += timekeeper.xtime_interval << shift;
|
||||
while (timekeeper.xtime_nsec >= nsecps) {
|
||||
+ int leap;
|
||||
timekeeper.xtime_nsec -= nsecps;
|
||||
xtime.tv_sec++;
|
||||
- second_overflow();
|
||||
+ leap = second_overflow(xtime.tv_sec);
|
||||
+ xtime.tv_sec += leap;
|
||||
}
|
||||
|
||||
/* Accumulate raw time */
|
||||
@@ -1050,9 +1043,12 @@ static void update_wall_time(void)
|
||||
* xtime.tv_nsec isn't larger then NSEC_PER_SEC
|
||||
*/
|
||||
if (unlikely(xtime.tv_nsec >= NSEC_PER_SEC)) {
|
||||
+ int leap;
|
||||
xtime.tv_nsec -= NSEC_PER_SEC;
|
||||
xtime.tv_sec++;
|
||||
- second_overflow();
|
||||
+ leap = second_overflow(xtime.tv_sec);
|
||||
+ xtime.tv_sec += leap;
|
||||
+
|
||||
}
|
||||
|
||||
/* check to see if there is a new clocksource to use */
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+42
@@ -0,0 +1,42 @@
|
||||
From 106227da17ad8a279e7e104b8592ada4e81dbd8b Mon Sep 17 00:00:00 2001
|
||||
From: Richard Cochran <richardcochran@gmail.com>
|
||||
Date: Thu, 26 Apr 2012 14:11:32 +0200
|
||||
Subject: [PATCH 075/109] ntp: Correct TAI offset during leap second
|
||||
|
||||
commit dd48d708ff3e917f6d6b6c2b696c3f18c019feed upstream.
|
||||
|
||||
When repeating a UTC time value during a leap second (when the UTC
|
||||
time should be 23:59:60), the TAI timescale should not stop. The kernel
|
||||
NTP code increments the TAI offset one second too late. This patch fixes
|
||||
the issue by incrementing the offset during the leap second itself.
|
||||
|
||||
Signed-off-by: Richard Cochran <richardcochran@gmail.com>
|
||||
Signed-off-by: John Stultz <john.stultz@linaro.org>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
kernel/time/ntp.c | 2 +-
|
||||
1 files changed, 1 insertions(+), 1 deletions(-)
|
||||
|
||||
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
|
||||
index 4508f7f..f1eb182 100644
|
||||
--- a/kernel/time/ntp.c
|
||||
+++ b/kernel/time/ntp.c
|
||||
@@ -378,6 +378,7 @@ int second_overflow(unsigned long secs)
|
||||
if (secs % 86400 == 0) {
|
||||
leap = -1;
|
||||
time_state = TIME_OOP;
|
||||
+ time_tai++;
|
||||
printk(KERN_NOTICE
|
||||
"Clock: inserting leap second 23:59:60 UTC\n");
|
||||
}
|
||||
@@ -392,7 +393,6 @@ int second_overflow(unsigned long secs)
|
||||
}
|
||||
break;
|
||||
case TIME_OOP:
|
||||
- time_tai++;
|
||||
time_state = TIME_WAIT;
|
||||
break;
|
||||
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+53
@@ -0,0 +1,53 @@
|
||||
From 7a063ddaad98f05f7976e0e8c9c1455cc9d0f5da Mon Sep 17 00:00:00 2001
|
||||
From: John Stultz <john.stultz@linaro.org>
|
||||
Date: Wed, 30 May 2012 10:54:57 -0700
|
||||
Subject: [PATCH 076/109] timekeeping: Fix CLOCK_MONOTONIC inconsistency
|
||||
during leapsecond
|
||||
|
||||
This is a backport of fad0c66c4bb836d57a5f125ecd38bed653ca863a
|
||||
which resolves a bug in the previous commit.
|
||||
|
||||
Commit 6b43ae8a61 (ntp: Fix leap-second hrtimer livelock) broke the
|
||||
leapsecond update of CLOCK_MONOTONIC. The missing leapsecond update to
|
||||
wall_to_monotonic causes discontinuities in CLOCK_MONOTONIC.
|
||||
|
||||
Adjust wall_to_monotonic when NTP inserted a leapsecond.
|
||||
|
||||
Reported-by: Richard Cochran <richardcochran@gmail.com>
|
||||
Signed-off-by: John Stultz <john.stultz@linaro.org>
|
||||
Tested-by: Richard Cochran <richardcochran@gmail.com>
|
||||
Link: http://lkml.kernel.org/r/1338400497-12420-1-git-send-email-john.stultz@linaro.org
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
Cc: Prarit Bhargava <prarit@redhat.com>
|
||||
Cc: Thomas Gleixner <tglx@linutronix.de>
|
||||
Cc: Linux Kernel <linux-kernel@vger.kernel.org>
|
||||
Signed-off-by: John Stultz <johnstul@us.ibm.com>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
kernel/time/timekeeping.c | 3 ++-
|
||||
1 files changed, 2 insertions(+), 1 deletions(-)
|
||||
|
||||
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
|
||||
index 4780a7d..5c9b67e 100644
|
||||
--- a/kernel/time/timekeeping.c
|
||||
+++ b/kernel/time/timekeeping.c
|
||||
@@ -938,6 +938,7 @@ static cycle_t logarithmic_accumulation(cycle_t offset, int shift)
|
||||
xtime.tv_sec++;
|
||||
leap = second_overflow(xtime.tv_sec);
|
||||
xtime.tv_sec += leap;
|
||||
+ wall_to_monotonic.tv_sec -= leap;
|
||||
}
|
||||
|
||||
/* Accumulate raw time */
|
||||
@@ -1048,7 +1049,7 @@ static void update_wall_time(void)
|
||||
xtime.tv_sec++;
|
||||
leap = second_overflow(xtime.tv_sec);
|
||||
xtime.tv_sec += leap;
|
||||
-
|
||||
+ wall_to_monotonic.tv_sec -= leap;
|
||||
}
|
||||
|
||||
/* check to see if there is a new clocksource to use */
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+99
@@ -0,0 +1,99 @@
|
||||
From 540e83f9da352839ff29ce5445fc499de8d54570 Mon Sep 17 00:00:00 2001
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Sun, 13 Nov 2011 23:19:49 +0000
|
||||
Subject: [PATCH 077/109] time: Move common updates to a function
|
||||
|
||||
This is a backport of cc06268c6a87db156af2daed6e96a936b955cc82
|
||||
|
||||
[John Stultz: While not a bugfix itself, it allows following fixes
|
||||
to backport in a more straightforward manner.]
|
||||
|
||||
CC: Thomas Gleixner <tglx@linutronix.de>
|
||||
CC: Eric Dumazet <eric.dumazet@gmail.com>
|
||||
CC: Richard Cochran <richardcochran@gmail.com>
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
Cc: Prarit Bhargava <prarit@redhat.com>
|
||||
Cc: Thomas Gleixner <tglx@linutronix.de>
|
||||
Cc: Linux Kernel <linux-kernel@vger.kernel.org>
|
||||
Signed-off-by: John Stultz <john.stultz@linaro.org>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
kernel/time/timekeeping.c | 34 +++++++++++++++++-----------------
|
||||
1 files changed, 17 insertions(+), 17 deletions(-)
|
||||
|
||||
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
|
||||
index 5c9b67e..5d55185 100644
|
||||
--- a/kernel/time/timekeeping.c
|
||||
+++ b/kernel/time/timekeeping.c
|
||||
@@ -166,6 +166,19 @@ static struct timespec total_sleep_time;
|
||||
*/
|
||||
static struct timespec raw_time;
|
||||
|
||||
+/* must hold write on xtime_lock */
|
||||
+static void timekeeping_update(bool clearntp)
|
||||
+{
|
||||
+ if (clearntp) {
|
||||
+ timekeeper.ntp_error = 0;
|
||||
+ ntp_clear();
|
||||
+ }
|
||||
+ update_vsyscall(&xtime, &wall_to_monotonic,
|
||||
+ timekeeper.clock, timekeeper.mult);
|
||||
+}
|
||||
+
|
||||
+
|
||||
+
|
||||
/* flag for if timekeeping is suspended */
|
||||
int __read_mostly timekeeping_suspended;
|
||||
|
||||
@@ -366,11 +379,7 @@ int do_settimeofday(const struct timespec *tv)
|
||||
|
||||
xtime = *tv;
|
||||
|
||||
- timekeeper.ntp_error = 0;
|
||||
- ntp_clear();
|
||||
-
|
||||
- update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock,
|
||||
- timekeeper.mult);
|
||||
+ timekeeping_update(true);
|
||||
|
||||
write_sequnlock_irqrestore(&xtime_lock, flags);
|
||||
|
||||
@@ -403,11 +412,7 @@ int timekeeping_inject_offset(struct timespec *ts)
|
||||
xtime = timespec_add(xtime, *ts);
|
||||
wall_to_monotonic = timespec_sub(wall_to_monotonic, *ts);
|
||||
|
||||
- timekeeper.ntp_error = 0;
|
||||
- ntp_clear();
|
||||
-
|
||||
- update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock,
|
||||
- timekeeper.mult);
|
||||
+ timekeeping_update(true);
|
||||
|
||||
write_sequnlock_irqrestore(&xtime_lock, flags);
|
||||
|
||||
@@ -636,10 +641,7 @@ void timekeeping_inject_sleeptime(struct timespec *delta)
|
||||
|
||||
__timekeeping_inject_sleeptime(delta);
|
||||
|
||||
- timekeeper.ntp_error = 0;
|
||||
- ntp_clear();
|
||||
- update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock,
|
||||
- timekeeper.mult);
|
||||
+ timekeeping_update(true);
|
||||
|
||||
write_sequnlock_irqrestore(&xtime_lock, flags);
|
||||
|
||||
@@ -1052,9 +1054,7 @@ static void update_wall_time(void)
|
||||
wall_to_monotonic.tv_sec -= leap;
|
||||
}
|
||||
|
||||
- /* check to see if there is a new clocksource to use */
|
||||
- update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock,
|
||||
- timekeeper.mult);
|
||||
+ timekeeping_update(false);
|
||||
}
|
||||
|
||||
/**
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+118
@@ -0,0 +1,118 @@
|
||||
From b6da5d5a3a7e128fd17b15dc64fda7c1aea694e6 Mon Sep 17 00:00:00 2001
|
||||
From: John Stultz <johnstul@us.ibm.com>
|
||||
Date: Tue, 10 Jul 2012 18:43:19 -0400
|
||||
Subject: [PATCH 078/109] hrtimer: Provide clock_was_set_delayed()
|
||||
|
||||
commit f55a6faa384304c89cfef162768e88374d3312cb upstream.
|
||||
|
||||
clock_was_set() cannot be called from hard interrupt context because
|
||||
it calls on_each_cpu().
|
||||
|
||||
For fixing the widely reported leap seconds issue it is necessary to
|
||||
call it from hard interrupt context, i.e. the timer tick code, which
|
||||
does the timekeeping updates.
|
||||
|
||||
Provide a new function which denotes it in the hrtimer cpu base
|
||||
structure of the cpu on which it is called and raise the hrtimer
|
||||
softirq. We then execute the clock_was_set() notification from
|
||||
softirq context in run_hrtimer_softirq(). The hrtimer softirq is
|
||||
rarely used, so polling the flag there is not a performance issue.
|
||||
|
||||
[ tglx: Made it depend on CONFIG_HIGH_RES_TIMERS. We really should get
|
||||
rid of all this ifdeffery ASAP ]
|
||||
|
||||
Signed-off-by: John Stultz <johnstul@us.ibm.com>
|
||||
Reported-by: Jan Engelhardt <jengelh@inai.de>
|
||||
Reviewed-by: Ingo Molnar <mingo@kernel.org>
|
||||
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
|
||||
Acked-by: Prarit Bhargava <prarit@redhat.com>
|
||||
Link: http://lkml.kernel.org/r/1341960205-56738-2-git-send-email-johnstul@us.ibm.com
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
include/linux/hrtimer.h | 9 ++++++++-
|
||||
kernel/hrtimer.c | 20 ++++++++++++++++++++
|
||||
2 files changed, 28 insertions(+), 1 deletions(-)
|
||||
|
||||
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
|
||||
index fd0dc30..c9ec940 100644
|
||||
--- a/include/linux/hrtimer.h
|
||||
+++ b/include/linux/hrtimer.h
|
||||
@@ -165,6 +165,7 @@ enum hrtimer_base_type {
|
||||
* @lock: lock protecting the base and associated clock bases
|
||||
* and timers
|
||||
* @active_bases: Bitfield to mark bases with active timers
|
||||
+ * @clock_was_set: Indicates that clock was set from irq context.
|
||||
* @expires_next: absolute time of the next event which was scheduled
|
||||
* via clock_set_next_event()
|
||||
* @hres_active: State of high resolution mode
|
||||
@@ -177,7 +178,8 @@ enum hrtimer_base_type {
|
||||
*/
|
||||
struct hrtimer_cpu_base {
|
||||
raw_spinlock_t lock;
|
||||
- unsigned long active_bases;
|
||||
+ unsigned int active_bases;
|
||||
+ unsigned int clock_was_set;
|
||||
#ifdef CONFIG_HIGH_RES_TIMERS
|
||||
ktime_t expires_next;
|
||||
int hres_active;
|
||||
@@ -286,6 +288,8 @@ extern void hrtimer_peek_ahead_timers(void);
|
||||
# define MONOTONIC_RES_NSEC HIGH_RES_NSEC
|
||||
# define KTIME_MONOTONIC_RES KTIME_HIGH_RES
|
||||
|
||||
+extern void clock_was_set_delayed(void);
|
||||
+
|
||||
#else
|
||||
|
||||
# define MONOTONIC_RES_NSEC LOW_RES_NSEC
|
||||
@@ -306,6 +310,9 @@ static inline int hrtimer_is_hres_active(struct hrtimer *timer)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
+
|
||||
+static inline void clock_was_set_delayed(void) { }
|
||||
+
|
||||
#endif
|
||||
|
||||
extern void clock_was_set(void);
|
||||
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
|
||||
index ae34bf5..3c24fb2 100644
|
||||
--- a/kernel/hrtimer.c
|
||||
+++ b/kernel/hrtimer.c
|
||||
@@ -717,6 +717,19 @@ static int hrtimer_switch_to_hres(void)
|
||||
return 1;
|
||||
}
|
||||
|
||||
+/*
|
||||
+ * Called from timekeeping code to reprogramm the hrtimer interrupt
|
||||
+ * device. If called from the timer interrupt context we defer it to
|
||||
+ * softirq context.
|
||||
+ */
|
||||
+void clock_was_set_delayed(void)
|
||||
+{
|
||||
+ struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
|
||||
+
|
||||
+ cpu_base->clock_was_set = 1;
|
||||
+ __raise_softirq_irqoff(HRTIMER_SOFTIRQ);
|
||||
+}
|
||||
+
|
||||
#else
|
||||
|
||||
static inline int hrtimer_hres_active(void) { return 0; }
|
||||
@@ -1395,6 +1408,13 @@ void hrtimer_peek_ahead_timers(void)
|
||||
|
||||
static void run_hrtimer_softirq(struct softirq_action *h)
|
||||
{
|
||||
+ struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
|
||||
+
|
||||
+ if (cpu_base->clock_was_set) {
|
||||
+ cpu_base->clock_was_set = 0;
|
||||
+ clock_was_set();
|
||||
+ }
|
||||
+
|
||||
hrtimer_peek_ahead_timers();
|
||||
}
|
||||
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+66
@@ -0,0 +1,66 @@
|
||||
From 61642041369832a7f8d29d27fcd9e88d523f3163 Mon Sep 17 00:00:00 2001
|
||||
From: John Stultz <johnstul@us.ibm.com>
|
||||
Date: Tue, 10 Jul 2012 18:43:20 -0400
|
||||
Subject: [PATCH 079/109] timekeeping: Fix leapsecond triggered load spike
|
||||
issue
|
||||
|
||||
This is a backport of 4873fa070ae84a4115f0b3c9dfabc224f1bc7c51
|
||||
|
||||
The timekeeping code misses an update of the hrtimer subsystem after a
|
||||
leap second happened. Due to that timers based on CLOCK_REALTIME are
|
||||
either expiring a second early or late depending on whether a leap
|
||||
second has been inserted or deleted until an operation is initiated
|
||||
which causes that update. Unless the update happens by some other
|
||||
means this discrepancy between the timekeeping and the hrtimer data
|
||||
stays forever and timers are expired either early or late.
|
||||
|
||||
The reported immediate workaround - $ date -s "`date`" - is causing a
|
||||
call to clock_was_set() which updates the hrtimer data structures.
|
||||
See: http://www.sheeri.com/content/mysql-and-leap-second-high-cpu-and-fix
|
||||
|
||||
Add the missing clock_was_set() call to update_wall_time() in case of
|
||||
a leap second event. The actual update is deferred to softirq context
|
||||
as the necessary smp function call cannot be invoked from hard
|
||||
interrupt context.
|
||||
|
||||
Signed-off-by: John Stultz <johnstul@us.ibm.com>
|
||||
Reported-by: Jan Engelhardt <jengelh@inai.de>
|
||||
Reviewed-by: Ingo Molnar <mingo@kernel.org>
|
||||
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
|
||||
Acked-by: Prarit Bhargava <prarit@redhat.com>
|
||||
Link: http://lkml.kernel.org/r/1341960205-56738-3-git-send-email-johnstul@us.ibm.com
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
Cc: Prarit Bhargava <prarit@redhat.com>
|
||||
Cc: Thomas Gleixner <tglx@linutronix.de>
|
||||
Cc: Linux Kernel <linux-kernel@vger.kernel.org>
|
||||
Signed-off-by: John Stultz <johnstul@us.ibm.com>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
kernel/time/timekeeping.c | 4 ++++
|
||||
1 files changed, 4 insertions(+), 0 deletions(-)
|
||||
|
||||
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
|
||||
index 5d55185..8958ad7 100644
|
||||
--- a/kernel/time/timekeeping.c
|
||||
+++ b/kernel/time/timekeeping.c
|
||||
@@ -941,6 +941,8 @@ static cycle_t logarithmic_accumulation(cycle_t offset, int shift)
|
||||
leap = second_overflow(xtime.tv_sec);
|
||||
xtime.tv_sec += leap;
|
||||
wall_to_monotonic.tv_sec -= leap;
|
||||
+ if (leap)
|
||||
+ clock_was_set_delayed();
|
||||
}
|
||||
|
||||
/* Accumulate raw time */
|
||||
@@ -1052,6 +1054,8 @@ static void update_wall_time(void)
|
||||
leap = second_overflow(xtime.tv_sec);
|
||||
xtime.tv_sec += leap;
|
||||
wall_to_monotonic.tv_sec -= leap;
|
||||
+ if (leap)
|
||||
+ clock_was_set_delayed();
|
||||
}
|
||||
|
||||
timekeeping_update(false);
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+104
@@ -0,0 +1,104 @@
|
||||
From 711cebfd4050d5a41606f9f8ad56986d0377df08 Mon Sep 17 00:00:00 2001
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Tue, 10 Jul 2012 18:43:21 -0400
|
||||
Subject: [PATCH 080/109] timekeeping: Maintain ktime_t based offsets for
|
||||
hrtimers
|
||||
|
||||
This is a backport of 5b9fe759a678e05be4937ddf03d50e950207c1c0
|
||||
|
||||
We need to update the hrtimer clock offsets from the hrtimer interrupt
|
||||
context. To avoid conversions from timespec to ktime_t maintain a
|
||||
ktime_t based representation of those offsets in the timekeeper. This
|
||||
puts the conversion overhead into the code which updates the
|
||||
underlying offsets and provides fast accessible values in the hrtimer
|
||||
interrupt.
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
Signed-off-by: John Stultz <johnstul@us.ibm.com>
|
||||
Reviewed-by: Ingo Molnar <mingo@kernel.org>
|
||||
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
|
||||
Acked-by: Prarit Bhargava <prarit@redhat.com>
|
||||
Link: http://lkml.kernel.org/r/1341960205-56738-4-git-send-email-johnstul@us.ibm.com
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
[John Stultz: Backported to 3.2]
|
||||
Cc: Prarit Bhargava <prarit@redhat.com>
|
||||
Cc: Thomas Gleixner <tglx@linutronix.de>
|
||||
Cc: Linux Kernel <linux-kernel@vger.kernel.org>
|
||||
Signed-off-by: John Stultz <johnstul@us.ibm.com>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
kernel/time/timekeeping.c | 25 ++++++++++++++++++++++++-
|
||||
1 files changed, 24 insertions(+), 1 deletions(-)
|
||||
|
||||
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
|
||||
index 8958ad7..d5d0e5d 100644
|
||||
--- a/kernel/time/timekeeping.c
|
||||
+++ b/kernel/time/timekeeping.c
|
||||
@@ -161,18 +161,34 @@ static struct timespec xtime __attribute__ ((aligned (16)));
|
||||
static struct timespec wall_to_monotonic __attribute__ ((aligned (16)));
|
||||
static struct timespec total_sleep_time;
|
||||
|
||||
+/* Offset clock monotonic -> clock realtime */
|
||||
+static ktime_t offs_real;
|
||||
+
|
||||
+/* Offset clock monotonic -> clock boottime */
|
||||
+static ktime_t offs_boot;
|
||||
+
|
||||
/*
|
||||
* The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock.
|
||||
*/
|
||||
static struct timespec raw_time;
|
||||
|
||||
/* must hold write on xtime_lock */
|
||||
+static void update_rt_offset(void)
|
||||
+{
|
||||
+ struct timespec tmp, *wtm = &wall_to_monotonic;
|
||||
+
|
||||
+ set_normalized_timespec(&tmp, -wtm->tv_sec, -wtm->tv_nsec);
|
||||
+ offs_real = timespec_to_ktime(tmp);
|
||||
+}
|
||||
+
|
||||
+/* must hold write on xtime_lock */
|
||||
static void timekeeping_update(bool clearntp)
|
||||
{
|
||||
if (clearntp) {
|
||||
timekeeper.ntp_error = 0;
|
||||
ntp_clear();
|
||||
}
|
||||
+ update_rt_offset();
|
||||
update_vsyscall(&xtime, &wall_to_monotonic,
|
||||
timekeeper.clock, timekeeper.mult);
|
||||
}
|
||||
@@ -587,6 +603,7 @@ void __init timekeeping_init(void)
|
||||
}
|
||||
set_normalized_timespec(&wall_to_monotonic,
|
||||
-boot.tv_sec, -boot.tv_nsec);
|
||||
+ update_rt_offset();
|
||||
total_sleep_time.tv_sec = 0;
|
||||
total_sleep_time.tv_nsec = 0;
|
||||
write_sequnlock_irqrestore(&xtime_lock, flags);
|
||||
@@ -595,6 +612,12 @@ void __init timekeeping_init(void)
|
||||
/* time in seconds when suspend began */
|
||||
static struct timespec timekeeping_suspend_time;
|
||||
|
||||
+static void update_sleep_time(struct timespec t)
|
||||
+{
|
||||
+ total_sleep_time = t;
|
||||
+ offs_boot = timespec_to_ktime(t);
|
||||
+}
|
||||
+
|
||||
/**
|
||||
* __timekeeping_inject_sleeptime - Internal function to add sleep interval
|
||||
* @delta: pointer to a timespec delta value
|
||||
@@ -612,7 +635,7 @@ static void __timekeeping_inject_sleeptime(struct timespec *delta)
|
||||
|
||||
xtime = timespec_add(xtime, *delta);
|
||||
wall_to_monotonic = timespec_sub(wall_to_monotonic, *delta);
|
||||
- total_sleep_time = timespec_add(total_sleep_time, *delta);
|
||||
+ update_sleep_time(timespec_add(total_sleep_time, *delta));
|
||||
}
|
||||
|
||||
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+61
@@ -0,0 +1,61 @@
|
||||
From 6783d1f7e46f4450b489d970bbf4d62db9296c1f Mon Sep 17 00:00:00 2001
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Tue, 10 Jul 2012 18:43:23 -0400
|
||||
Subject: [PATCH 081/109] hrtimers: Move lock held region in
|
||||
hrtimer_interrupt()
|
||||
|
||||
commit 196951e91262fccda81147d2bcf7fdab08668b40 upstream.
|
||||
|
||||
We need to update the base offsets from this code and we need to do
|
||||
that under base->lock. Move the lock held region around the
|
||||
ktime_get() calls. The ktime_get() calls are going to be replaced with
|
||||
a function which gets the time and the offsets atomically.
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
Reviewed-by: Ingo Molnar <mingo@kernel.org>
|
||||
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
|
||||
Acked-by: Prarit Bhargava <prarit@redhat.com>
|
||||
Signed-off-by: John Stultz <johnstul@us.ibm.com>
|
||||
Link: http://lkml.kernel.org/r/1341960205-56738-6-git-send-email-johnstul@us.ibm.com
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
kernel/hrtimer.c | 5 +++--
|
||||
1 files changed, 3 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
|
||||
index 3c24fb2..8f320af 100644
|
||||
--- a/kernel/hrtimer.c
|
||||
+++ b/kernel/hrtimer.c
|
||||
@@ -1263,11 +1263,10 @@ void hrtimer_interrupt(struct clock_event_device *dev)
|
||||
cpu_base->nr_events++;
|
||||
dev->next_event.tv64 = KTIME_MAX;
|
||||
|
||||
+ raw_spin_lock(&cpu_base->lock);
|
||||
entry_time = now = ktime_get();
|
||||
retry:
|
||||
expires_next.tv64 = KTIME_MAX;
|
||||
-
|
||||
- raw_spin_lock(&cpu_base->lock);
|
||||
/*
|
||||
* We set expires_next to KTIME_MAX here with cpu_base->lock
|
||||
* held to prevent that a timer is enqueued in our queue via
|
||||
@@ -1344,6 +1343,7 @@ retry:
|
||||
* interrupt routine. We give it 3 attempts to avoid
|
||||
* overreacting on some spurious event.
|
||||
*/
|
||||
+ raw_spin_lock(&cpu_base->lock);
|
||||
now = ktime_get();
|
||||
cpu_base->nr_retries++;
|
||||
if (++retries < 3)
|
||||
@@ -1356,6 +1356,7 @@ retry:
|
||||
*/
|
||||
cpu_base->nr_hangs++;
|
||||
cpu_base->hang_detected = 1;
|
||||
+ raw_spin_unlock(&cpu_base->lock);
|
||||
delta = ktime_sub(now, entry_time);
|
||||
if (delta.tv64 > cpu_base->max_hang_time.tv64)
|
||||
cpu_base->max_hang_time = delta;
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+94
@@ -0,0 +1,94 @@
|
||||
From d6cadfb2bf29a5913562d3f63c49a937bc98540d Mon Sep 17 00:00:00 2001
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Tue, 10 Jul 2012 18:43:24 -0400
|
||||
Subject: [PATCH 082/109] timekeeping: Provide hrtimer update function
|
||||
|
||||
This is a backport of f6c06abfb3972ad4914cef57d8348fcb2932bc3b
|
||||
|
||||
To finally fix the infamous leap second issue and other race windows
|
||||
caused by functions which change the offsets between the various time
|
||||
bases (CLOCK_MONOTONIC, CLOCK_REALTIME and CLOCK_BOOTTIME) we need a
|
||||
function which atomically gets the current monotonic time and updates
|
||||
the offsets of CLOCK_REALTIME and CLOCK_BOOTTIME with minimalistic
|
||||
overhead. The previous patch which provides ktime_t offsets allows us
|
||||
to make this function almost as cheap as ktime_get() which is going to
|
||||
be replaced in hrtimer_interrupt().
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
Reviewed-by: Ingo Molnar <mingo@kernel.org>
|
||||
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
|
||||
Acked-by: Prarit Bhargava <prarit@redhat.com>
|
||||
Signed-off-by: John Stultz <johnstul@us.ibm.com>
|
||||
Link: http://lkml.kernel.org/r/1341960205-56738-7-git-send-email-johnstul@us.ibm.com
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
[John Stultz: Backported to 3.2]
|
||||
Cc: Prarit Bhargava <prarit@redhat.com>
|
||||
Cc: Thomas Gleixner <tglx@linutronix.de>
|
||||
Cc: Linux Kernel <linux-kernel@vger.kernel.org>
|
||||
Signed-off-by: John Stultz <johnstul@us.ibm.com>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
include/linux/hrtimer.h | 1 +
|
||||
kernel/time/timekeeping.c | 34 ++++++++++++++++++++++++++++++++++
|
||||
2 files changed, 35 insertions(+), 0 deletions(-)
|
||||
|
||||
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
|
||||
index c9ec940..cc07d27 100644
|
||||
--- a/include/linux/hrtimer.h
|
||||
+++ b/include/linux/hrtimer.h
|
||||
@@ -327,6 +327,7 @@ extern ktime_t ktime_get(void);
|
||||
extern ktime_t ktime_get_real(void);
|
||||
extern ktime_t ktime_get_boottime(void);
|
||||
extern ktime_t ktime_get_monotonic_offset(void);
|
||||
+extern ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t *offs_boot);
|
||||
|
||||
DECLARE_PER_CPU(struct tick_device, tick_cpu_device);
|
||||
|
||||
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
|
||||
index d5d0e5d..4938c5e 100644
|
||||
--- a/kernel/time/timekeeping.c
|
||||
+++ b/kernel/time/timekeeping.c
|
||||
@@ -1240,6 +1240,40 @@ void get_xtime_and_monotonic_and_sleep_offset(struct timespec *xtim,
|
||||
} while (read_seqretry(&xtime_lock, seq));
|
||||
}
|
||||
|
||||
+#ifdef CONFIG_HIGH_RES_TIMERS
|
||||
+/**
|
||||
+ * ktime_get_update_offsets - hrtimer helper
|
||||
+ * @real: pointer to storage for monotonic -> realtime offset
|
||||
+ * @_boot: pointer to storage for monotonic -> boottime offset
|
||||
+ *
|
||||
+ * Returns current monotonic time and updates the offsets
|
||||
+ * Called from hrtimer_interupt() or retrigger_next_event()
|
||||
+ */
|
||||
+ktime_t ktime_get_update_offsets(ktime_t *real, ktime_t *boot)
|
||||
+{
|
||||
+ ktime_t now;
|
||||
+ unsigned int seq;
|
||||
+ u64 secs, nsecs;
|
||||
+
|
||||
+ do {
|
||||
+ seq = read_seqbegin(&xtime_lock);
|
||||
+
|
||||
+ secs = xtime.tv_sec;
|
||||
+ nsecs = xtime.tv_nsec;
|
||||
+ nsecs += timekeeping_get_ns();
|
||||
+ /* If arch requires, add in gettimeoffset() */
|
||||
+ nsecs += arch_gettimeoffset();
|
||||
+
|
||||
+ *real = offs_real;
|
||||
+ *boot = offs_boot;
|
||||
+ } while (read_seqretry(&xtime_lock, seq));
|
||||
+
|
||||
+ now = ktime_add_ns(ktime_set(secs, 0), nsecs);
|
||||
+ now = ktime_sub(now, *real);
|
||||
+ return now;
|
||||
+}
|
||||
+#endif
|
||||
+
|
||||
/**
|
||||
* ktime_get_monotonic_offset() - get wall_to_monotonic in ktime_t format
|
||||
*/
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+125
@@ -0,0 +1,125 @@
|
||||
From 532c526ac9349430b41f6a16f32fc808c4270647 Mon Sep 17 00:00:00 2001
|
||||
From: John Stultz <johnstul@us.ibm.com>
|
||||
Date: Tue, 10 Jul 2012 18:43:25 -0400
|
||||
Subject: [PATCH 083/109] hrtimer: Update hrtimer base offsets each
|
||||
hrtimer_interrupt
|
||||
|
||||
commit 5baefd6d84163443215f4a99f6a20f054ef11236 upstream.
|
||||
|
||||
The update of the hrtimer base offsets on all cpus cannot be made
|
||||
atomically from the timekeeper.lock held and interrupt disabled region
|
||||
as smp function calls are not allowed there.
|
||||
|
||||
clock_was_set(), which enforces the update on all cpus, is called
|
||||
either from preemptible process context in case of do_settimeofday()
|
||||
or from the softirq context when the offset modification happened in
|
||||
the timer interrupt itself due to a leap second.
|
||||
|
||||
In both cases there is a race window for an hrtimer interrupt between
|
||||
dropping timekeeper lock, enabling interrupts and clock_was_set()
|
||||
issuing the updates. Any interrupt which arrives in that window will
|
||||
see the new time but operate on stale offsets.
|
||||
|
||||
So we need to make sure that an hrtimer interrupt always sees a
|
||||
consistent state of time and offsets.
|
||||
|
||||
ktime_get_update_offsets() allows us to get the current monotonic time
|
||||
and update the per cpu hrtimer base offsets from hrtimer_interrupt()
|
||||
to capture a consistent state of monotonic time and the offsets. The
|
||||
function replaces the existing ktime_get() calls in hrtimer_interrupt().
|
||||
|
||||
The overhead of the new function vs. ktime_get() is minimal as it just
|
||||
adds two store operations.
|
||||
|
||||
This ensures that any changes to realtime or boottime offsets are
|
||||
noticed and stored into the per-cpu hrtimer base structures, prior to
|
||||
any hrtimer expiration and guarantees that timers are not expired early.
|
||||
|
||||
Signed-off-by: John Stultz <johnstul@us.ibm.com>
|
||||
Reviewed-by: Ingo Molnar <mingo@kernel.org>
|
||||
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
|
||||
Acked-by: Prarit Bhargava <prarit@redhat.com>
|
||||
Link: http://lkml.kernel.org/r/1341960205-56738-8-git-send-email-johnstul@us.ibm.com
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
kernel/hrtimer.c | 28 ++++++++++++++--------------
|
||||
1 files changed, 14 insertions(+), 14 deletions(-)
|
||||
|
||||
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
|
||||
index 8f320af..6db7a5e 100644
|
||||
--- a/kernel/hrtimer.c
|
||||
+++ b/kernel/hrtimer.c
|
||||
@@ -657,6 +657,14 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
|
||||
return 0;
|
||||
}
|
||||
|
||||
+static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base)
|
||||
+{
|
||||
+ ktime_t *offs_real = &base->clock_base[HRTIMER_BASE_REALTIME].offset;
|
||||
+ ktime_t *offs_boot = &base->clock_base[HRTIMER_BASE_BOOTTIME].offset;
|
||||
+
|
||||
+ return ktime_get_update_offsets(offs_real, offs_boot);
|
||||
+}
|
||||
+
|
||||
/*
|
||||
* Retrigger next event is called after clock was set
|
||||
*
|
||||
@@ -665,22 +673,12 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
|
||||
static void retrigger_next_event(void *arg)
|
||||
{
|
||||
struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases);
|
||||
- struct timespec realtime_offset, xtim, wtm, sleep;
|
||||
|
||||
if (!hrtimer_hres_active())
|
||||
return;
|
||||
|
||||
- /* Optimized out for !HIGH_RES */
|
||||
- get_xtime_and_monotonic_and_sleep_offset(&xtim, &wtm, &sleep);
|
||||
- set_normalized_timespec(&realtime_offset, -wtm.tv_sec, -wtm.tv_nsec);
|
||||
-
|
||||
- /* Adjust CLOCK_REALTIME offset */
|
||||
raw_spin_lock(&base->lock);
|
||||
- base->clock_base[HRTIMER_BASE_REALTIME].offset =
|
||||
- timespec_to_ktime(realtime_offset);
|
||||
- base->clock_base[HRTIMER_BASE_BOOTTIME].offset =
|
||||
- timespec_to_ktime(sleep);
|
||||
-
|
||||
+ hrtimer_update_base(base);
|
||||
hrtimer_force_reprogram(base, 0);
|
||||
raw_spin_unlock(&base->lock);
|
||||
}
|
||||
@@ -710,7 +708,6 @@ static int hrtimer_switch_to_hres(void)
|
||||
base->clock_base[i].resolution = KTIME_HIGH_RES;
|
||||
|
||||
tick_setup_sched_timer();
|
||||
-
|
||||
/* "Retrigger" the interrupt to get things going */
|
||||
retrigger_next_event(NULL);
|
||||
local_irq_restore(flags);
|
||||
@@ -1264,7 +1261,7 @@ void hrtimer_interrupt(struct clock_event_device *dev)
|
||||
dev->next_event.tv64 = KTIME_MAX;
|
||||
|
||||
raw_spin_lock(&cpu_base->lock);
|
||||
- entry_time = now = ktime_get();
|
||||
+ entry_time = now = hrtimer_update_base(cpu_base);
|
||||
retry:
|
||||
expires_next.tv64 = KTIME_MAX;
|
||||
/*
|
||||
@@ -1342,9 +1339,12 @@ retry:
|
||||
* We need to prevent that we loop forever in the hrtimer
|
||||
* interrupt routine. We give it 3 attempts to avoid
|
||||
* overreacting on some spurious event.
|
||||
+ *
|
||||
+ * Acquire base lock for updating the offsets and retrieving
|
||||
+ * the current time.
|
||||
*/
|
||||
raw_spin_lock(&cpu_base->lock);
|
||||
- now = ktime_get();
|
||||
+ now = hrtimer_update_base(cpu_base);
|
||||
cpu_base->nr_retries++;
|
||||
if (++retries < 3)
|
||||
goto retry;
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+57
@@ -0,0 +1,57 @@
|
||||
From 666e725f56fc4c9a6e4f0e00b5a180866863f724 Mon Sep 17 00:00:00 2001
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Mon, 16 Jul 2012 12:50:42 -0400
|
||||
Subject: [PATCH 084/109] timekeeping: Add missing update call in
|
||||
timekeeping_resume()
|
||||
|
||||
This is a backport of 3e997130bd2e8c6f5aaa49d6e3161d4d29b43ab0
|
||||
|
||||
The leap second rework unearthed another issue of inconsistent data.
|
||||
|
||||
On timekeeping_resume() the timekeeper data is updated, but nothing
|
||||
calls timekeeping_update(), so now the update code in the timer
|
||||
interrupt sees stale values.
|
||||
|
||||
This has been the case before those changes, but then the timer
|
||||
interrupt was using stale data as well so this went unnoticed for quite
|
||||
some time.
|
||||
|
||||
Add the missing update call, so all the data is consistent everywhere.
|
||||
|
||||
Reported-by: Andreas Schwab <schwab@linux-m68k.org>
|
||||
Reported-and-tested-by: "Rafael J. Wysocki" <rjw@sisk.pl>
|
||||
Reported-and-tested-by: Martin Steigerwald <Martin@lichtvoll.de>
|
||||
Cc: LKML <linux-kernel@vger.kernel.org>
|
||||
Cc: Linux PM list <linux-pm@vger.kernel.org>
|
||||
Cc: John Stultz <johnstul@us.ibm.com>
|
||||
Cc: Ingo Molnar <mingo@kernel.org>
|
||||
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
|
||||
Cc: Prarit Bhargava <prarit@redhat.com>
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
Signed-off-by: John Stultz <johnstul@us.ibm.com>
|
||||
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
|
||||
[John Stultz: Backported to 3.2]
|
||||
Cc: Prarit Bhargava <prarit@redhat.com>
|
||||
Cc: Thomas Gleixner <tglx@linutronix.de>
|
||||
Cc: Linux Kernel <linux-kernel@vger.kernel.org>
|
||||
Signed-off-by: John Stultz <johnstul@us.ibm.com>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
kernel/time/timekeeping.c | 1 +
|
||||
1 files changed, 1 insertions(+), 0 deletions(-)
|
||||
|
||||
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
|
||||
index 4938c5e..03e67d4 100644
|
||||
--- a/kernel/time/timekeeping.c
|
||||
+++ b/kernel/time/timekeeping.c
|
||||
@@ -699,6 +699,7 @@ static void timekeeping_resume(void)
|
||||
timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock);
|
||||
timekeeper.ntp_error = 0;
|
||||
timekeeping_suspended = 0;
|
||||
+ timekeeping_update(false);
|
||||
write_sequnlock_irqrestore(&xtime_lock, flags);
|
||||
|
||||
touch_softlockup_watchdog();
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+87
@@ -0,0 +1,87 @@
|
||||
From ec9436c865d11ebd0fd6909a9ef2a63e5536ff29 Mon Sep 17 00:00:00 2001
|
||||
From: Andreas Schwab <schwab@linux-m68k.org>
|
||||
Date: Fri, 9 Dec 2011 11:35:08 +0000
|
||||
Subject: [PATCH 085/109] powerpc: Fix wrong divisor in usecs_to_cputime
|
||||
|
||||
commit 9f5072d4f63f28d30d343573830ac6c85fc0deff upstream.
|
||||
|
||||
Commit d57af9b (taskstats: use real microsecond granularity for CPU times)
|
||||
renamed msecs_to_cputime to usecs_to_cputime, but failed to update all
|
||||
numbers on the way. This causes nonsensical cpu idle/iowait values to be
|
||||
displayed in /proc/stat (the only user of usecs_to_cputime so far).
|
||||
|
||||
This also renames __cputime_msec_factor to __cputime_usec_factor, adapting
|
||||
its value and using it directly in cputime_to_usecs instead of doing two
|
||||
multiplications.
|
||||
|
||||
Signed-off-by: Andreas Schwab <schwab@linux-m68k.org>
|
||||
Acked-by: Anton Blanchard <anton@samba.org>
|
||||
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
arch/powerpc/include/asm/cputime.h | 6 +++---
|
||||
arch/powerpc/kernel/time.c | 10 +++++-----
|
||||
2 files changed, 8 insertions(+), 8 deletions(-)
|
||||
|
||||
diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h
|
||||
index 98b7c4b..fa3f921 100644
|
||||
--- a/arch/powerpc/include/asm/cputime.h
|
||||
+++ b/arch/powerpc/include/asm/cputime.h
|
||||
@@ -126,11 +126,11 @@ static inline u64 cputime64_to_jiffies64(const cputime_t ct)
|
||||
/*
|
||||
* Convert cputime <-> microseconds
|
||||
*/
|
||||
-extern u64 __cputime_msec_factor;
|
||||
+extern u64 __cputime_usec_factor;
|
||||
|
||||
static inline unsigned long cputime_to_usecs(const cputime_t ct)
|
||||
{
|
||||
- return mulhdu(ct, __cputime_msec_factor) * USEC_PER_MSEC;
|
||||
+ return mulhdu(ct, __cputime_usec_factor);
|
||||
}
|
||||
|
||||
static inline cputime_t usecs_to_cputime(const unsigned long us)
|
||||
@@ -143,7 +143,7 @@ static inline cputime_t usecs_to_cputime(const unsigned long us)
|
||||
sec = us / 1000000;
|
||||
if (ct) {
|
||||
ct *= tb_ticks_per_sec;
|
||||
- do_div(ct, 1000);
|
||||
+ do_div(ct, 1000000);
|
||||
}
|
||||
if (sec)
|
||||
ct += (cputime_t) sec * tb_ticks_per_sec;
|
||||
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
|
||||
index 5db163c..ec8affe 100644
|
||||
--- a/arch/powerpc/kernel/time.c
|
||||
+++ b/arch/powerpc/kernel/time.c
|
||||
@@ -168,13 +168,13 @@ EXPORT_SYMBOL_GPL(ppc_tb_freq);
|
||||
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
|
||||
/*
|
||||
* Factors for converting from cputime_t (timebase ticks) to
|
||||
- * jiffies, milliseconds, seconds, and clock_t (1/USER_HZ seconds).
|
||||
+ * jiffies, microseconds, seconds, and clock_t (1/USER_HZ seconds).
|
||||
* These are all stored as 0.64 fixed-point binary fractions.
|
||||
*/
|
||||
u64 __cputime_jiffies_factor;
|
||||
EXPORT_SYMBOL(__cputime_jiffies_factor);
|
||||
-u64 __cputime_msec_factor;
|
||||
-EXPORT_SYMBOL(__cputime_msec_factor);
|
||||
+u64 __cputime_usec_factor;
|
||||
+EXPORT_SYMBOL(__cputime_usec_factor);
|
||||
u64 __cputime_sec_factor;
|
||||
EXPORT_SYMBOL(__cputime_sec_factor);
|
||||
u64 __cputime_clockt_factor;
|
||||
@@ -192,8 +192,8 @@ static void calc_cputime_factors(void)
|
||||
|
||||
div128_by_32(HZ, 0, tb_ticks_per_sec, &res);
|
||||
__cputime_jiffies_factor = res.result_low;
|
||||
- div128_by_32(1000, 0, tb_ticks_per_sec, &res);
|
||||
- __cputime_msec_factor = res.result_low;
|
||||
+ div128_by_32(1000000, 0, tb_ticks_per_sec, &res);
|
||||
+ __cputime_usec_factor = res.result_low;
|
||||
div128_by_32(1, 0, tb_ticks_per_sec, &res);
|
||||
__cputime_sec_factor = res.result_low;
|
||||
div128_by_32(USER_HZ, 0, tb_ticks_per_sec, &res);
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+60
@@ -0,0 +1,60 @@
|
||||
From 164965e103d2cfc11c59b563aa95ce2e8c372b65 Mon Sep 17 00:00:00 2001
|
||||
From: Nadav Har'El <nyh@math.technion.ac.il>
|
||||
Date: Mon, 27 Feb 2012 15:07:29 +0200
|
||||
Subject: [PATCH 086/109] vhost: don't forget to schedule()
|
||||
|
||||
commit d550dda192c1bd039afb774b99485e88b70d7cb8 upstream.
|
||||
|
||||
This is a tiny, but important, patch to vhost.
|
||||
|
||||
Vhost's worker thread only called schedule() when it had no work to do, and
|
||||
it wanted to go to sleep. But if there's always work to do, e.g., the guest
|
||||
is running a network-intensive program like netperf with small message sizes,
|
||||
schedule() was *never* called. This had several negative implications (on
|
||||
non-preemptive kernels):
|
||||
|
||||
1. Passing time was not properly accounted to the "vhost" process (ps and
|
||||
top would wrongly show it using zero CPU time).
|
||||
|
||||
2. Sometimes error messages about RCU timeouts would be printed, if the
|
||||
core running the vhost thread didn't schedule() for a very long time.
|
||||
|
||||
3. Worst of all, a vhost thread would "hog" the core. If several vhost
|
||||
threads need to share the same core, typically one would get most of the
|
||||
CPU time (and its associated guest most of the performance), while the
|
||||
others hardly get any work done.
|
||||
|
||||
The trivial solution is to add
|
||||
|
||||
if (need_resched())
|
||||
schedule();
|
||||
|
||||
After doing every piece of work. This will not do the heavy schedule() all
|
||||
the time, just when the timer interrupt decided a reschedule is warranted
|
||||
(so need_resched returns true).
|
||||
|
||||
Thanks to Abel Gordon for this patch.
|
||||
|
||||
Signed-off-by: Nadav Har'El <nyh@il.ibm.com>
|
||||
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
drivers/vhost/vhost.c | 2 ++
|
||||
1 files changed, 2 insertions(+), 0 deletions(-)
|
||||
|
||||
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
|
||||
index c14c42b..ae66278 100644
|
||||
--- a/drivers/vhost/vhost.c
|
||||
+++ b/drivers/vhost/vhost.c
|
||||
@@ -222,6 +222,8 @@ static int vhost_worker(void *data)
|
||||
if (work) {
|
||||
__set_current_state(TASK_RUNNING);
|
||||
work->fn(work);
|
||||
+ if (need_resched())
|
||||
+ schedule();
|
||||
} else
|
||||
schedule();
|
||||
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+43
@@ -0,0 +1,43 @@
|
||||
From 87b62a139d4385f3726820674127eaee29fc7cff Mon Sep 17 00:00:00 2001
|
||||
From: Devendra Naga <devendra.aaru@gmail.com>
|
||||
Date: Thu, 31 May 2012 01:51:20 +0000
|
||||
Subject: [PATCH 087/109] r8169: call netif_napi_del at errpaths and at driver
|
||||
unload
|
||||
|
||||
commit ad1be8d345416a794dea39761a374032aa471a76 upstream.
|
||||
|
||||
when register_netdev fails, the init'ed NAPIs by netif_napi_add must be
|
||||
deleted with netif_napi_del, and also when driver unloads, it should
|
||||
delete the NAPI before unregistering netdevice using unregister_netdev.
|
||||
|
||||
Signed-off-by: Devendra Naga <devendra.aaru@gmail.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
drivers/net/ethernet/realtek/r8169.c | 3 +++
|
||||
1 files changed, 3 insertions(+), 0 deletions(-)
|
||||
|
||||
diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
|
||||
index cc2565c..9e61d6b 100644
|
||||
--- a/drivers/net/ethernet/realtek/r8169.c
|
||||
+++ b/drivers/net/ethernet/realtek/r8169.c
|
||||
@@ -4185,6 +4185,7 @@ out:
|
||||
return rc;
|
||||
|
||||
err_out_msi_4:
|
||||
+ netif_napi_del(&tp->napi);
|
||||
rtl_disable_msi(pdev, tp);
|
||||
iounmap(ioaddr);
|
||||
err_out_free_res_3:
|
||||
@@ -4210,6 +4211,8 @@ static void __devexit rtl8169_remove_one(struct pci_dev *pdev)
|
||||
|
||||
cancel_delayed_work_sync(&tp->task);
|
||||
|
||||
+ netif_napi_del(&tp->napi);
|
||||
+
|
||||
unregister_netdev(dev);
|
||||
|
||||
rtl_release_firmware(tp);
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+112
@@ -0,0 +1,112 @@
|
||||
From 4ffefa650ebbe2ef8bc2babff2e5686c33a2dab3 Mon Sep 17 00:00:00 2001
|
||||
From: Eric Dumazet <edumazet@google.com>
|
||||
Date: Tue, 12 Jun 2012 23:50:04 +0000
|
||||
Subject: [PATCH 088/109] bnx2x: fix checksum validation
|
||||
|
||||
commit d6cb3e41386f20fb0777d0b59a2def82c65d37f7 upstream.
|
||||
|
||||
bnx2x driver incorrectly sets ip_summed to CHECKSUM_UNNECESSARY on
|
||||
encapsulated segments. TCP stack happily accepts frames with bad
|
||||
checksums, if they are inside a GRE or IPIP encapsulation.
|
||||
|
||||
Our understanding is that if no IP or L4 csum validation was done by the
|
||||
hardware, we should leave ip_summed as is (CHECKSUM_NONE), since
|
||||
hardware doesn't provide CHECKSUM_COMPLETE support in its cqe.
|
||||
|
||||
Then, if IP/L4 checksumming was done by the hardware, set
|
||||
CHECKSUM_UNNECESSARY if no error was flagged.
|
||||
|
||||
Patch based on findings and analysis from Robert Evans
|
||||
|
||||
Signed-off-by: Eric Dumazet <edumazet@google.com>
|
||||
Cc: Eilon Greenstein <eilong@broadcom.com>
|
||||
Cc: Yaniv Rosner <yanivr@broadcom.com>
|
||||
Cc: Merav Sicron <meravs@broadcom.com>
|
||||
Cc: Tom Herbert <therbert@google.com>
|
||||
Cc: Robert Evans <evansr@google.com>
|
||||
Cc: Willem de Bruijn <willemb@google.com>
|
||||
Acked-by: Eilon Greenstein <eilong@broadcom.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
[bwh: Backported to 3.2: adjust context, indentation]
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
drivers/net/ethernet/broadcom/bnx2x/bnx2x.h | 15 ------------
|
||||
drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 28 ++++++++++++++++++-----
|
||||
2 files changed, 22 insertions(+), 21 deletions(-)
|
||||
|
||||
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
|
||||
index aec7212..8dda46a 100644
|
||||
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
|
||||
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
|
||||
@@ -723,21 +723,6 @@ struct bnx2x_fastpath {
|
||||
|
||||
#define ETH_RX_ERROR_FALGS ETH_FAST_PATH_RX_CQE_PHY_DECODE_ERR_FLG
|
||||
|
||||
-#define BNX2X_IP_CSUM_ERR(cqe) \
|
||||
- (!((cqe)->fast_path_cqe.status_flags & \
|
||||
- ETH_FAST_PATH_RX_CQE_IP_XSUM_NO_VALIDATION_FLG) && \
|
||||
- ((cqe)->fast_path_cqe.type_error_flags & \
|
||||
- ETH_FAST_PATH_RX_CQE_IP_BAD_XSUM_FLG))
|
||||
-
|
||||
-#define BNX2X_L4_CSUM_ERR(cqe) \
|
||||
- (!((cqe)->fast_path_cqe.status_flags & \
|
||||
- ETH_FAST_PATH_RX_CQE_L4_XSUM_NO_VALIDATION_FLG) && \
|
||||
- ((cqe)->fast_path_cqe.type_error_flags & \
|
||||
- ETH_FAST_PATH_RX_CQE_L4_BAD_XSUM_FLG))
|
||||
-
|
||||
-#define BNX2X_RX_CSUM_OK(cqe) \
|
||||
- (!(BNX2X_L4_CSUM_ERR(cqe) || BNX2X_IP_CSUM_ERR(cqe)))
|
||||
-
|
||||
#define BNX2X_PRS_FLAG_OVERETH_IPV4(flags) \
|
||||
(((le16_to_cpu(flags) & \
|
||||
PARSING_FLAGS_OVER_ETHERNET_PROTOCOL) >> \
|
||||
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
|
||||
index 580b44e..27d6d6c 100644
|
||||
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
|
||||
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
|
||||
@@ -551,6 +551,26 @@ static inline void bnx2x_set_skb_rxhash(struct bnx2x *bp, union eth_rx_cqe *cqe,
|
||||
le32_to_cpu(cqe->fast_path_cqe.rss_hash_result);
|
||||
}
|
||||
|
||||
+static void bnx2x_csum_validate(struct sk_buff *skb, union eth_rx_cqe *cqe,
|
||||
+ struct bnx2x_fastpath *fp)
|
||||
+{
|
||||
+ /* Do nothing if no IP/L4 csum validation was done */
|
||||
+
|
||||
+ if (cqe->fast_path_cqe.status_flags &
|
||||
+ (ETH_FAST_PATH_RX_CQE_IP_XSUM_NO_VALIDATION_FLG |
|
||||
+ ETH_FAST_PATH_RX_CQE_L4_XSUM_NO_VALIDATION_FLG))
|
||||
+ return;
|
||||
+
|
||||
+ /* If both IP/L4 validation were done, check if an error was found. */
|
||||
+
|
||||
+ if (cqe->fast_path_cqe.type_error_flags &
|
||||
+ (ETH_FAST_PATH_RX_CQE_IP_BAD_XSUM_FLG |
|
||||
+ ETH_FAST_PATH_RX_CQE_L4_BAD_XSUM_FLG))
|
||||
+ fp->eth_q_stats.hw_csum_err++;
|
||||
+ else
|
||||
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
|
||||
+}
|
||||
+
|
||||
int bnx2x_rx_int(struct bnx2x_fastpath *fp, int budget)
|
||||
{
|
||||
struct bnx2x *bp = fp->bp;
|
||||
@@ -746,13 +766,9 @@ reuse_rx:
|
||||
|
||||
skb_checksum_none_assert(skb);
|
||||
|
||||
- if (bp->dev->features & NETIF_F_RXCSUM) {
|
||||
+ if (bp->dev->features & NETIF_F_RXCSUM)
|
||||
+ bnx2x_csum_validate(skb, cqe, fp);
|
||||
|
||||
- if (likely(BNX2X_RX_CSUM_OK(cqe)))
|
||||
- skb->ip_summed = CHECKSUM_UNNECESSARY;
|
||||
- else
|
||||
- fp->eth_q_stats.hw_csum_err++;
|
||||
- }
|
||||
}
|
||||
|
||||
skb_record_rx_queue(skb, fp->index);
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+75
@@ -0,0 +1,75 @@
|
||||
From 9a59f534e5f1d432bf63f0ed6cb184b1ce988063 Mon Sep 17 00:00:00 2001
|
||||
From: Eric Dumazet <edumazet@google.com>
|
||||
Date: Wed, 13 Jun 2012 09:45:16 +0000
|
||||
Subject: [PATCH 089/109] bnx2x: fix panic when TX ring is full
|
||||
|
||||
commit bc14786a100cc6a81cd060e8031ec481241b418c upstream.
|
||||
|
||||
There is an off-by-one error in the minimal number of BD in
|
||||
bnx2x_start_xmit() and bnx2x_tx_int() before stopping/resuming tx queue.
|
||||
|
||||
A full size GSO packet, with data included in skb->head really needs
|
||||
(MAX_SKB_FRAGS + 4) BDs, because of bnx2x_tx_split()
|
||||
|
||||
This error triggers if BQL is disabled and heavy TCP transmit traffic
|
||||
occurs.
|
||||
|
||||
bnx2x_tx_split() definitely can be called, remove a wrong comment.
|
||||
|
||||
Reported-by: Tomas Hruby <thruby@google.com>
|
||||
Signed-off-by: Eric Dumazet <edumazet@google.com>
|
||||
Cc: Eilon Greenstein <eilong@broadcom.com>
|
||||
Cc: Yaniv Rosner <yanivr@broadcom.com>
|
||||
Cc: Merav Sicron <meravs@broadcom.com>
|
||||
Cc: Tom Herbert <therbert@google.com>
|
||||
Cc: Robert Evans <evansr@google.com>
|
||||
Cc: Willem de Bruijn <willemb@google.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 8 +++-----
|
||||
1 files changed, 3 insertions(+), 5 deletions(-)
|
||||
|
||||
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
|
||||
index 27d6d6c..2c1a5c0 100644
|
||||
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
|
||||
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
|
||||
@@ -220,7 +220,7 @@ int bnx2x_tx_int(struct bnx2x *bp, struct bnx2x_fp_txdata *txdata)
|
||||
|
||||
if ((netif_tx_queue_stopped(txq)) &&
|
||||
(bp->state == BNX2X_STATE_OPEN) &&
|
||||
- (bnx2x_tx_avail(bp, txdata) >= MAX_SKB_FRAGS + 3))
|
||||
+ (bnx2x_tx_avail(bp, txdata) >= MAX_SKB_FRAGS + 4))
|
||||
netif_tx_wake_queue(txq);
|
||||
|
||||
__netif_tx_unlock(txq);
|
||||
@@ -2254,8 +2254,6 @@ int bnx2x_poll(struct napi_struct *napi, int budget)
|
||||
/* we split the first BD into headers and data BDs
|
||||
* to ease the pain of our fellow microcode engineers
|
||||
* we use one mapping for both BDs
|
||||
- * So far this has only been observed to happen
|
||||
- * in Other Operating Systems(TM)
|
||||
*/
|
||||
static noinline u16 bnx2x_tx_split(struct bnx2x *bp,
|
||||
struct bnx2x_fp_txdata *txdata,
|
||||
@@ -2906,7 +2904,7 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev)
|
||||
|
||||
txdata->tx_bd_prod += nbd;
|
||||
|
||||
- if (unlikely(bnx2x_tx_avail(bp, txdata) < MAX_SKB_FRAGS + 3)) {
|
||||
+ if (unlikely(bnx2x_tx_avail(bp, txdata) < MAX_SKB_FRAGS + 4)) {
|
||||
netif_tx_stop_queue(txq);
|
||||
|
||||
/* paired memory barrier is in bnx2x_tx_int(), we have to keep
|
||||
@@ -2915,7 +2913,7 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev)
|
||||
smp_mb();
|
||||
|
||||
fp->eth_q_stats.driver_xoff++;
|
||||
- if (bnx2x_tx_avail(bp, txdata) >= MAX_SKB_FRAGS + 3)
|
||||
+ if (bnx2x_tx_avail(bp, txdata) >= MAX_SKB_FRAGS + 4)
|
||||
netif_tx_wake_queue(txq);
|
||||
}
|
||||
txdata->tx_pkt++;
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
@@ -0,0 +1,135 @@
|
||||
From 401354ebe4d79d6edf536ad7b69e59afeec81308 Mon Sep 17 00:00:00 2001
|
||||
From: Eric Dumazet <edumazet@google.com>
|
||||
Date: Thu, 14 Jun 2012 06:42:44 +0000
|
||||
Subject: [PATCH 090/109] net: remove skb_orphan_try()
|
||||
|
||||
commit 62b1a8ab9b3660bb820d8dfe23148ed6cda38574 upstream.
|
||||
|
||||
Orphaning skb in dev_hard_start_xmit() makes bonding behavior
|
||||
unfriendly for applications sending big UDP bursts : Once packets
|
||||
pass the bonding device and come to real device, they might hit a full
|
||||
qdisc and be dropped. Without orphaning, the sender is automatically
|
||||
throttled because sk->sk_wmemalloc reaches sk->sk_sndbuf (assuming
|
||||
sk_sndbuf is not too big)
|
||||
|
||||
We could try to defer the orphaning adding another test in
|
||||
dev_hard_start_xmit(), but all this seems of little gain,
|
||||
now that BQL tends to make packets more likely to be parked
|
||||
in Qdisc queues instead of NIC TX ring, in cases where performance
|
||||
matters.
|
||||
|
||||
Reverts commits :
|
||||
fc6055a5ba31 net: Introduce skb_orphan_try()
|
||||
87fd308cfc6b net: skb_tx_hash() fix relative to skb_orphan_try()
|
||||
and removes SKBTX_DRV_NEEDS_SK_REF flag
|
||||
|
||||
Reported-and-bisected-by: Jean-Michel Hautbois <jhautbois@gmail.com>
|
||||
Signed-off-by: Eric Dumazet <edumazet@google.com>
|
||||
Tested-by: Oliver Hartkopp <socketcan@hartkopp.net>
|
||||
Acked-by: Oliver Hartkopp <socketcan@hartkopp.net>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
[bwh: Backported to 3.2:
|
||||
- Adjust context
|
||||
- SKBTX_WIFI_STATUS is not defined]
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
include/linux/skbuff.h | 5 +----
|
||||
net/can/raw.c | 3 ---
|
||||
net/core/dev.c | 23 +----------------------
|
||||
net/iucv/af_iucv.c | 1 -
|
||||
4 files changed, 2 insertions(+), 30 deletions(-)
|
||||
|
||||
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
|
||||
index bdb4590..53dc7e7 100644
|
||||
--- a/include/linux/skbuff.h
|
||||
+++ b/include/linux/skbuff.h
|
||||
@@ -213,11 +213,8 @@ enum {
|
||||
/* device driver is going to provide hardware time stamp */
|
||||
SKBTX_IN_PROGRESS = 1 << 2,
|
||||
|
||||
- /* ensure the originating sk reference is available on driver level */
|
||||
- SKBTX_DRV_NEEDS_SK_REF = 1 << 3,
|
||||
-
|
||||
/* device driver supports TX zero-copy buffers */
|
||||
- SKBTX_DEV_ZEROCOPY = 1 << 4,
|
||||
+ SKBTX_DEV_ZEROCOPY = 1 << 3,
|
||||
};
|
||||
|
||||
/*
|
||||
diff --git a/net/can/raw.c b/net/can/raw.c
|
||||
index cde1b4a..46cca3a 100644
|
||||
--- a/net/can/raw.c
|
||||
+++ b/net/can/raw.c
|
||||
@@ -681,9 +681,6 @@ static int raw_sendmsg(struct kiocb *iocb, struct socket *sock,
|
||||
if (err < 0)
|
||||
goto free_skb;
|
||||
|
||||
- /* to be able to check the received tx sock reference in raw_rcv() */
|
||||
- skb_shinfo(skb)->tx_flags |= SKBTX_DRV_NEEDS_SK_REF;
|
||||
-
|
||||
skb->dev = dev;
|
||||
skb->sk = sk;
|
||||
|
||||
diff --git a/net/core/dev.c b/net/core/dev.c
|
||||
index 1cbddc9..5738654 100644
|
||||
--- a/net/core/dev.c
|
||||
+++ b/net/core/dev.c
|
||||
@@ -2079,25 +2079,6 @@ static int dev_gso_segment(struct sk_buff *skb, int features)
|
||||
return 0;
|
||||
}
|
||||
|
||||
-/*
|
||||
- * Try to orphan skb early, right before transmission by the device.
|
||||
- * We cannot orphan skb if tx timestamp is requested or the sk-reference
|
||||
- * is needed on driver level for other reasons, e.g. see net/can/raw.c
|
||||
- */
|
||||
-static inline void skb_orphan_try(struct sk_buff *skb)
|
||||
-{
|
||||
- struct sock *sk = skb->sk;
|
||||
-
|
||||
- if (sk && !skb_shinfo(skb)->tx_flags) {
|
||||
- /* skb_tx_hash() wont be able to get sk.
|
||||
- * We copy sk_hash into skb->rxhash
|
||||
- */
|
||||
- if (!skb->rxhash)
|
||||
- skb->rxhash = sk->sk_hash;
|
||||
- skb_orphan(skb);
|
||||
- }
|
||||
-}
|
||||
-
|
||||
static bool can_checksum_protocol(unsigned long features, __be16 protocol)
|
||||
{
|
||||
return ((features & NETIF_F_GEN_CSUM) ||
|
||||
@@ -2182,8 +2163,6 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
|
||||
if (!list_empty(&ptype_all))
|
||||
dev_queue_xmit_nit(skb, dev);
|
||||
|
||||
- skb_orphan_try(skb);
|
||||
-
|
||||
features = netif_skb_features(skb);
|
||||
|
||||
if (vlan_tx_tag_present(skb) &&
|
||||
@@ -2293,7 +2272,7 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
|
||||
if (skb->sk && skb->sk->sk_hash)
|
||||
hash = skb->sk->sk_hash;
|
||||
else
|
||||
- hash = (__force u16) skb->protocol ^ skb->rxhash;
|
||||
+ hash = (__force u16) skb->protocol;
|
||||
hash = jhash_1word(hash, hashrnd);
|
||||
|
||||
return (u16) (((u64) hash * qcount) >> 32) + qoffset;
|
||||
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
|
||||
index 274d150..cf98d62 100644
|
||||
--- a/net/iucv/af_iucv.c
|
||||
+++ b/net/iucv/af_iucv.c
|
||||
@@ -380,7 +380,6 @@ static int afiucv_hs_send(struct iucv_message *imsg, struct sock *sock,
|
||||
skb_trim(skb, skb->dev->mtu);
|
||||
}
|
||||
skb->protocol = ETH_P_AF_IUCV;
|
||||
- skb_shinfo(skb)->tx_flags |= SKBTX_DRV_NEEDS_SK_REF;
|
||||
nskb = skb_clone(skb, GFP_ATOMIC);
|
||||
if (!nskb)
|
||||
return -ENOMEM;
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+78
@@ -0,0 +1,78 @@
|
||||
From 16fe1810332abe3998fa2e0760af7f8ca5f701d6 Mon Sep 17 00:00:00 2001
|
||||
From: Feng Tang <feng.tang@intel.com>
|
||||
Date: Mon, 4 Jun 2012 15:00:04 +0800
|
||||
Subject: [PATCH 091/109] ACPI: Make acpi_skip_timer_override cover all
|
||||
source_irq==0 cases
|
||||
|
||||
commit ae10ccdc3093486f8c2369d227583f9d79f628e5 upstream.
|
||||
|
||||
Currently when acpi_skip_timer_override is set, it only covers the
|
||||
(source_irq == 0 && global_irq == 2) cases. However, there are also
|
||||
platforms which need to use this option and whose global_irq is not 2.
|
||||
This patch will extend acpi_skip_timer_override to cover all
|
||||
timer overriding cases as long as the source irq is 0.
|
||||
|
||||
This is the first part of a fix to kernel bug bugzilla 40002:
|
||||
"IRQ 0 assigned to VGA"
|
||||
https://bugzilla.kernel.org/show_bug.cgi?id=40002
|
||||
|
||||
Reported-and-tested-by: Szymon Kowalczyk <fazerxlo@o2.pl>
|
||||
Signed-off-by: Feng Tang <feng.tang@intel.com>
|
||||
Signed-off-by: Len Brown <len.brown@intel.com>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
arch/x86/kernel/acpi/boot.c | 14 ++++++++------
|
||||
1 files changed, 8 insertions(+), 6 deletions(-)
|
||||
|
||||
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
|
||||
index 4558f0d..a94dc95 100644
|
||||
--- a/arch/x86/kernel/acpi/boot.c
|
||||
+++ b/arch/x86/kernel/acpi/boot.c
|
||||
@@ -416,12 +416,14 @@ acpi_parse_int_src_ovr(struct acpi_subtable_header * header,
|
||||
return 0;
|
||||
}
|
||||
|
||||
- if (intsrc->source_irq == 0 && intsrc->global_irq == 2) {
|
||||
+ if (intsrc->source_irq == 0) {
|
||||
if (acpi_skip_timer_override) {
|
||||
- printk(PREFIX "BIOS IRQ0 pin2 override ignored.\n");
|
||||
+ printk(PREFIX "BIOS IRQ0 override ignored.\n");
|
||||
return 0;
|
||||
}
|
||||
- if (acpi_fix_pin2_polarity && (intsrc->inti_flags & ACPI_MADT_POLARITY_MASK)) {
|
||||
+
|
||||
+ if ((intsrc->global_irq == 2) && acpi_fix_pin2_polarity
|
||||
+ && (intsrc->inti_flags & ACPI_MADT_POLARITY_MASK)) {
|
||||
intsrc->inti_flags &= ~ACPI_MADT_POLARITY_MASK;
|
||||
printk(PREFIX "BIOS IRQ0 pin2 override: forcing polarity to high active.\n");
|
||||
}
|
||||
@@ -1327,7 +1329,7 @@ static int __init dmi_disable_acpi(const struct dmi_system_id *d)
|
||||
}
|
||||
|
||||
/*
|
||||
- * Force ignoring BIOS IRQ0 pin2 override
|
||||
+ * Force ignoring BIOS IRQ0 override
|
||||
*/
|
||||
static int __init dmi_ignore_irq0_timer_override(const struct dmi_system_id *d)
|
||||
{
|
||||
@@ -1337,7 +1339,7 @@ static int __init dmi_ignore_irq0_timer_override(const struct dmi_system_id *d)
|
||||
*/
|
||||
if (!acpi_skip_timer_override) {
|
||||
WARN(1, KERN_ERR "ati_ixp4x0 quirk not complete.\n");
|
||||
- pr_notice("%s detected: Ignoring BIOS IRQ0 pin2 override\n",
|
||||
+ pr_notice("%s detected: Ignoring BIOS IRQ0 override\n",
|
||||
d->ident);
|
||||
acpi_skip_timer_override = 1;
|
||||
}
|
||||
@@ -1431,7 +1433,7 @@ static struct dmi_system_id __initdata acpi_dmi_table_late[] = {
|
||||
* is enabled. This input is incorrectly designated the
|
||||
* ISA IRQ 0 via an interrupt source override even though
|
||||
* it is wired to the output of the master 8259A and INTIN0
|
||||
- * is not connected at all. Force ignoring BIOS IRQ0 pin2
|
||||
+ * is not connected at all. Force ignoring BIOS IRQ0
|
||||
* override in that cases.
|
||||
*/
|
||||
{
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+42
@@ -0,0 +1,42 @@
|
||||
From b9c36e346bdb1c7bd2edd7489561f94155195e6e Mon Sep 17 00:00:00 2001
|
||||
From: Feng Tang <feng.tang@intel.com>
|
||||
Date: Mon, 4 Jun 2012 15:00:05 +0800
|
||||
Subject: [PATCH 092/109] ACPI: Remove one board specific WARN when ignoring
|
||||
timer overriding
|
||||
|
||||
commit 5752cdb805ff89942d99d12118e2844e7db34df8 upstream.
|
||||
|
||||
commit 7f68b4c2e158019c2ec494b5cfbd9c83b4e5b253 upstream.
|
||||
|
||||
Current WARN msg is only for the ati_ixp4x0 board, while this function
|
||||
is used by multiple platforms. So this one board specific warning
|
||||
is not appropriate any more.
|
||||
|
||||
Signed-off-by: Feng Tang <feng.tang@intel.com>
|
||||
Signed-off-by: Len Brown <len.brown@intel.com>
|
||||
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
arch/x86/kernel/acpi/boot.c | 5 -----
|
||||
1 files changed, 0 insertions(+), 5 deletions(-)
|
||||
|
||||
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
|
||||
index a94dc95..882960e 100644
|
||||
--- a/arch/x86/kernel/acpi/boot.c
|
||||
+++ b/arch/x86/kernel/acpi/boot.c
|
||||
@@ -1333,12 +1333,7 @@ static int __init dmi_disable_acpi(const struct dmi_system_id *d)
|
||||
*/
|
||||
static int __init dmi_ignore_irq0_timer_override(const struct dmi_system_id *d)
|
||||
{
|
||||
- /*
|
||||
- * The ati_ixp4x0_rev() early PCI quirk should have set
|
||||
- * the acpi_skip_timer_override flag already:
|
||||
- */
|
||||
if (!acpi_skip_timer_override) {
|
||||
- WARN(1, KERN_ERR "ati_ixp4x0 quirk not complete.\n");
|
||||
pr_notice("%s detected: Ignoring BIOS IRQ0 override\n",
|
||||
d->ident);
|
||||
acpi_skip_timer_override = 1;
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+50
@@ -0,0 +1,50 @@
|
||||
From 0430a8402ee6125c909ae3f4c3a89696e9a24077 Mon Sep 17 00:00:00 2001
|
||||
From: Feng Tang <feng.tang@intel.com>
|
||||
Date: Mon, 4 Jun 2012 15:00:06 +0800
|
||||
Subject: [PATCH 093/109] ACPI: Add a quirk for "AMILO PRO V2030" to ignore
|
||||
the timer overriding
|
||||
|
||||
commit b939c2acf1dc42b08407ef5174f2e8d6f43dd5ea upstream.
|
||||
|
||||
commit f6b54f083cc66cf9b11d2120d8df3c2ad4e0836d upstream.
|
||||
|
||||
This is the 2nd part of fix for kernel bugzilla 40002:
|
||||
"IRQ 0 assigned to VGA"
|
||||
https://bugzilla.kernel.org/show_bug.cgi?id=40002
|
||||
|
||||
The root cause is the buggy FW, whose ACPI tables assign the GSI 16
|
||||
to 2 irqs 0 and 16(VGA), and the VGA is the right owner of GSI 16.
|
||||
So add a quirk to ignore the irq0 overriding GSI 16 for the
|
||||
FUJITSU SIEMENS AMILO PRO V2030 platform will solve this issue.
|
||||
|
||||
Reported-and-tested-by: Szymon Kowalczyk <fazerxlo@o2.pl>
|
||||
Signed-off-by: Feng Tang <feng.tang@intel.com>
|
||||
Signed-off-by: Len Brown <len.brown@intel.com>
|
||||
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
arch/x86/kernel/acpi/boot.c | 8 ++++++++
|
||||
1 files changed, 8 insertions(+), 0 deletions(-)
|
||||
|
||||
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
|
||||
index 882960e..479d03c 100644
|
||||
--- a/arch/x86/kernel/acpi/boot.c
|
||||
+++ b/arch/x86/kernel/acpi/boot.c
|
||||
@@ -1463,6 +1463,14 @@ static struct dmi_system_id __initdata acpi_dmi_table_late[] = {
|
||||
DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq 6715b"),
|
||||
},
|
||||
},
|
||||
+ {
|
||||
+ .callback = dmi_ignore_irq0_timer_override,
|
||||
+ .ident = "FUJITSU SIEMENS",
|
||||
+ .matches = {
|
||||
+ DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"),
|
||||
+ DMI_MATCH(DMI_PRODUCT_NAME, "AMILO PRO V2030"),
|
||||
+ },
|
||||
+ },
|
||||
{}
|
||||
};
|
||||
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+43
@@ -0,0 +1,43 @@
|
||||
From 66b7502a0d37876e547c5440aa34bee18e3b0f1e Mon Sep 17 00:00:00 2001
|
||||
From: Zhang Rui <rui.zhang@intel.com>
|
||||
Date: Mon, 20 Feb 2012 14:20:06 +0800
|
||||
Subject: [PATCH 094/109] ACPI, x86: fix Dell M6600 ACPI reboot regression via
|
||||
DMI
|
||||
|
||||
commit 76eb9a30db4bc8fd172f9155247264b5f2686d7b upstream.
|
||||
|
||||
Dell Precision M6600 is known to require PCI reboot, so add it to
|
||||
the reboot blacklist in pci_reboot_dmi_table[].
|
||||
|
||||
https://bugzilla.kernel.org/show_bug.cgi?id=42749
|
||||
|
||||
cc: x86@kernel.org
|
||||
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
|
||||
Signed-off-by: Len Brown <len.brown@intel.com>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
arch/x86/kernel/reboot.c | 8 ++++++++
|
||||
1 files changed, 8 insertions(+), 0 deletions(-)
|
||||
|
||||
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
|
||||
index 37a458b..e61f79c 100644
|
||||
--- a/arch/x86/kernel/reboot.c
|
||||
+++ b/arch/x86/kernel/reboot.c
|
||||
@@ -460,6 +460,14 @@ static struct dmi_system_id __initdata pci_reboot_dmi_table[] = {
|
||||
DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 990"),
|
||||
},
|
||||
},
|
||||
+ { /* Handle problems with rebooting on the Precision M6600. */
|
||||
+ .callback = set_pci_reboot,
|
||||
+ .ident = "Dell OptiPlex 990",
|
||||
+ .matches = {
|
||||
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
|
||||
+ DMI_MATCH(DMI_PRODUCT_NAME, "Precision M6600"),
|
||||
+ },
|
||||
+ },
|
||||
{ }
|
||||
};
|
||||
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+43
@@ -0,0 +1,43 @@
|
||||
From b563da2fc80658815355acc804d1b8c21d1a88f9 Mon Sep 17 00:00:00 2001
|
||||
From: Pavel Vasilyev <pavel@pavlinux.ru>
|
||||
Date: Tue, 5 Jun 2012 00:02:05 -0400
|
||||
Subject: [PATCH 095/109] ACPI sysfs.c strlen fix
|
||||
|
||||
commit 9f132652d94c96476b0b0a8caf0c10e96ab10fa8 upstream.
|
||||
|
||||
Current code is ignoring the last character of "enable" and "disable"
|
||||
in comparisons.
|
||||
|
||||
https://bugzilla.kernel.org/show_bug.cgi?id=33732
|
||||
|
||||
Signed-off-by: Len Brown <len.brown@intel.com>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
drivers/acpi/sysfs.c | 4 ++--
|
||||
1 files changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/drivers/acpi/sysfs.c b/drivers/acpi/sysfs.c
|
||||
index 9f66181..240a244 100644
|
||||
--- a/drivers/acpi/sysfs.c
|
||||
+++ b/drivers/acpi/sysfs.c
|
||||
@@ -173,7 +173,7 @@ static int param_set_trace_state(const char *val, struct kernel_param *kp)
|
||||
{
|
||||
int result = 0;
|
||||
|
||||
- if (!strncmp(val, "enable", strlen("enable") - 1)) {
|
||||
+ if (!strncmp(val, "enable", strlen("enable"))) {
|
||||
result = acpi_debug_trace(trace_method_name, trace_debug_level,
|
||||
trace_debug_layer, 0);
|
||||
if (result)
|
||||
@@ -181,7 +181,7 @@ static int param_set_trace_state(const char *val, struct kernel_param *kp)
|
||||
goto exit;
|
||||
}
|
||||
|
||||
- if (!strncmp(val, "disable", strlen("disable") - 1)) {
|
||||
+ if (!strncmp(val, "disable", strlen("disable"))) {
|
||||
int name = 0;
|
||||
result = acpi_debug_trace((char *)&name, trace_debug_level,
|
||||
trace_debug_layer, 0);
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+98
@@ -0,0 +1,98 @@
|
||||
From 5daf178c74f17e523291b0c4eabbf3b3f3740b75 Mon Sep 17 00:00:00 2001
|
||||
From: Tyler Hicks <tyhicks@canonical.com>
|
||||
Date: Mon, 11 Jun 2012 09:24:11 -0700
|
||||
Subject: [PATCH 096/109] eCryptfs: Gracefully refuse miscdev file ops on
|
||||
inherited/passed files
|
||||
|
||||
commit 8dc6780587c99286c0d3de747a2946a76989414a upstream.
|
||||
|
||||
File operations on /dev/ecryptfs would BUG() when the operations were
|
||||
performed by processes other than the process that originally opened the
|
||||
file. This could happen with open files inherited after fork() or file
|
||||
descriptors passed through IPC mechanisms. Rather than calling BUG(), an
|
||||
error code can be safely returned in most situations.
|
||||
|
||||
In ecryptfs_miscdev_release(), eCryptfs still needs to handle the
|
||||
release even if the last file reference is being held by a process that
|
||||
didn't originally open the file. ecryptfs_find_daemon_by_euid() will not
|
||||
be successful, so a pointer to the daemon is stored in the file's
|
||||
private_data. The private_data pointer is initialized when the miscdev
|
||||
file is opened and only used when the file is released.
|
||||
|
||||
https://launchpad.net/bugs/994247
|
||||
|
||||
Signed-off-by: Tyler Hicks <tyhicks@canonical.com>
|
||||
Reported-by: Sasha Levin <levinsasha928@gmail.com>
|
||||
Tested-by: Sasha Levin <levinsasha928@gmail.com>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
fs/ecryptfs/miscdev.c | 23 ++++++++++++++++-------
|
||||
1 files changed, 16 insertions(+), 7 deletions(-)
|
||||
|
||||
diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c
|
||||
index 0dc5a3d..a050e4b 100644
|
||||
--- a/fs/ecryptfs/miscdev.c
|
||||
+++ b/fs/ecryptfs/miscdev.c
|
||||
@@ -49,7 +49,10 @@ ecryptfs_miscdev_poll(struct file *file, poll_table *pt)
|
||||
mutex_lock(&ecryptfs_daemon_hash_mux);
|
||||
/* TODO: Just use file->private_data? */
|
||||
rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns());
|
||||
- BUG_ON(rc || !daemon);
|
||||
+ if (rc || !daemon) {
|
||||
+ mutex_unlock(&ecryptfs_daemon_hash_mux);
|
||||
+ return -EINVAL;
|
||||
+ }
|
||||
mutex_lock(&daemon->mux);
|
||||
mutex_unlock(&ecryptfs_daemon_hash_mux);
|
||||
if (daemon->flags & ECRYPTFS_DAEMON_ZOMBIE) {
|
||||
@@ -122,6 +125,7 @@ ecryptfs_miscdev_open(struct inode *inode, struct file *file)
|
||||
goto out_unlock_daemon;
|
||||
}
|
||||
daemon->flags |= ECRYPTFS_DAEMON_MISCDEV_OPEN;
|
||||
+ file->private_data = daemon;
|
||||
atomic_inc(&ecryptfs_num_miscdev_opens);
|
||||
out_unlock_daemon:
|
||||
mutex_unlock(&daemon->mux);
|
||||
@@ -152,9 +156,9 @@ ecryptfs_miscdev_release(struct inode *inode, struct file *file)
|
||||
|
||||
mutex_lock(&ecryptfs_daemon_hash_mux);
|
||||
rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns());
|
||||
- BUG_ON(rc || !daemon);
|
||||
+ if (rc || !daemon)
|
||||
+ daemon = file->private_data;
|
||||
mutex_lock(&daemon->mux);
|
||||
- BUG_ON(daemon->pid != task_pid(current));
|
||||
BUG_ON(!(daemon->flags & ECRYPTFS_DAEMON_MISCDEV_OPEN));
|
||||
daemon->flags &= ~ECRYPTFS_DAEMON_MISCDEV_OPEN;
|
||||
atomic_dec(&ecryptfs_num_miscdev_opens);
|
||||
@@ -246,8 +250,16 @@ ecryptfs_miscdev_read(struct file *file, char __user *buf, size_t count,
|
||||
mutex_lock(&ecryptfs_daemon_hash_mux);
|
||||
/* TODO: Just use file->private_data? */
|
||||
rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns());
|
||||
- BUG_ON(rc || !daemon);
|
||||
+ if (rc || !daemon) {
|
||||
+ mutex_unlock(&ecryptfs_daemon_hash_mux);
|
||||
+ return -EINVAL;
|
||||
+ }
|
||||
mutex_lock(&daemon->mux);
|
||||
+ if (task_pid(current) != daemon->pid) {
|
||||
+ mutex_unlock(&daemon->mux);
|
||||
+ mutex_unlock(&ecryptfs_daemon_hash_mux);
|
||||
+ return -EPERM;
|
||||
+ }
|
||||
if (daemon->flags & ECRYPTFS_DAEMON_ZOMBIE) {
|
||||
rc = 0;
|
||||
mutex_unlock(&ecryptfs_daemon_hash_mux);
|
||||
@@ -284,9 +296,6 @@ check_list:
|
||||
* message from the queue; try again */
|
||||
goto check_list;
|
||||
}
|
||||
- BUG_ON(euid != daemon->euid);
|
||||
- BUG_ON(current_user_ns() != daemon->user_ns);
|
||||
- BUG_ON(task_pid(current) != daemon->pid);
|
||||
msg_ctx = list_first_entry(&daemon->msg_ctx_out_queue,
|
||||
struct ecryptfs_msg_ctx, daemon_out_list);
|
||||
BUG_ON(!msg_ctx);
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+105
@@ -0,0 +1,105 @@
|
||||
From 3b0dfe936fb38efde98e2650ff18587c3285eb2a Mon Sep 17 00:00:00 2001
|
||||
From: Tyler Hicks <tyhicks@canonical.com>
|
||||
Date: Mon, 11 Jun 2012 10:21:34 -0700
|
||||
Subject: [PATCH 097/109] eCryptfs: Fix lockdep warning in miscdev operations
|
||||
|
||||
commit 60d65f1f07a7d81d3eb3b91fc13fca80f2fdbb12 upstream.
|
||||
|
||||
Don't grab the daemon mutex while holding the message context mutex.
|
||||
Addresses this lockdep warning:
|
||||
|
||||
ecryptfsd/2141 is trying to acquire lock:
|
||||
(&ecryptfs_msg_ctx_arr[i].mux){+.+.+.}, at: [<ffffffffa029c213>] ecryptfs_miscdev_read+0x143/0x470 [ecryptfs]
|
||||
|
||||
but task is already holding lock:
|
||||
(&(*daemon)->mux){+.+...}, at: [<ffffffffa029c2ec>] ecryptfs_miscdev_read+0x21c/0x470 [ecryptfs]
|
||||
|
||||
which lock already depends on the new lock.
|
||||
|
||||
the existing dependency chain (in reverse order) is:
|
||||
|
||||
-> #1 (&(*daemon)->mux){+.+...}:
|
||||
[<ffffffff810a3b8d>] lock_acquire+0x9d/0x220
|
||||
[<ffffffff8151c6da>] __mutex_lock_common+0x5a/0x4b0
|
||||
[<ffffffff8151cc64>] mutex_lock_nested+0x44/0x50
|
||||
[<ffffffffa029c5d7>] ecryptfs_send_miscdev+0x97/0x120 [ecryptfs]
|
||||
[<ffffffffa029b744>] ecryptfs_send_message+0x134/0x1e0 [ecryptfs]
|
||||
[<ffffffffa029a24e>] ecryptfs_generate_key_packet_set+0x2fe/0xa80 [ecryptfs]
|
||||
[<ffffffffa02960f8>] ecryptfs_write_metadata+0x108/0x250 [ecryptfs]
|
||||
[<ffffffffa0290f80>] ecryptfs_create+0x130/0x250 [ecryptfs]
|
||||
[<ffffffff811963a4>] vfs_create+0xb4/0x120
|
||||
[<ffffffff81197865>] do_last+0x8c5/0xa10
|
||||
[<ffffffff811998f9>] path_openat+0xd9/0x460
|
||||
[<ffffffff81199da2>] do_filp_open+0x42/0xa0
|
||||
[<ffffffff81187998>] do_sys_open+0xf8/0x1d0
|
||||
[<ffffffff81187a91>] sys_open+0x21/0x30
|
||||
[<ffffffff81527d69>] system_call_fastpath+0x16/0x1b
|
||||
|
||||
-> #0 (&ecryptfs_msg_ctx_arr[i].mux){+.+.+.}:
|
||||
[<ffffffff810a3418>] __lock_acquire+0x1bf8/0x1c50
|
||||
[<ffffffff810a3b8d>] lock_acquire+0x9d/0x220
|
||||
[<ffffffff8151c6da>] __mutex_lock_common+0x5a/0x4b0
|
||||
[<ffffffff8151cc64>] mutex_lock_nested+0x44/0x50
|
||||
[<ffffffffa029c213>] ecryptfs_miscdev_read+0x143/0x470 [ecryptfs]
|
||||
[<ffffffff811887d3>] vfs_read+0xb3/0x180
|
||||
[<ffffffff811888ed>] sys_read+0x4d/0x90
|
||||
[<ffffffff81527d69>] system_call_fastpath+0x16/0x1b
|
||||
|
||||
Signed-off-by: Tyler Hicks <tyhicks@canonical.com>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
fs/ecryptfs/miscdev.c | 25 +++++++++++++------------
|
||||
1 files changed, 13 insertions(+), 12 deletions(-)
|
||||
|
||||
diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c
|
||||
index a050e4b..de42310 100644
|
||||
--- a/fs/ecryptfs/miscdev.c
|
||||
+++ b/fs/ecryptfs/miscdev.c
|
||||
@@ -195,31 +195,32 @@ int ecryptfs_send_miscdev(char *data, size_t data_size,
|
||||
struct ecryptfs_msg_ctx *msg_ctx, u8 msg_type,
|
||||
u16 msg_flags, struct ecryptfs_daemon *daemon)
|
||||
{
|
||||
- int rc = 0;
|
||||
+ struct ecryptfs_message *msg;
|
||||
|
||||
- mutex_lock(&msg_ctx->mux);
|
||||
- msg_ctx->msg = kmalloc((sizeof(*msg_ctx->msg) + data_size),
|
||||
- GFP_KERNEL);
|
||||
- if (!msg_ctx->msg) {
|
||||
- rc = -ENOMEM;
|
||||
+ msg = kmalloc((sizeof(*msg) + data_size), GFP_KERNEL);
|
||||
+ if (!msg) {
|
||||
printk(KERN_ERR "%s: Out of memory whilst attempting "
|
||||
"to kmalloc(%zd, GFP_KERNEL)\n", __func__,
|
||||
- (sizeof(*msg_ctx->msg) + data_size));
|
||||
- goto out_unlock;
|
||||
+ (sizeof(*msg) + data_size));
|
||||
+ return -ENOMEM;
|
||||
}
|
||||
+
|
||||
+ mutex_lock(&msg_ctx->mux);
|
||||
+ msg_ctx->msg = msg;
|
||||
msg_ctx->msg->index = msg_ctx->index;
|
||||
msg_ctx->msg->data_len = data_size;
|
||||
msg_ctx->type = msg_type;
|
||||
memcpy(msg_ctx->msg->data, data, data_size);
|
||||
msg_ctx->msg_size = (sizeof(*msg_ctx->msg) + data_size);
|
||||
- mutex_lock(&daemon->mux);
|
||||
list_add_tail(&msg_ctx->daemon_out_list, &daemon->msg_ctx_out_queue);
|
||||
+ mutex_unlock(&msg_ctx->mux);
|
||||
+
|
||||
+ mutex_lock(&daemon->mux);
|
||||
daemon->num_queued_msg_ctx++;
|
||||
wake_up_interruptible(&daemon->wait);
|
||||
mutex_unlock(&daemon->mux);
|
||||
-out_unlock:
|
||||
- mutex_unlock(&msg_ctx->mux);
|
||||
- return rc;
|
||||
+
|
||||
+ return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+45
@@ -0,0 +1,45 @@
|
||||
From 590d0b9de4bb4ef7a84bb0a8a13d85353556e7ae Mon Sep 17 00:00:00 2001
|
||||
From: Tyler Hicks <tyhicks@canonical.com>
|
||||
Date: Tue, 12 Jun 2012 11:17:01 -0700
|
||||
Subject: [PATCH 098/109] eCryptfs: Properly check for O_RDONLY flag before
|
||||
doing privileged open
|
||||
|
||||
commit 9fe79d7600497ed8a95c3981cbe5b73ab98222f0 upstream.
|
||||
|
||||
If the first attempt at opening the lower file read/write fails,
|
||||
eCryptfs will retry using a privileged kthread. However, the privileged
|
||||
retry should not happen if the lower file's inode is read-only because a
|
||||
read/write open will still be unsuccessful.
|
||||
|
||||
The check for determining if the open should be retried was intended to
|
||||
be based on the access mode of the lower file's open flags being
|
||||
O_RDONLY, but the check was incorrectly performed. This would cause the
|
||||
open to be retried by the privileged kthread, resulting in a second
|
||||
failed open of the lower file. This patch corrects the check to
|
||||
determine if the open request should be handled by the privileged
|
||||
kthread.
|
||||
|
||||
Signed-off-by: Tyler Hicks <tyhicks@canonical.com>
|
||||
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
|
||||
Acked-by: Dan Carpenter <dan.carpenter@oracle.com>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
fs/ecryptfs/kthread.c | 2 +-
|
||||
1 files changed, 1 insertions(+), 1 deletions(-)
|
||||
|
||||
diff --git a/fs/ecryptfs/kthread.c b/fs/ecryptfs/kthread.c
|
||||
index 69f994a..0dbe58a 100644
|
||||
--- a/fs/ecryptfs/kthread.c
|
||||
+++ b/fs/ecryptfs/kthread.c
|
||||
@@ -149,7 +149,7 @@ int ecryptfs_privileged_open(struct file **lower_file,
|
||||
(*lower_file) = dentry_open(lower_dentry, lower_mnt, flags, cred);
|
||||
if (!IS_ERR(*lower_file))
|
||||
goto out;
|
||||
- if (flags & O_RDONLY) {
|
||||
+ if ((flags & O_ACCMODE) == O_RDONLY) {
|
||||
rc = PTR_ERR((*lower_file));
|
||||
goto out;
|
||||
}
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
+49
@@ -0,0 +1,49 @@
|
||||
From 074935f3d2b0e862c66af5032619659b070e1ebb Mon Sep 17 00:00:00 2001
|
||||
From: "Rafael J. Wysocki" <rjw@sisk.pl>
|
||||
Date: Tue, 29 May 2012 21:21:07 +0200
|
||||
Subject: [PATCH 099/109] ACPI / PM: Make acpi_pm_device_sleep_state() follow
|
||||
the specification
|
||||
|
||||
commit dbe9a2edd17d843d80faf2b99f20a691c1853418 upstream.
|
||||
|
||||
The comparison between the system sleep state being entered
|
||||
and the lowest system sleep state the given device may wake up
|
||||
from in acpi_pm_device_sleep_state() is reversed, because the
|
||||
specification (ACPI 5.0) says that for wakeup to work:
|
||||
|
||||
"The sleeping state being entered must be less than or equal to the
|
||||
power state declared in element 1 of the _PRW object."
|
||||
|
||||
In other words, the state returned by _PRW is the deepest
|
||||
(lowest-power) system sleep state the device is capable of waking up
|
||||
the system from.
|
||||
|
||||
Moreover, acpi_pm_device_sleep_state() also should check if the
|
||||
wakeup capability is supported through ACPI, because in principle it
|
||||
may be done via native PCIe PME, for example, in which case _SxW
|
||||
should not be evaluated.
|
||||
|
||||
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
|
||||
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
---
|
||||
drivers/acpi/sleep.c | 4 ++--
|
||||
1 files changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
|
||||
index ca191ff..ed6bc52 100644
|
||||
--- a/drivers/acpi/sleep.c
|
||||
+++ b/drivers/acpi/sleep.c
|
||||
@@ -702,8 +702,8 @@ int acpi_pm_device_sleep_state(struct device *dev, int *d_min_p)
|
||||
* can wake the system. _S0W may be valid, too.
|
||||
*/
|
||||
if (acpi_target_sleep_state == ACPI_STATE_S0 ||
|
||||
- (device_may_wakeup(dev) &&
|
||||
- adev->wakeup.sleep_state <= acpi_target_sleep_state)) {
|
||||
+ (device_may_wakeup(dev) && adev->wakeup.flags.valid &&
|
||||
+ adev->wakeup.sleep_state >= acpi_target_sleep_state)) {
|
||||
acpi_status status;
|
||||
|
||||
acpi_method[3] = 'W';
|
||||
--
|
||||
1.7.7.6
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user