From be646c2d2ba8e2e56596d72633705f8286698c25 Mon Sep 17 00:00:00 2001 From: Joern Engel Date: Wed, 15 May 2013 00:44:07 -0700 Subject: target: Remove unused wait_for_tasks bit in target_wait_for_sess_cmds Drop unused transport_wait_for_tasks() check in target_wait_for_sess_cmds shutdown code, and convert tcm_qla2xxx + ib_srpt fabric drivers. Cc: Joern Engel Cc: Roland Dreier Signed-off-by: Nicholas Bellinger --- include/target/target_core_fabric.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/target/target_core_fabric.h b/include/target/target_core_fabric.h index ba3471b73c07..8a26f0d55fd1 100644 --- a/include/target/target_core_fabric.h +++ b/include/target/target_core_fabric.h @@ -123,7 +123,7 @@ int transport_send_check_condition_and_sense(struct se_cmd *, int target_get_sess_cmd(struct se_session *, struct se_cmd *, bool); int target_put_sess_cmd(struct se_session *, struct se_cmd *); void target_sess_cmd_list_set_waiting(struct se_session *); -void target_wait_for_sess_cmds(struct se_session *, int); +void target_wait_for_sess_cmds(struct se_session *); int core_alua_check_nonop_delay(struct se_cmd *); -- cgit v1.2.3-71-gd317 From 576ebd74928fd60ae112b33c42b89602015fadbd Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Tue, 21 May 2013 16:08:22 +0200 Subject: kernel: Fix s390 absolute memory access for /dev/mem On s390 the prefix page and absolute zero pages are not correctly returned when reading /dev/mem. The reason is that the s390 asm/io.h file includes the asm-generic/io.h file which then defines xlate_dev_mem_ptr() and therefore overwrites the s390 specific version that does the correct swap operation for prefix and absolute zero pages. The problem is a regression that was introduced with git commit cd248341 (s390/pci: base support). To fix the problem add "#ifndef xlate_dev_mem_ptr" in asm-generic/io.h and "#define xlate_dev_mem_ptr" in asm/io.h. This ensures that the s390 version is used. For completeness also add the "#ifndef" construct for xlate_dev_kmem_ptr(). Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/io.h | 1 + include/asm-generic/io.h | 4 ++++ 2 files changed, 5 insertions(+) (limited to 'include') diff --git a/arch/s390/include/asm/io.h b/arch/s390/include/asm/io.h index 379d96e2105e..fd9be010f9b2 100644 --- a/arch/s390/include/asm/io.h +++ b/arch/s390/include/asm/io.h @@ -36,6 +36,7 @@ static inline void * phys_to_virt(unsigned long address) } void *xlate_dev_mem_ptr(unsigned long phys); +#define xlate_dev_mem_ptr xlate_dev_mem_ptr void unxlate_dev_mem_ptr(unsigned long phys, void *addr); /* diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h index ac9da00e9f2c..d5afe96adba6 100644 --- a/include/asm-generic/io.h +++ b/include/asm-generic/io.h @@ -343,8 +343,12 @@ extern void ioport_unmap(void __iomem *p); #endif /* CONFIG_GENERIC_IOMAP */ #endif /* CONFIG_HAS_IOPORT */ +#ifndef xlate_dev_kmem_ptr #define xlate_dev_kmem_ptr(p) p +#endif +#ifndef xlate_dev_mem_ptr #define xlate_dev_mem_ptr(p) __va(p) +#endif #ifdef CONFIG_VIRT_TO_BUS #ifndef virt_to_bus -- cgit v1.2.3-71-gd317 From 591a0ac7f14aae6bf11b1cb6b5a68480bd644ddb Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Thu, 23 May 2013 12:07:50 +0300 Subject: OMAPDSS: Fix crash with DT boot When booting with DT, there's a crash when omapfb is probed. This is caused by the fact that omapdss+DT is not yet supported, and thus omapdss is not probed at all. On the other hand, omapfb is always probed. When omapfb tries to use omapdss, there's a NULL pointer dereference crash. The same error should most likely happen with omapdrm and omap_vout also. To fix this, add an "initialized" state to omapdss. When omapdss has been probed, it's marked as initialized. omapfb, omapdrm and omap_vout check this state when they are probed to see that omapdss is actually there. Signed-off-by: Tomi Valkeinen Tested-by: Peter Ujfalusi --- drivers/gpu/drm/omapdrm/omap_drv.c | 3 +++ drivers/media/platform/omap/omap_vout.c | 3 +++ drivers/video/omap2/dss/core.c | 20 +++++++++++++++++++- drivers/video/omap2/omapfb/omapfb-main.c | 3 +++ include/video/omapdss.h | 1 + 5 files changed, 29 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/gpu/drm/omapdrm/omap_drv.c b/drivers/gpu/drm/omapdrm/omap_drv.c index 079c54c6f94c..902074bbd1f4 100644 --- a/drivers/gpu/drm/omapdrm/omap_drv.c +++ b/drivers/gpu/drm/omapdrm/omap_drv.c @@ -548,6 +548,9 @@ static void pdev_shutdown(struct platform_device *device) static int pdev_probe(struct platform_device *device) { + if (omapdss_is_initialized() == false) + return -EPROBE_DEFER; + DBG("%s", device->name); return drm_platform_init(&omap_drm_driver, device); } diff --git a/drivers/media/platform/omap/omap_vout.c b/drivers/media/platform/omap/omap_vout.c index 96c4a17e4280..0a489bd29d6b 100644 --- a/drivers/media/platform/omap/omap_vout.c +++ b/drivers/media/platform/omap/omap_vout.c @@ -2144,6 +2144,9 @@ static int __init omap_vout_probe(struct platform_device *pdev) struct omap_dss_device *def_display; struct omap2video_device *vid_dev = NULL; + if (omapdss_is_initialized() == false) + return -EPROBE_DEFER; + ret = omapdss_compat_init(); if (ret) { dev_err(&pdev->dev, "failed to init dss\n"); diff --git a/drivers/video/omap2/dss/core.c b/drivers/video/omap2/dss/core.c index f8779d4750ba..f49ddb9e7c82 100644 --- a/drivers/video/omap2/dss/core.c +++ b/drivers/video/omap2/dss/core.c @@ -53,6 +53,8 @@ static char *def_disp_name; module_param_named(def_disp, def_disp_name, charp, 0); MODULE_PARM_DESC(def_disp, "default display name"); +static bool dss_initialized; + const char *omapdss_get_default_display_name(void) { return core.default_display_name; @@ -66,6 +68,12 @@ enum omapdss_version omapdss_get_version(void) } EXPORT_SYMBOL(omapdss_get_version); +bool omapdss_is_initialized(void) +{ + return dss_initialized; +} +EXPORT_SYMBOL(omapdss_is_initialized); + struct platform_device *dss_get_core_pdev(void) { return core.pdev; @@ -606,6 +614,8 @@ static int __init omap_dss_init(void) return r; } + dss_initialized = true; + return 0; } @@ -636,7 +646,15 @@ static int __init omap_dss_init(void) static int __init omap_dss_init2(void) { - return omap_dss_register_drivers(); + int r; + + r = omap_dss_register_drivers(); + if (r) + return r; + + dss_initialized = true; + + return 0; } core_initcall(omap_dss_init); diff --git a/drivers/video/omap2/omapfb/omapfb-main.c b/drivers/video/omap2/omapfb/omapfb-main.c index 717f13a93351..bb5f9fee3659 100644 --- a/drivers/video/omap2/omapfb/omapfb-main.c +++ b/drivers/video/omap2/omapfb/omapfb-main.c @@ -2416,6 +2416,9 @@ static int __init omapfb_probe(struct platform_device *pdev) DBG("omapfb_probe\n"); + if (omapdss_is_initialized() == false) + return -EPROBE_DEFER; + if (pdev->num_resources != 0) { dev_err(&pdev->dev, "probed for an unknown device\n"); r = -ENODEV; diff --git a/include/video/omapdss.h b/include/video/omapdss.h index caefa093337d..9b52340ec3ff 100644 --- a/include/video/omapdss.h +++ b/include/video/omapdss.h @@ -741,6 +741,7 @@ struct omap_dss_driver { }; enum omapdss_version omapdss_get_version(void); +bool omapdss_is_initialized(void); int omap_dss_register_driver(struct omap_dss_driver *); void omap_dss_unregister_driver(struct omap_dss_driver *); -- cgit v1.2.3-71-gd317 From 2a7851bffb008ff4882eee673da74718997b4265 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 17 May 2013 03:56:10 +0000 Subject: netfilter: add nf_ipv6_ops hook to fix xt_addrtype with IPv6 Quoting https://bugzilla.netfilter.org/show_bug.cgi?id=812: [ ip6tables -m addrtype ] When I tried to use in the nat/PREROUTING it messes up the routing cache even if the rule didn't matched at all. [..] If I remove the --limit-iface-in from the non-working scenario, so just use the -m addrtype --dst-type LOCAL it works! This happens when LOCAL type matching is requested with --limit-iface-in, and the default ipv6 route is via the interface the packet we test arrived on. Because xt_addrtype uses ip6_route_output, the ipv6 routing implementation creates an unwanted cached entry, and the packet won't make it to the real/expected destination. Silently ignoring --limit-iface-in makes the routing work but it breaks rule matching (--dst-type LOCAL with limit-iface-in is supposed to only match if the dst address is configured on the incoming interface; without --limit-iface-in it will match if the address is reachable via lo). The test should call ipv6_chk_addr() instead. However, this would add a link-time dependency on ipv6. There are two possible solutions: 1) Revert the commit that moved ipt_addrtype to xt_addrtype, and put ipv6 specific code into ip6t_addrtype. 2) add new "nf_ipv6_ops" struct to register pointers to ipv6 functions. While the former might seem preferable, Pablo pointed out that there are more xt modules with link-time dependeny issues regarding ipv6, so lets go for 2). Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_ipv6.h | 16 ++++++++++++++++ include/net/addrconf.h | 2 +- net/ipv6/addrconf.c | 2 +- net/ipv6/netfilter.c | 7 +++++++ net/netfilter/core.c | 2 ++ net/netfilter/xt_addrtype.c | 27 ++++++++++++++++----------- 6 files changed, 43 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index 98ffb54988b6..2d4df6ce043e 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -17,6 +17,22 @@ extern __sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook, extern int ipv6_netfilter_init(void); extern void ipv6_netfilter_fini(void); + +/* + * Hook functions for ipv6 to allow xt_* modules to be built-in even + * if IPv6 is a module. + */ +struct nf_ipv6_ops { + int (*chk_addr)(struct net *net, const struct in6_addr *addr, + const struct net_device *dev, int strict); +}; + +extern const struct nf_ipv6_ops __rcu *nf_ipv6_ops; +static inline const struct nf_ipv6_ops *nf_get_ipv6_ops(void) +{ + return rcu_dereference(nf_ipv6_ops); +} + #else /* CONFIG_NETFILTER */ static inline int ipv6_netfilter_init(void) { return 0; } static inline void ipv6_netfilter_fini(void) { return; } diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 84a6440f1f19..21f702704f24 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -65,7 +65,7 @@ extern int addrconf_set_dstaddr(struct net *net, extern int ipv6_chk_addr(struct net *net, const struct in6_addr *addr, - struct net_device *dev, + const struct net_device *dev, int strict); #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index d1ab6ab29a55..d1b2d8034b54 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1487,7 +1487,7 @@ static int ipv6_count_addresses(struct inet6_dev *idev) } int ipv6_chk_addr(struct net *net, const struct in6_addr *addr, - struct net_device *dev, int strict) + const struct net_device *dev, int strict) { struct inet6_ifaddr *ifp; unsigned int hash = inet6_addr_hash(addr); diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 72836f40b730..95f3f1da0d7f 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -186,6 +187,10 @@ static __sum16 nf_ip6_checksum_partial(struct sk_buff *skb, unsigned int hook, return csum; }; +static const struct nf_ipv6_ops ipv6ops = { + .chk_addr = ipv6_chk_addr, +}; + static const struct nf_afinfo nf_ip6_afinfo = { .family = AF_INET6, .checksum = nf_ip6_checksum, @@ -198,6 +203,7 @@ static const struct nf_afinfo nf_ip6_afinfo = { int __init ipv6_netfilter_init(void) { + RCU_INIT_POINTER(nf_ipv6_ops, &ipv6ops); return nf_register_afinfo(&nf_ip6_afinfo); } @@ -206,5 +212,6 @@ int __init ipv6_netfilter_init(void) */ void ipv6_netfilter_fini(void) { + RCU_INIT_POINTER(nf_ipv6_ops, NULL); nf_unregister_afinfo(&nf_ip6_afinfo); } diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 07c865a31a3d..857ca9f35177 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -30,6 +30,8 @@ static DEFINE_MUTEX(afinfo_mutex); const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO] __read_mostly; EXPORT_SYMBOL(nf_afinfo); +const struct nf_ipv6_ops __rcu *nf_ipv6_ops __read_mostly; +EXPORT_SYMBOL_GPL(nf_ipv6_ops); int nf_register_afinfo(const struct nf_afinfo *afinfo) { diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c index 49c5ff7f6dd6..68ff29f60867 100644 --- a/net/netfilter/xt_addrtype.c +++ b/net/netfilter/xt_addrtype.c @@ -22,6 +22,7 @@ #include #endif +#include #include #include @@ -33,12 +34,12 @@ MODULE_ALIAS("ip6t_addrtype"); #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) static u32 match_lookup_rt6(struct net *net, const struct net_device *dev, - const struct in6_addr *addr) + const struct in6_addr *addr, u16 mask) { const struct nf_afinfo *afinfo; struct flowi6 flow; struct rt6_info *rt; - u32 ret; + u32 ret = 0; int route_err; memset(&flow, 0, sizeof(flow)); @@ -49,12 +50,19 @@ static u32 match_lookup_rt6(struct net *net, const struct net_device *dev, rcu_read_lock(); afinfo = nf_get_afinfo(NFPROTO_IPV6); - if (afinfo != NULL) + if (afinfo != NULL) { + const struct nf_ipv6_ops *v6ops; + + if (dev && (mask & XT_ADDRTYPE_LOCAL)) { + v6ops = nf_get_ipv6_ops(); + if (v6ops && v6ops->chk_addr(net, addr, dev, true)) + ret = XT_ADDRTYPE_LOCAL; + } route_err = afinfo->route(net, (struct dst_entry **)&rt, - flowi6_to_flowi(&flow), !!dev); - else + flowi6_to_flowi(&flow), false); + } else { route_err = 1; - + } rcu_read_unlock(); if (route_err) @@ -62,15 +70,12 @@ static u32 match_lookup_rt6(struct net *net, const struct net_device *dev, if (rt->rt6i_flags & RTF_REJECT) ret = XT_ADDRTYPE_UNREACHABLE; - else - ret = 0; - if (rt->rt6i_flags & RTF_LOCAL) + if (dev == NULL && rt->rt6i_flags & RTF_LOCAL) ret |= XT_ADDRTYPE_LOCAL; if (rt->rt6i_flags & RTF_ANYCAST) ret |= XT_ADDRTYPE_ANYCAST; - dst_release(&rt->dst); return ret; } @@ -90,7 +95,7 @@ static bool match_type6(struct net *net, const struct net_device *dev, if ((XT_ADDRTYPE_LOCAL | XT_ADDRTYPE_ANYCAST | XT_ADDRTYPE_UNREACHABLE) & mask) - return !!(mask & match_lookup_rt6(net, dev, addr)); + return !!(mask & match_lookup_rt6(net, dev, addr, mask)); return true; } -- cgit v1.2.3-71-gd317 From 7805d000db30a3787a4c969bab6ae4d8a5fd8ce6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 24 May 2013 10:50:24 +0900 Subject: cgroup: fix a subtle bug in descendant pre-order walk When cgroup_next_descendant_pre() initiates a walk, it checks whether the subtree root doesn't have any children and if not returns NULL. Later code assumes that the subtree isn't empty. This is broken because the subtree may become empty inbetween, which can lead to the traversal escaping the subtree by walking to the sibling of the subtree root. There's no reason to have the early exit path. Remove it along with the later assumption that the subtree isn't empty. This simplifies the code a bit and fixes the subtle bug. While at it, fix the comment of cgroup_for_each_descendant_pre() which was incorrectly referring to ->css_offline() instead of ->css_online(). Signed-off-by: Tejun Heo Reviewed-by: Michal Hocko Cc: stable@vger.kernel.org --- include/linux/cgroup.h | 2 +- kernel/cgroup.c | 9 +++------ 2 files changed, 4 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 5047355b9a0f..8bda1294c035 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -707,7 +707,7 @@ struct cgroup *cgroup_rightmost_descendant(struct cgroup *pos); * * If a subsystem synchronizes against the parent in its ->css_online() and * before starting iterating, and synchronizes against @pos on each - * iteration, any descendant cgroup which finished ->css_offline() is + * iteration, any descendant cgroup which finished ->css_online() is * guaranteed to be visible in the future iterations. * * In other words, the following guarantees that a descendant can't escape diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 38b136553044..31e9ef319070 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2954,11 +2954,8 @@ struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos, WARN_ON_ONCE(!rcu_read_lock_held()); /* if first iteration, pretend we just visited @cgroup */ - if (!pos) { - if (list_empty(&cgroup->children)) - return NULL; + if (!pos) pos = cgroup; - } /* visit the first child if exists */ next = list_first_or_null_rcu(&pos->children, struct cgroup, sibling); @@ -2966,14 +2963,14 @@ struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos, return next; /* no child, visit my or the closest ancestor's next sibling */ - do { + while (pos != cgroup) { next = list_entry_rcu(pos->sibling.next, struct cgroup, sibling); if (&next->sibling != &pos->parent->children) return next; pos = pos->parent; - } while (pos != cgroup); + } return NULL; } -- cgit v1.2.3-71-gd317 From 12bcbe66d7b3cc9f9f86cd02f925666eaa3c2107 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 28 May 2013 14:38:42 -0400 Subject: rcu: Add _notrace variation of rcu_dereference_raw() and hlist_for_each_entry_rcu() As rcu_dereference_raw() under RCU debug config options can add quite a bit of checks, and that tracing uses rcu_dereference_raw(), these checks happen with the function tracer. The function tracer also happens to trace these debug checks too. This added overhead can livelock the system. Add a new interface to RCU for both rcu_dereference_raw_notrace() as well as hlist_for_each_entry_rcu_notrace() as the hlist iterator uses the rcu_dereference_raw() as well, and is used a bit with the function tracer. Link: http://lkml.kernel.org/r/20130528184209.304356745@goodmis.org Acked-by: Paul E. McKenney Signed-off-by: Steven Rostedt --- include/linux/rculist.h | 20 ++++++++++++++++++++ include/linux/rcupdate.h | 9 +++++++++ 2 files changed, 29 insertions(+) (limited to 'include') diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 8089e35d47ac..f4b1001a4676 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -460,6 +460,26 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, pos = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(\ &(pos)->member)), typeof(*(pos)), member)) +/** + * hlist_for_each_entry_rcu_notrace - iterate over rcu list of given type (for tracing) + * @pos: the type * to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the hlist_node within the struct. + * + * This list-traversal primitive may safely run concurrently with + * the _rcu list-mutation primitives such as hlist_add_head_rcu() + * as long as the traversal is guarded by rcu_read_lock(). + * + * This is the same as hlist_for_each_entry_rcu() except that it does + * not do any RCU debugging or tracing. + */ +#define hlist_for_each_entry_rcu_notrace(pos, head, member) \ + for (pos = hlist_entry_safe (rcu_dereference_raw_notrace(hlist_first_rcu(head)),\ + typeof(*(pos)), member); \ + pos; \ + pos = hlist_entry_safe(rcu_dereference_raw_notrace(hlist_next_rcu(\ + &(pos)->member)), typeof(*(pos)), member)) + /** * hlist_for_each_entry_rcu_bh - iterate over rcu list of given type * @pos: the type * to use as a loop cursor. diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 4ccd68e49b00..ddcc7826d907 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -640,6 +640,15 @@ static inline void rcu_preempt_sleep_check(void) #define rcu_dereference_raw(p) rcu_dereference_check(p, 1) /*@@@ needed? @@@*/ +/* + * The tracing infrastructure traces RCU (we want that), but unfortunately + * some of the RCU checks causes tracing to lock up the system. + * + * The tracing version of rcu_dereference_raw() must not call + * rcu_read_lock_held(). + */ +#define rcu_dereference_raw_notrace(p) __rcu_dereference_check((p), 1, __rcu) + /** * rcu_access_index() - fetch RCU index with no dereferencing * @p: The index to read -- cgit v1.2.3-71-gd317 From 2abb274629614bef4044a0b98ada42e977feadfd Mon Sep 17 00:00:00 2001 From: Aurelien Chartier Date: Tue, 28 May 2013 18:09:56 +0100 Subject: xenbus: delay xenbus frontend resume if xenstored is not running If the xenbus frontend is located in a domain running xenstored, the device resume is hanging because it is happening before the process resume. This patch adds extra logic to the resume code to check if we are the domain running xenstored and delay the resume if needed. Signed-off-by: Aurelien Chartier [Changes in v2: - Instead of bypassing the resume, process it in a workqueue] [Changes in v3: - Add a struct work in xenbus_device to avoid dynamic allocation - Several small code fixes] [Changes in v4: - Use a dedicated workqueue] [Changes in v5: - Move create_workqueue error handling to xenbus_frontend_dev_resume] Acked-by: Jan Beulich Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/xenbus/xenbus_probe_frontend.c | 37 +++++++++++++++++++++++++++++- include/xen/xenbus.h | 1 + 2 files changed, 37 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c index 3159a37d966d..a7e25073de19 100644 --- a/drivers/xen/xenbus/xenbus_probe_frontend.c +++ b/drivers/xen/xenbus/xenbus_probe_frontend.c @@ -29,6 +29,8 @@ #include "xenbus_probe.h" +static struct workqueue_struct *xenbus_frontend_wq; + /* device// => - */ static int frontend_bus_id(char bus_id[XEN_BUS_ID_SIZE], const char *nodename) { @@ -89,9 +91,40 @@ static void backend_changed(struct xenbus_watch *watch, xenbus_otherend_changed(watch, vec, len, 1); } +static void xenbus_frontend_delayed_resume(struct work_struct *w) +{ + struct xenbus_device *xdev = container_of(w, struct xenbus_device, work); + + xenbus_dev_resume(&xdev->dev); +} + +static int xenbus_frontend_dev_resume(struct device *dev) +{ + /* + * If xenstored is running in this domain, we cannot access the backend + * state at the moment, so we need to defer xenbus_dev_resume + */ + if (xen_store_domain_type == XS_LOCAL) { + struct xenbus_device *xdev = to_xenbus_device(dev); + + if (!xenbus_frontend_wq) { + pr_err("%s: no workqueue to process delayed resume\n", + xdev->nodename); + return -EFAULT; + } + + INIT_WORK(&xdev->work, xenbus_frontend_delayed_resume); + queue_work(xenbus_frontend_wq, &xdev->work); + + return 0; + } + + return xenbus_dev_resume(dev); +} + static const struct dev_pm_ops xenbus_pm_ops = { .suspend = xenbus_dev_suspend, - .resume = xenbus_dev_resume, + .resume = xenbus_frontend_dev_resume, .freeze = xenbus_dev_suspend, .thaw = xenbus_dev_cancel, .restore = xenbus_dev_resume, @@ -440,6 +473,8 @@ static int __init xenbus_probe_frontend_init(void) register_xenstore_notifier(&xenstore_notifier); + xenbus_frontend_wq = create_workqueue("xenbus_frontend"); + return 0; } subsys_initcall(xenbus_probe_frontend_init); diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h index 0a7515c1e3a4..569c07f2e344 100644 --- a/include/xen/xenbus.h +++ b/include/xen/xenbus.h @@ -70,6 +70,7 @@ struct xenbus_device { struct device dev; enum xenbus_state state; struct completion down; + struct work_struct work; }; static inline struct xenbus_device *to_xenbus_device(struct device *dev) -- cgit v1.2.3-71-gd317 From 9b31a328e344e62e7cc98ae574edcb7b674719bb Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Wed, 15 May 2013 00:52:44 -0700 Subject: target: Re-instate sess_wait_list for target_wait_for_sess_cmds Switch back to pre commit 1c7b13fe652 list splicing logic for active I/O shutdown with tcm_qla2xxx + ib_srpt fabrics. The original commit was done under the incorrect assumption that it's safe to walk se_sess->sess_cmd_list unprotected in target_wait_for_sess_cmds() after sess->sess_tearing_down = 1 has been set by target_sess_cmd_list_set_waiting() during session shutdown. So instead of adding sess->sess_cmd_lock protection around sess->sess_cmd_list during target_wait_for_sess_cmds(), switch back to sess->sess_wait_list to allow wait_for_completion() + TFO->release_cmd() to occur without having to walk ->sess_cmd_list after the list_splice. Also add a check to exit if target_sess_cmd_list_set_waiting() has already been called, and add a WARN_ON to check for any fabric bug where new se_cmds are added to sess->sess_cmd_list after sess->sess_tearing_down = 1 has already been set. Cc: Joern Engel Cc: Roland Dreier Cc: stable@vger.kernel.org Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_transport.c | 18 ++++++++++++++---- include/target/target_core_base.h | 1 + 2 files changed, 15 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 311c11349aab..bbca144821c5 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -221,6 +221,7 @@ struct se_session *transport_init_session(void) INIT_LIST_HEAD(&se_sess->sess_list); INIT_LIST_HEAD(&se_sess->sess_acl_list); INIT_LIST_HEAD(&se_sess->sess_cmd_list); + INIT_LIST_HEAD(&se_sess->sess_wait_list); spin_lock_init(&se_sess->sess_cmd_lock); kref_init(&se_sess->sess_kref); @@ -2250,11 +2251,14 @@ void target_sess_cmd_list_set_waiting(struct se_session *se_sess) unsigned long flags; spin_lock_irqsave(&se_sess->sess_cmd_lock, flags); - - WARN_ON(se_sess->sess_tearing_down); + if (se_sess->sess_tearing_down) { + spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); + return; + } se_sess->sess_tearing_down = 1; + list_splice_init(&se_sess->sess_cmd_list, &se_sess->sess_wait_list); - list_for_each_entry(se_cmd, &se_sess->sess_cmd_list, se_cmd_list) + list_for_each_entry(se_cmd, &se_sess->sess_wait_list, se_cmd_list) se_cmd->cmd_wait_set = 1; spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); @@ -2267,9 +2271,10 @@ EXPORT_SYMBOL(target_sess_cmd_list_set_waiting); void target_wait_for_sess_cmds(struct se_session *se_sess) { struct se_cmd *se_cmd, *tmp_cmd; + unsigned long flags; list_for_each_entry_safe(se_cmd, tmp_cmd, - &se_sess->sess_cmd_list, se_cmd_list) { + &se_sess->sess_wait_list, se_cmd_list) { list_del(&se_cmd->se_cmd_list); pr_debug("Waiting for se_cmd: %p t_state: %d, fabric state:" @@ -2283,6 +2288,11 @@ void target_wait_for_sess_cmds(struct se_session *se_sess) se_cmd->se_tfo->release_cmd(se_cmd); } + + spin_lock_irqsave(&se_sess->sess_cmd_lock, flags); + WARN_ON(!list_empty(&se_sess->sess_cmd_list)); + spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); + } EXPORT_SYMBOL(target_wait_for_sess_cmds); diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index e773dfa5f98f..4ea4f985f394 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -543,6 +543,7 @@ struct se_session { struct list_head sess_list; struct list_head sess_acl_list; struct list_head sess_cmd_list; + struct list_head sess_wait_list; spinlock_t sess_cmd_lock; struct kref sess_kref; }; -- cgit v1.2.3-71-gd317 From ac4e97abce9b80c020e7113325f49e58b7b15e3f Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 30 May 2013 09:19:35 +0200 Subject: scatterlist: sg_set_buf() argument must be in linear mapping Add a check behind CONFIG_DEBUG_SG to verify this. Signed-off-by: Rusty Russell Signed-off-by: Jens Axboe --- include/linux/scatterlist.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index 5951e3f38878..26806775b11b 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -111,6 +111,9 @@ static inline struct page *sg_page(struct scatterlist *sg) static inline void sg_set_buf(struct scatterlist *sg, const void *buf, unsigned int buflen) { +#ifdef CONFIG_DEBUG_SG + BUG_ON(!virt_addr_valid(buf)); +#endif sg_set_page(sg, virt_to_page(buf), buflen, offset_in_page(buf)); } -- cgit v1.2.3-71-gd317 From 37448adfc7ce0d6d5892b87aa8d57edde4126f49 Mon Sep 17 00:00:00 2001 From: Lance Ortiz Date: Thu, 30 May 2013 08:25:12 -0600 Subject: aerdrv: Move cper_print_aer() call out of interrupt context The following warning was seen on 3.9 when a corrected PCIe error was being handled by the AER subsystem. WARNING: at .../drivers/pci/search.c:214 pci_get_dev_by_id+0x8a/0x90() This occurred because a call to pci_get_domain_bus_and_slot() was added to cper_print_pcie() to setup for the call to cper_print_aer(). The warning showed up because cper_print_pcie() is called in an interrupt context and pci_get* functions are not supposed to be called in that context. The solution is to move the cper_print_aer() call out of the interrupt context and into aer_recover_work_func() to avoid any warnings when calling pci_get* functions. Signed-off-by: Lance Ortiz Acked-by: Borislav Petkov Acked-by: Rafael J. Wysocki Signed-off-by: Tony Luck --- drivers/acpi/apei/cper.c | 18 ------------------ drivers/acpi/apei/ghes.c | 4 +++- drivers/pci/pcie/aer/aerdrv_core.c | 5 ++++- drivers/pci/pcie/aer/aerdrv_errprint.c | 4 ++-- include/linux/aer.h | 5 +++-- 5 files changed, 12 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/drivers/acpi/apei/cper.c b/drivers/acpi/apei/cper.c index fefc2ca7cc3e..33dc6a004802 100644 --- a/drivers/acpi/apei/cper.c +++ b/drivers/acpi/apei/cper.c @@ -250,10 +250,6 @@ static const char *cper_pcie_port_type_strs[] = { static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie, const struct acpi_hest_generic_data *gdata) { -#ifdef CONFIG_ACPI_APEI_PCIEAER - struct pci_dev *dev; -#endif - if (pcie->validation_bits & CPER_PCIE_VALID_PORT_TYPE) printk("%s""port_type: %d, %s\n", pfx, pcie->port_type, pcie->port_type < ARRAY_SIZE(cper_pcie_port_type_strs) ? @@ -285,20 +281,6 @@ static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie, printk( "%s""bridge: secondary_status: 0x%04x, control: 0x%04x\n", pfx, pcie->bridge.secondary_status, pcie->bridge.control); -#ifdef CONFIG_ACPI_APEI_PCIEAER - dev = pci_get_domain_bus_and_slot(pcie->device_id.segment, - pcie->device_id.bus, pcie->device_id.function); - if (!dev) { - pr_err("PCI AER Cannot get PCI device %04x:%02x:%02x.%d\n", - pcie->device_id.segment, pcie->device_id.bus, - pcie->device_id.slot, pcie->device_id.function); - return; - } - if (pcie->validation_bits & CPER_PCIE_VALID_AER_INFO) - cper_print_aer(pfx, dev, gdata->error_severity, - (struct aer_capability_regs *) pcie->aer_info); - pci_dev_put(dev); -#endif } static const char *apei_estatus_section_flag_strs[] = { diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index d668a8ae602b..403baf4dffc1 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -454,7 +454,9 @@ static void ghes_do_proc(struct ghes *ghes, aer_severity = cper_severity_to_aer(sev); aer_recover_queue(pcie_err->device_id.segment, pcie_err->device_id.bus, - devfn, aer_severity); + devfn, aer_severity, + (struct aer_capability_regs *) + pcie_err->aer_info); } } diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c index 8ec8b4f48560..0f4554e48cc5 100644 --- a/drivers/pci/pcie/aer/aerdrv_core.c +++ b/drivers/pci/pcie/aer/aerdrv_core.c @@ -580,6 +580,7 @@ struct aer_recover_entry u8 devfn; u16 domain; int severity; + struct aer_capability_regs *regs; }; static DEFINE_KFIFO(aer_recover_ring, struct aer_recover_entry, @@ -593,7 +594,7 @@ static DEFINE_SPINLOCK(aer_recover_ring_lock); static DECLARE_WORK(aer_recover_work, aer_recover_work_func); void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn, - int severity) + int severity, struct aer_capability_regs *aer_regs) { unsigned long flags; struct aer_recover_entry entry = { @@ -601,6 +602,7 @@ void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn, .devfn = devfn, .domain = domain, .severity = severity, + .regs = aer_regs, }; spin_lock_irqsave(&aer_recover_ring_lock, flags); @@ -627,6 +629,7 @@ static void aer_recover_work_func(struct work_struct *work) PCI_SLOT(entry.devfn), PCI_FUNC(entry.devfn)); continue; } + cper_print_aer(pdev, entry.severity, entry.regs); do_recovery(pdev, entry.severity); pci_dev_put(pdev); } diff --git a/drivers/pci/pcie/aer/aerdrv_errprint.c b/drivers/pci/pcie/aer/aerdrv_errprint.c index 5ab14251839d..2c7c9f5f592c 100644 --- a/drivers/pci/pcie/aer/aerdrv_errprint.c +++ b/drivers/pci/pcie/aer/aerdrv_errprint.c @@ -220,7 +220,7 @@ int cper_severity_to_aer(int cper_severity) } EXPORT_SYMBOL_GPL(cper_severity_to_aer); -void cper_print_aer(const char *prefix, struct pci_dev *dev, int cper_severity, +void cper_print_aer(struct pci_dev *dev, int cper_severity, struct aer_capability_regs *aer) { int aer_severity, layer, agent, status_strs_size, tlp_header_valid = 0; @@ -244,7 +244,7 @@ void cper_print_aer(const char *prefix, struct pci_dev *dev, int cper_severity, agent = AER_GET_AGENT(aer_severity, status); dev_err(&dev->dev, "aer_status: 0x%08x, aer_mask: 0x%08x\n", status, mask); - cper_print_bits(prefix, status, status_strs, status_strs_size); + cper_print_bits("", status, status_strs, status_strs_size); dev_err(&dev->dev, "aer_layer=%s, aer_agent=%s\n", aer_error_layer[layer], aer_agent_string[agent]); if (aer_severity != AER_CORRECTABLE) diff --git a/include/linux/aer.h b/include/linux/aer.h index ec10e1b24c1c..737f90ab4b62 100644 --- a/include/linux/aer.h +++ b/include/linux/aer.h @@ -49,10 +49,11 @@ static inline int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev) } #endif -extern void cper_print_aer(const char *prefix, struct pci_dev *dev, +extern void cper_print_aer(struct pci_dev *dev, int cper_severity, struct aer_capability_regs *aer); extern int cper_severity_to_aer(int cper_severity); extern void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn, - int severity); + int severity, + struct aer_capability_regs *aer_regs); #endif //_AER_H_ -- cgit v1.2.3-71-gd317 From d5ddad4168348337d98d6b8f156a3892de444411 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Fri, 31 May 2013 00:46:11 -0700 Subject: target: Propigate up ->cmd_kref put return via transport_generic_free_cmd Go ahead and propigate up the ->cmd_kref put return value from target_put_sess_cmd() -> transport_release_cmd() -> transport_put_cmd() -> transport_generic_free_cmd(). This is useful for certain fabrics when determining the active I/O shutdown case with SCF_ACK_KREF where a final target_put_sess_cmd() is still required by the caller. Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_transport.c | 28 +++++++++++++++------------- include/target/target_core_fabric.h | 2 +- 2 files changed, 16 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index bbca144821c5..21e315874a54 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -65,7 +65,7 @@ static void transport_complete_task_attr(struct se_cmd *cmd); static void transport_handle_queue_full(struct se_cmd *cmd, struct se_device *dev); static int transport_generic_get_mem(struct se_cmd *cmd); -static void transport_put_cmd(struct se_cmd *cmd); +static int transport_put_cmd(struct se_cmd *cmd); static void target_complete_ok_work(struct work_struct *work); int init_se_kmem_caches(void) @@ -1944,7 +1944,7 @@ static inline void transport_free_pages(struct se_cmd *cmd) * This routine unconditionally frees a command, and reference counting * or list removal must be done in the caller. */ -static void transport_release_cmd(struct se_cmd *cmd) +static int transport_release_cmd(struct se_cmd *cmd) { BUG_ON(!cmd->se_tfo); @@ -1956,11 +1956,11 @@ static void transport_release_cmd(struct se_cmd *cmd) * If this cmd has been setup with target_get_sess_cmd(), drop * the kref and call ->release_cmd() in kref callback. */ - if (cmd->check_release != 0) { - target_put_sess_cmd(cmd->se_sess, cmd); - return; - } + if (cmd->check_release != 0) + return target_put_sess_cmd(cmd->se_sess, cmd); + cmd->se_tfo->release_cmd(cmd); + return 1; } /** @@ -1969,7 +1969,7 @@ static void transport_release_cmd(struct se_cmd *cmd) * * This routine releases our reference to the command and frees it if possible. */ -static void transport_put_cmd(struct se_cmd *cmd) +static int transport_put_cmd(struct se_cmd *cmd) { unsigned long flags; @@ -1977,7 +1977,7 @@ static void transport_put_cmd(struct se_cmd *cmd) if (atomic_read(&cmd->t_fe_count) && !atomic_dec_and_test(&cmd->t_fe_count)) { spin_unlock_irqrestore(&cmd->t_state_lock, flags); - return; + return 0; } if (cmd->transport_state & CMD_T_DEV_ACTIVE) { @@ -1987,8 +1987,7 @@ static void transport_put_cmd(struct se_cmd *cmd) spin_unlock_irqrestore(&cmd->t_state_lock, flags); transport_free_pages(cmd); - transport_release_cmd(cmd); - return; + return transport_release_cmd(cmd); } void *transport_kmap_data_sg(struct se_cmd *cmd) @@ -2153,13 +2152,15 @@ static void transport_write_pending_qf(struct se_cmd *cmd) } } -void transport_generic_free_cmd(struct se_cmd *cmd, int wait_for_tasks) +int transport_generic_free_cmd(struct se_cmd *cmd, int wait_for_tasks) { + int ret = 0; + if (!(cmd->se_cmd_flags & SCF_SE_LUN_CMD)) { if (wait_for_tasks && (cmd->se_cmd_flags & SCF_SCSI_TMR_CDB)) transport_wait_for_tasks(cmd); - transport_release_cmd(cmd); + ret = transport_release_cmd(cmd); } else { if (wait_for_tasks) transport_wait_for_tasks(cmd); @@ -2167,8 +2168,9 @@ void transport_generic_free_cmd(struct se_cmd *cmd, int wait_for_tasks) if (cmd->se_lun) transport_lun_remove_cmd(cmd); - transport_put_cmd(cmd); + ret = transport_put_cmd(cmd); } + return ret; } EXPORT_SYMBOL(transport_generic_free_cmd); diff --git a/include/target/target_core_fabric.h b/include/target/target_core_fabric.h index 8a26f0d55fd1..1dcce9cc99b9 100644 --- a/include/target/target_core_fabric.h +++ b/include/target/target_core_fabric.h @@ -114,7 +114,7 @@ sense_reason_t transport_generic_new_cmd(struct se_cmd *); void target_execute_cmd(struct se_cmd *cmd); -void transport_generic_free_cmd(struct se_cmd *, int); +int transport_generic_free_cmd(struct se_cmd *, int); bool transport_wait_for_tasks(struct se_cmd *); int transport_check_aborted_status(struct se_cmd *, int); -- cgit v1.2.3-71-gd317 From 1e2bd517c108816220f262d7954b697af03b5f9c Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Thu, 30 May 2013 06:45:27 +0000 Subject: udp6: Fix udp fragmentation for tunnel traffic. udp6 over GRE tunnel does not work after to GRE tso changes. GRE tso handler passes inner packet but keeps track of outer header start in SKB_GSO_CB(skb)->mac_offset. udp6 fragment need to take care of outer header, which start at the mac_offset, while adding fragment header. This bug is introduced by commit 68c3316311 (GRE: Add TCP segmentation offload for GRE). Reported-by: Dmitry Kravkov Signed-off-by: Pravin B Shelar Tested-by: Dmitry Kravkov Signed-off-by: David S. Miller --- include/linux/skbuff.h | 15 +++++++++++++++ net/ipv6/udp_offload.c | 20 ++++++++++++-------- 2 files changed, 27 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2e0ced1af3b1..9c676eae3968 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2852,6 +2852,21 @@ static inline int skb_tnl_header_len(const struct sk_buff *inner_skb) SKB_GSO_CB(inner_skb)->mac_offset; } +static inline int gso_pskb_expand_head(struct sk_buff *skb, int extra) +{ + int new_headroom, headroom; + int ret; + + headroom = skb_headroom(skb); + ret = pskb_expand_head(skb, extra, 0, GFP_ATOMIC); + if (ret) + return ret; + + new_headroom = skb_headroom(skb); + SKB_GSO_CB(skb)->mac_offset += (new_headroom - headroom); + return 0; +} + static inline bool skb_is_gso(const struct sk_buff *skb) { return skb_shinfo(skb)->gso_size; diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 3bb3a891a424..d3cfaf9c7a08 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -46,11 +46,12 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, unsigned int mss; unsigned int unfrag_ip6hlen, unfrag_len; struct frag_hdr *fptr; - u8 *mac_start, *prevhdr; + u8 *packet_start, *prevhdr; u8 nexthdr; u8 frag_hdr_sz = sizeof(struct frag_hdr); int offset; __wsum csum; + int tnl_hlen; mss = skb_shinfo(skb)->gso_size; if (unlikely(skb->len <= mss)) @@ -83,9 +84,11 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, skb->ip_summed = CHECKSUM_NONE; /* Check if there is enough headroom to insert fragment header. */ - if ((skb_mac_header(skb) < skb->head + frag_hdr_sz) && - pskb_expand_head(skb, frag_hdr_sz, 0, GFP_ATOMIC)) - goto out; + tnl_hlen = skb_tnl_header_len(skb); + if (skb_headroom(skb) < (tnl_hlen + frag_hdr_sz)) { + if (gso_pskb_expand_head(skb, tnl_hlen + frag_hdr_sz)) + goto out; + } /* Find the unfragmentable header and shift it left by frag_hdr_sz * bytes to insert fragment header. @@ -93,11 +96,12 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr); nexthdr = *prevhdr; *prevhdr = NEXTHDR_FRAGMENT; - unfrag_len = skb_network_header(skb) - skb_mac_header(skb) + - unfrag_ip6hlen; - mac_start = skb_mac_header(skb); - memmove(mac_start-frag_hdr_sz, mac_start, unfrag_len); + unfrag_len = (skb_network_header(skb) - skb_mac_header(skb)) + + unfrag_ip6hlen + tnl_hlen; + packet_start = (u8 *) skb->head + SKB_GSO_CB(skb)->mac_offset; + memmove(packet_start-frag_hdr_sz, packet_start, unfrag_len); + SKB_GSO_CB(skb)->mac_offset -= frag_hdr_sz; skb->mac_header -= frag_hdr_sz; skb->network_header -= frag_hdr_sz; -- cgit v1.2.3-71-gd317 From e4c1721642bbd42d8142f4811cde0588c28db51d Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Wed, 29 May 2013 07:36:25 +0000 Subject: xfrm: force a garbage collection after deleting a policy In some cases after deleting a policy from the SPD the policy would remain in the dst/flow/route cache for an extended period of time which caused problems for SELinux as its dynamic network access controls key off of the number of XFRM policy and state entries. This patch corrects this problem by forcing a XFRM garbage collection whenever a policy is sucessfully removed. Reported-by: Ondrej Moris Signed-off-by: Paul Moore Signed-off-by: David S. Miller --- include/net/xfrm.h | 5 +++++ net/key/af_key.c | 4 ++++ net/xfrm/xfrm_policy.c | 3 ++- net/xfrm/xfrm_user.c | 2 ++ 4 files changed, 13 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index ae16531d0d35..94ce082b29dc 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1160,6 +1160,8 @@ static inline void xfrm_sk_free_policy(struct sock *sk) } } +extern void xfrm_garbage_collect(struct net *net); + #else static inline void xfrm_sk_free_policy(struct sock *sk) {} @@ -1194,6 +1196,9 @@ static inline int xfrm6_policy_check_reverse(struct sock *sk, int dir, { return 1; } +static inline void xfrm_garbage_collect(struct net *net) +{ +} #endif static __inline__ diff --git a/net/key/af_key.c b/net/key/af_key.c index 5b1e5af25713..c5fbd7589681 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -2366,6 +2366,8 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, const struct sa out: xfrm_pol_put(xp); + if (err == 0) + xfrm_garbage_collect(net); return err; } @@ -2615,6 +2617,8 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, const struct sadb_ out: xfrm_pol_put(xp); + if (delete && err == 0) + xfrm_garbage_collect(net); return err; } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 23cea0f74336..ea970b8002a2 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2557,11 +2557,12 @@ static void __xfrm_garbage_collect(struct net *net) } } -static void xfrm_garbage_collect(struct net *net) +void xfrm_garbage_collect(struct net *net) { flow_cache_flush(); __xfrm_garbage_collect(net); } +EXPORT_SYMBOL(xfrm_garbage_collect); static void xfrm_garbage_collect_deferred(struct net *net) { diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index aa778748c565..3f565e495ac6 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1681,6 +1681,8 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, out: xfrm_pol_put(xp); + if (delete && err == 0) + xfrm_garbage_collect(net); return err; } -- cgit v1.2.3-71-gd317 From 6d7581e62f8be462440d7b22c6361f7c9fa4902b Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 29 May 2013 05:02:56 +0000 Subject: list: introduce list_first_entry_or_null non-rcu variant of list_first_or_null_rcu Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/list.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/list.h b/include/linux/list.h index 6a1f8df9144b..b83e5657365a 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -361,6 +361,17 @@ static inline void list_splice_tail_init(struct list_head *list, #define list_first_entry(ptr, type, member) \ list_entry((ptr)->next, type, member) +/** + * list_first_entry_or_null - get the first element from a list + * @ptr: the list head to take the element from. + * @type: the type of the struct this is embedded in. + * @member: the name of the list_struct within the struct. + * + * Note that if the list is empty, it returns NULL. + */ +#define list_first_entry_or_null(ptr, type, member) \ + (!list_empty(ptr) ? list_first_entry(ptr, type, member) : NULL) + /** * list_for_each - iterate over a list * @pos: the &struct list_head to use as a loop cursor. -- cgit v1.2.3-71-gd317 From c87a124a5d5e8cf8e21c4363c3372bcaf53ea190 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 29 May 2013 09:06:27 +0000 Subject: net: force a reload of first item in hlist_nulls_for_each_entry_rcu Roman Gushchin discovered that udp4_lib_lookup2() was not reloading first item in the rcu protected list, in case the loop was restarted. This produced soft lockups as in https://lkml.org/lkml/2013/4/16/37 rcu_dereference(X)/ACCESS_ONCE(X) seem to not work as intended if X is ptr->field : In some cases, gcc caches the value or ptr->field in a register. Use a barrier() to disallow such caching, as documented in Documentation/atomic_ops.txt line 114 Thanks a lot to Roman for providing analysis and numerous patches. Diagnosed-by: Roman Gushchin Signed-off-by: Eric Dumazet Reported-by: Boris Zhmurov Signed-off-by: Roman Gushchin Acked-by: Paul E. McKenney Signed-off-by: David S. Miller --- include/linux/rculist_nulls.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h index 2ae13714828b..1c33dd7da4a7 100644 --- a/include/linux/rculist_nulls.h +++ b/include/linux/rculist_nulls.h @@ -105,9 +105,14 @@ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n, * @head: the head for your list. * @member: the name of the hlist_nulls_node within the struct. * + * The barrier() is needed to make sure compiler doesn't cache first element [1], + * as this loop can be restarted [2] + * [1] Documentation/atomic_ops.txt around line 114 + * [2] Documentation/RCU/rculist_nulls.txt around line 146 */ #define hlist_nulls_for_each_entry_rcu(tpos, pos, head, member) \ - for (pos = rcu_dereference_raw(hlist_nulls_first_rcu(head)); \ + for (({barrier();}), \ + pos = rcu_dereference_raw(hlist_nulls_first_rcu(head)); \ (!is_a_nulls(pos)) && \ ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1; }); \ pos = rcu_dereference_raw(hlist_nulls_next_rcu(pos))) -- cgit v1.2.3-71-gd317 From 01cb71d2d47b78354358e4bb938bb06323e17498 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 2 Jun 2013 13:55:05 +0000 Subject: net_sched: restore "overhead xxx" handling commit 56b765b79 ("htb: improved accuracy at high rates") broke the "overhead xxx" handling, as well as the "linklayer atm" attribute. tc class add ... htb rate X ceil Y linklayer atm overhead 10 This patch restores the "overhead xxx" handling, for htb, tbf and act_police The "linklayer atm" thing needs a separate fix. Reported-by: Jesper Dangaard Brouer Signed-off-by: Eric Dumazet Cc: Vimalkumar Cc: Jiri Pirko Signed-off-by: David S. Miller --- include/net/sch_generic.h | 18 +++++++++++------- net/sched/act_police.c | 8 ++++---- net/sched/sch_generic.c | 8 +++++--- net/sched/sch_htb.c | 8 ++++---- net/sched/sch_tbf.c | 8 ++++---- 5 files changed, 28 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index f10818fc8804..e7f4e21cc3e1 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -679,22 +679,26 @@ static inline struct sk_buff *skb_act_clone(struct sk_buff *skb, gfp_t gfp_mask, #endif struct psched_ratecfg { - u64 rate_bps; - u32 mult; - u32 shift; + u64 rate_bps; + u32 mult; + u16 overhead; + u8 shift; }; static inline u64 psched_l2t_ns(const struct psched_ratecfg *r, unsigned int len) { - return ((u64)len * r->mult) >> r->shift; + return ((u64)(len + r->overhead) * r->mult) >> r->shift; } -extern void psched_ratecfg_precompute(struct psched_ratecfg *r, u32 rate); +extern void psched_ratecfg_precompute(struct psched_ratecfg *r, const struct tc_ratespec *conf); -static inline u32 psched_ratecfg_getrate(const struct psched_ratecfg *r) +static inline void psched_ratecfg_getrate(struct tc_ratespec *res, + const struct psched_ratecfg *r) { - return r->rate_bps >> 3; + memset(res, 0, sizeof(*res)); + res->rate = r->rate_bps >> 3; + res->overhead = r->overhead; } #endif diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 823463adbd21..189e3c5b3d09 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -231,14 +231,14 @@ override: } if (R_tab) { police->rate_present = true; - psched_ratecfg_precompute(&police->rate, R_tab->rate.rate); + psched_ratecfg_precompute(&police->rate, &R_tab->rate); qdisc_put_rtab(R_tab); } else { police->rate_present = false; } if (P_tab) { police->peak_present = true; - psched_ratecfg_precompute(&police->peak, P_tab->rate.rate); + psched_ratecfg_precompute(&police->peak, &P_tab->rate); qdisc_put_rtab(P_tab); } else { police->peak_present = false; @@ -376,9 +376,9 @@ tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) }; if (police->rate_present) - opt.rate.rate = psched_ratecfg_getrate(&police->rate); + psched_ratecfg_getrate(&opt.rate, &police->rate); if (police->peak_present) - opt.peakrate.rate = psched_ratecfg_getrate(&police->peak); + psched_ratecfg_getrate(&opt.peakrate, &police->peak); if (nla_put(skb, TCA_POLICE_TBF, sizeof(opt), &opt)) goto nla_put_failure; if (police->tcfp_result && diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index eac7e0ee23c1..20224086cc28 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -898,14 +898,16 @@ void dev_shutdown(struct net_device *dev) WARN_ON(timer_pending(&dev->watchdog_timer)); } -void psched_ratecfg_precompute(struct psched_ratecfg *r, u32 rate) +void psched_ratecfg_precompute(struct psched_ratecfg *r, + const struct tc_ratespec *conf) { u64 factor; u64 mult; int shift; - r->rate_bps = (u64)rate << 3; - r->shift = 0; + memset(r, 0, sizeof(*r)); + r->overhead = conf->overhead; + r->rate_bps = (u64)conf->rate << 3; r->mult = 1; /* * Calibrate mult, shift so that token counting is accurate diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 79b1876b6cd2..f87fb850b7ef 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1090,9 +1090,9 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg, memset(&opt, 0, sizeof(opt)); - opt.rate.rate = psched_ratecfg_getrate(&cl->rate); + psched_ratecfg_getrate(&opt.rate, &cl->rate); opt.buffer = PSCHED_NS2TICKS(cl->buffer); - opt.ceil.rate = psched_ratecfg_getrate(&cl->ceil); + psched_ratecfg_getrate(&opt.ceil, &cl->ceil); opt.cbuffer = PSCHED_NS2TICKS(cl->cbuffer); opt.quantum = cl->quantum; opt.prio = cl->prio; @@ -1459,8 +1459,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, cl->prio = TC_HTB_NUMPRIO - 1; } - psched_ratecfg_precompute(&cl->rate, hopt->rate.rate); - psched_ratecfg_precompute(&cl->ceil, hopt->ceil.rate); + psched_ratecfg_precompute(&cl->rate, &hopt->rate); + psched_ratecfg_precompute(&cl->ceil, &hopt->ceil); cl->buffer = PSCHED_TICKS2NS(hopt->buffer); cl->cbuffer = PSCHED_TICKS2NS(hopt->buffer); diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index c8388f3c3426..e478d316602b 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -298,9 +298,9 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt) q->tokens = q->buffer; q->ptokens = q->mtu; - psched_ratecfg_precompute(&q->rate, rtab->rate.rate); + psched_ratecfg_precompute(&q->rate, &rtab->rate); if (ptab) { - psched_ratecfg_precompute(&q->peak, ptab->rate.rate); + psched_ratecfg_precompute(&q->peak, &ptab->rate); q->peak_present = true; } else { q->peak_present = false; @@ -350,9 +350,9 @@ static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb) goto nla_put_failure; opt.limit = q->limit; - opt.rate.rate = psched_ratecfg_getrate(&q->rate); + psched_ratecfg_getrate(&opt.rate, &q->rate); if (q->peak_present) - opt.peakrate.rate = psched_ratecfg_getrate(&q->peak); + psched_ratecfg_getrate(&opt.peakrate, &q->peak); else memset(&opt.peakrate, 0, sizeof(opt.peakrate)); opt.mtu = PSCHED_NS2TICKS(q->mtu); -- cgit v1.2.3-71-gd317 From 066a1a5fca0e188c41636d0874ab7495f24f595b Mon Sep 17 00:00:00 2001 From: James Hogan Date: Wed, 22 May 2013 12:29:22 +0100 Subject: KVM: add kvm_para_available to asm-generic/kvm_para.h According to include/uapi/linux/kvm_para.h architectures should define kvm_para_available, so add an implementation to asm-generic/kvm_para.h which just returns false. This fixes intel8x0.c build failure on mips with KVM enabled. Signed-off-by: James Hogan Cc: Marcelo Tosatti Cc: Gleb Natapov Cc: Arnd Bergmann Signed-off-by: Gleb Natapov --- include/asm-generic/kvm_para.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/asm-generic/kvm_para.h b/include/asm-generic/kvm_para.h index 9d96605f160a..fa25becbdcaf 100644 --- a/include/asm-generic/kvm_para.h +++ b/include/asm-generic/kvm_para.h @@ -18,4 +18,9 @@ static inline unsigned int kvm_arch_para_features(void) return 0; } +static inline bool kvm_para_available(void) +{ + return false; +} + #endif -- cgit v1.2.3-71-gd317 From 29eb77825cc7da8d45b642de2de3d423dc8a363f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 5 Jun 2013 12:26:50 +0200 Subject: arch, mm: Remove tlb_fast_mode() Since the introduction of preemptible mmu_gather TLB fast mode has been broken. TLB fast mode relies on there being absolutely no concurrency; it frees pages first and invalidates TLBs later. However now we can get concurrency and stuff goes *bang*. This patch removes all tlb_fast_mode() code; it was found the better option vs trying to patch the hole by entangling tlb invalidation with the scheduler. Cc: Thomas Gleixner Cc: Russell King Cc: Tony Luck Reported-by: Max Filippov Signed-off-by: Peter Zijlstra Signed-off-by: Linus Torvalds --- arch/arm/include/asm/tlb.h | 27 ++++----------------------- arch/ia64/include/asm/tlb.h | 41 ++++++++--------------------------------- include/asm-generic/tlb.h | 17 +---------------- mm/memory.c | 9 --------- 4 files changed, 13 insertions(+), 81 deletions(-) (limited to 'include') diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h index 99a19512ee26..bdf2b8458ec1 100644 --- a/arch/arm/include/asm/tlb.h +++ b/arch/arm/include/asm/tlb.h @@ -33,18 +33,6 @@ #include #include -/* - * We need to delay page freeing for SMP as other CPUs can access pages - * which have been removed but not yet had their TLB entries invalidated. - * Also, as ARMv7 speculative prefetch can drag new entries into the TLB, - * we need to apply this same delaying tactic to ensure correct operation. - */ -#if defined(CONFIG_SMP) || defined(CONFIG_CPU_32v7) -#define tlb_fast_mode(tlb) 0 -#else -#define tlb_fast_mode(tlb) 1 -#endif - #define MMU_GATHER_BUNDLE 8 /* @@ -112,12 +100,10 @@ static inline void __tlb_alloc_page(struct mmu_gather *tlb) static inline void tlb_flush_mmu(struct mmu_gather *tlb) { tlb_flush(tlb); - if (!tlb_fast_mode(tlb)) { - free_pages_and_swap_cache(tlb->pages, tlb->nr); - tlb->nr = 0; - if (tlb->pages == tlb->local) - __tlb_alloc_page(tlb); - } + free_pages_and_swap_cache(tlb->pages, tlb->nr); + tlb->nr = 0; + if (tlb->pages == tlb->local) + __tlb_alloc_page(tlb); } static inline void @@ -178,11 +164,6 @@ tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma) static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page) { - if (tlb_fast_mode(tlb)) { - free_page_and_swap_cache(page); - return 1; /* avoid calling tlb_flush_mmu */ - } - tlb->pages[tlb->nr++] = page; VM_BUG_ON(tlb->nr > tlb->max); return tlb->max - tlb->nr; diff --git a/arch/ia64/include/asm/tlb.h b/arch/ia64/include/asm/tlb.h index c3ffe3e54edc..ef3a9de01954 100644 --- a/arch/ia64/include/asm/tlb.h +++ b/arch/ia64/include/asm/tlb.h @@ -46,12 +46,6 @@ #include #include -#ifdef CONFIG_SMP -# define tlb_fast_mode(tlb) ((tlb)->nr == ~0U) -#else -# define tlb_fast_mode(tlb) (1) -#endif - /* * If we can't allocate a page to make a big batch of page pointers * to work on, then just handle a few from the on-stack structure. @@ -60,7 +54,7 @@ struct mmu_gather { struct mm_struct *mm; - unsigned int nr; /* == ~0U => fast mode */ + unsigned int nr; unsigned int max; unsigned char fullmm; /* non-zero means full mm flush */ unsigned char need_flush; /* really unmapped some PTEs? */ @@ -103,6 +97,7 @@ extern struct ia64_tr_entry *ia64_idtrs[NR_CPUS]; static inline void ia64_tlb_flush_mmu (struct mmu_gather *tlb, unsigned long start, unsigned long end) { + unsigned long i; unsigned int nr; if (!tlb->need_flush) @@ -141,13 +136,11 @@ ia64_tlb_flush_mmu (struct mmu_gather *tlb, unsigned long start, unsigned long e /* lastly, release the freed pages */ nr = tlb->nr; - if (!tlb_fast_mode(tlb)) { - unsigned long i; - tlb->nr = 0; - tlb->start_addr = ~0UL; - for (i = 0; i < nr; ++i) - free_page_and_swap_cache(tlb->pages[i]); - } + + tlb->nr = 0; + tlb->start_addr = ~0UL; + for (i = 0; i < nr; ++i) + free_page_and_swap_cache(tlb->pages[i]); } static inline void __tlb_alloc_page(struct mmu_gather *tlb) @@ -167,20 +160,7 @@ tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned int full_m tlb->mm = mm; tlb->max = ARRAY_SIZE(tlb->local); tlb->pages = tlb->local; - /* - * Use fast mode if only 1 CPU is online. - * - * It would be tempting to turn on fast-mode for full_mm_flush as well. But this - * doesn't work because of speculative accesses and software prefetching: the page - * table of "mm" may (and usually is) the currently active page table and even - * though the kernel won't do any user-space accesses during the TLB shoot down, a - * compiler might use speculation or lfetch.fault on what happens to be a valid - * user-space address. This in turn could trigger a TLB miss fault (or a VHPT - * walk) and re-insert a TLB entry we just removed. Slow mode avoids such - * problems. (We could make fast-mode work by switching the current task to a - * different "mm" during the shootdown.) --davidm 08/02/2002 - */ - tlb->nr = (num_online_cpus() == 1) ? ~0U : 0; + tlb->nr = 0; tlb->fullmm = full_mm_flush; tlb->start_addr = ~0UL; } @@ -214,11 +194,6 @@ static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page) { tlb->need_flush = 1; - if (tlb_fast_mode(tlb)) { - free_page_and_swap_cache(page); - return 1; /* avoid calling tlb_flush_mmu */ - } - if (!tlb->nr && tlb->pages == tlb->local) __tlb_alloc_page(tlb); diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index b1b1fa6ffffe..13821c339a41 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -97,11 +97,9 @@ struct mmu_gather { unsigned long start; unsigned long end; unsigned int need_flush : 1, /* Did free PTEs */ - fast_mode : 1; /* No batching */ - /* we are in the middle of an operation to clear * a full mm and can make some optimizations */ - unsigned int fullmm : 1, + fullmm : 1, /* we have performed an operation which * requires a complete flush of the tlb */ need_flush_all : 1; @@ -114,19 +112,6 @@ struct mmu_gather { #define HAVE_GENERIC_MMU_GATHER -static inline int tlb_fast_mode(struct mmu_gather *tlb) -{ -#ifdef CONFIG_SMP - return tlb->fast_mode; -#else - /* - * For UP we don't need to worry about TLB flush - * and page free order so much.. - */ - return 1; -#endif -} - void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, bool fullmm); void tlb_flush_mmu(struct mmu_gather *tlb); void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, diff --git a/mm/memory.c b/mm/memory.c index 6dc1882fbd72..61a262b08e53 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -220,7 +220,6 @@ void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, bool fullmm) tlb->start = -1UL; tlb->end = 0; tlb->need_flush = 0; - tlb->fast_mode = (num_possible_cpus() == 1); tlb->local.next = NULL; tlb->local.nr = 0; tlb->local.max = ARRAY_SIZE(tlb->__pages); @@ -244,9 +243,6 @@ void tlb_flush_mmu(struct mmu_gather *tlb) tlb_table_flush(tlb); #endif - if (tlb_fast_mode(tlb)) - return; - for (batch = &tlb->local; batch; batch = batch->next) { free_pages_and_swap_cache(batch->pages, batch->nr); batch->nr = 0; @@ -288,11 +284,6 @@ int __tlb_remove_page(struct mmu_gather *tlb, struct page *page) VM_BUG_ON(!tlb->need_flush); - if (tlb_fast_mode(tlb)) { - free_page_and_swap_cache(page); - return 1; /* avoid calling tlb_flush_mmu() */ - } - batch = tlb->active; batch->pages[batch->nr++] = page; if (batch->nr == batch->max) { -- cgit v1.2.3-71-gd317 From a7526eb5d06b0084ef12d7b168d008fcf516caab Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 5 Jun 2013 19:38:26 +0000 Subject: net: Unbreak compat_sys_{send,recv}msg I broke them in this commit: commit 1be374a0518a288147c6a7398792583200a67261 Author: Andy Lutomirski Date: Wed May 22 14:07:44 2013 -0700 net: Block MSG_CMSG_COMPAT in send(m)msg and recv(m)msg This patch adds __sys_sendmsg and __sys_sendmsg as common helpers that accept MSG_CMSG_COMPAT and blocks MSG_CMSG_COMPAT at the syscall entrypoints. It also reverts some unnecessary checks in sys_socketcall. Apparently I was suffering from underscore blindness the first time around. Signed-off-by: Andy Lutomirski Tested-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/socket.h | 3 +++ net/compat.c | 13 +++++++-- net/socket.c | 72 +++++++++++++++++++++++--------------------------- 3 files changed, 47 insertions(+), 41 deletions(-) (limited to 'include') diff --git a/include/linux/socket.h b/include/linux/socket.h index 33bf2dfab19d..b10ce4b341ea 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -320,6 +320,9 @@ extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data); struct timespec; +/* The __sys_...msg variants allow MSG_CMSG_COMPAT */ +extern long __sys_recvmsg(int fd, struct msghdr __user *msg, unsigned flags); +extern long __sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags); extern int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, unsigned int flags, struct timespec *timeout); extern int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, diff --git a/net/compat.c b/net/compat.c index 79ae88485001..f0a1ba6c8086 100644 --- a/net/compat.c +++ b/net/compat.c @@ -734,19 +734,25 @@ static unsigned char nas[21] = { asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, unsigned int flags) { - return sys_sendmsg(fd, (struct msghdr __user *)msg, flags | MSG_CMSG_COMPAT); + if (flags & MSG_CMSG_COMPAT) + return -EINVAL; + return __sys_sendmsg(fd, (struct msghdr __user *)msg, flags | MSG_CMSG_COMPAT); } asmlinkage long compat_sys_sendmmsg(int fd, struct compat_mmsghdr __user *mmsg, unsigned int vlen, unsigned int flags) { + if (flags & MSG_CMSG_COMPAT) + return -EINVAL; return __sys_sendmmsg(fd, (struct mmsghdr __user *)mmsg, vlen, flags | MSG_CMSG_COMPAT); } asmlinkage long compat_sys_recvmsg(int fd, struct compat_msghdr __user *msg, unsigned int flags) { - return sys_recvmsg(fd, (struct msghdr __user *)msg, flags | MSG_CMSG_COMPAT); + if (flags & MSG_CMSG_COMPAT) + return -EINVAL; + return __sys_recvmsg(fd, (struct msghdr __user *)msg, flags | MSG_CMSG_COMPAT); } asmlinkage long compat_sys_recv(int fd, void __user *buf, size_t len, unsigned int flags) @@ -768,6 +774,9 @@ asmlinkage long compat_sys_recvmmsg(int fd, struct compat_mmsghdr __user *mmsg, int datagrams; struct timespec ktspec; + if (flags & MSG_CMSG_COMPAT) + return -EINVAL; + if (COMPAT_USE_64BIT_TIME) return __sys_recvmmsg(fd, (struct mmsghdr __user *)mmsg, vlen, flags | MSG_CMSG_COMPAT, diff --git a/net/socket.c b/net/socket.c index 9ff6366fee13..4ca1526db756 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1956,7 +1956,7 @@ struct used_address { unsigned int name_len; }; -static int __sys_sendmsg(struct socket *sock, struct msghdr __user *msg, +static int ___sys_sendmsg(struct socket *sock, struct msghdr __user *msg, struct msghdr *msg_sys, unsigned int flags, struct used_address *used_address) { @@ -2071,26 +2071,30 @@ out: * BSD sendmsg interface */ -SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned int, flags) +long __sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags) { int fput_needed, err; struct msghdr msg_sys; struct socket *sock; - if (flags & MSG_CMSG_COMPAT) - return -EINVAL; - sock = sockfd_lookup_light(fd, &err, &fput_needed); if (!sock) goto out; - err = __sys_sendmsg(sock, msg, &msg_sys, flags, NULL); + err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL); fput_light(sock->file, fput_needed); out: return err; } +SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned int, flags) +{ + if (flags & MSG_CMSG_COMPAT) + return -EINVAL; + return __sys_sendmsg(fd, msg, flags); +} + /* * Linux sendmmsg interface */ @@ -2121,15 +2125,16 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, while (datagrams < vlen) { if (MSG_CMSG_COMPAT & flags) { - err = __sys_sendmsg(sock, (struct msghdr __user *)compat_entry, - &msg_sys, flags, &used_address); + err = ___sys_sendmsg(sock, (struct msghdr __user *)compat_entry, + &msg_sys, flags, &used_address); if (err < 0) break; err = __put_user(err, &compat_entry->msg_len); ++compat_entry; } else { - err = __sys_sendmsg(sock, (struct msghdr __user *)entry, - &msg_sys, flags, &used_address); + err = ___sys_sendmsg(sock, + (struct msghdr __user *)entry, + &msg_sys, flags, &used_address); if (err < 0) break; err = put_user(err, &entry->msg_len); @@ -2158,7 +2163,7 @@ SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg, return __sys_sendmmsg(fd, mmsg, vlen, flags); } -static int __sys_recvmsg(struct socket *sock, struct msghdr __user *msg, +static int ___sys_recvmsg(struct socket *sock, struct msghdr __user *msg, struct msghdr *msg_sys, unsigned int flags, int nosec) { struct compat_msghdr __user *msg_compat = @@ -2250,27 +2255,31 @@ out: * BSD recvmsg interface */ -SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg, - unsigned int, flags) +long __sys_recvmsg(int fd, struct msghdr __user *msg, unsigned flags) { int fput_needed, err; struct msghdr msg_sys; struct socket *sock; - if (flags & MSG_CMSG_COMPAT) - return -EINVAL; - sock = sockfd_lookup_light(fd, &err, &fput_needed); if (!sock) goto out; - err = __sys_recvmsg(sock, msg, &msg_sys, flags, 0); + err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0); fput_light(sock->file, fput_needed); out: return err; } +SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg, + unsigned int, flags) +{ + if (flags & MSG_CMSG_COMPAT) + return -EINVAL; + return __sys_recvmsg(fd, msg, flags); +} + /* * Linux recvmmsg interface */ @@ -2308,17 +2317,18 @@ int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, * No need to ask LSM for more than the first datagram. */ if (MSG_CMSG_COMPAT & flags) { - err = __sys_recvmsg(sock, (struct msghdr __user *)compat_entry, - &msg_sys, flags & ~MSG_WAITFORONE, - datagrams); + err = ___sys_recvmsg(sock, (struct msghdr __user *)compat_entry, + &msg_sys, flags & ~MSG_WAITFORONE, + datagrams); if (err < 0) break; err = __put_user(err, &compat_entry->msg_len); ++compat_entry; } else { - err = __sys_recvmsg(sock, (struct msghdr __user *)entry, - &msg_sys, flags & ~MSG_WAITFORONE, - datagrams); + err = ___sys_recvmsg(sock, + (struct msghdr __user *)entry, + &msg_sys, flags & ~MSG_WAITFORONE, + datagrams); if (err < 0) break; err = put_user(err, &entry->msg_len); @@ -2505,31 +2515,15 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args) (int __user *)a[4]); break; case SYS_SENDMSG: - if (a[2] & MSG_CMSG_COMPAT) { - err = -EINVAL; - break; - } err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]); break; case SYS_SENDMMSG: - if (a[3] & MSG_CMSG_COMPAT) { - err = -EINVAL; - break; - } err = sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3]); break; case SYS_RECVMSG: - if (a[2] & MSG_CMSG_COMPAT) { - err = -EINVAL; - break; - } err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]); break; case SYS_RECVMMSG: - if (a[3] & MSG_CMSG_COMPAT) { - err = -EINVAL; - break; - } err = sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3], (struct timespec __user *)a[4]); break; -- cgit v1.2.3-71-gd317 From 4616274d3382fa7698536d61b351e63cf0ce27f0 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 5 Jun 2013 19:36:11 +0100 Subject: ASoC: dapm: Treat DAI widgets like AIF widgets for power Even though they are virtual widgets DAI widgets still get counted for the DAPM context power management so we can't just use the active state to check if they should be powered as they may not be part of a complete path. Instead split them into input and output widgets and do the same power checks as we perform on AIFs. Reported-by: Stephen Warren Tested-by: Stephen Warren Signed-off-by: Mark Brown --- include/sound/soc-dapm.h | 3 ++- sound/soc/soc-dapm.c | 49 +++++++++++++++++++++++++----------------------- sound/soc/soc-pcm.c | 7 ++++++- 3 files changed, 34 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h index d4609029f014..385c6329a967 100644 --- a/include/sound/soc-dapm.h +++ b/include/sound/soc-dapm.h @@ -450,7 +450,8 @@ enum snd_soc_dapm_type { snd_soc_dapm_aif_in, /* audio interface input */ snd_soc_dapm_aif_out, /* audio interface output */ snd_soc_dapm_siggen, /* signal generator */ - snd_soc_dapm_dai, /* link to DAI structure */ + snd_soc_dapm_dai_in, /* link to DAI structure */ + snd_soc_dapm_dai_out, snd_soc_dapm_dai_link, /* link between two DAI structures */ }; diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index a80c883bb8be..c7051c457b75 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -55,7 +55,8 @@ static int dapm_up_seq[] = { [snd_soc_dapm_clock_supply] = 1, [snd_soc_dapm_micbias] = 2, [snd_soc_dapm_dai_link] = 2, - [snd_soc_dapm_dai] = 3, + [snd_soc_dapm_dai_in] = 3, + [snd_soc_dapm_dai_out] = 3, [snd_soc_dapm_aif_in] = 3, [snd_soc_dapm_aif_out] = 3, [snd_soc_dapm_mic] = 4, @@ -92,7 +93,8 @@ static int dapm_down_seq[] = { [snd_soc_dapm_value_mux] = 9, [snd_soc_dapm_aif_in] = 10, [snd_soc_dapm_aif_out] = 10, - [snd_soc_dapm_dai] = 10, + [snd_soc_dapm_dai_in] = 10, + [snd_soc_dapm_dai_out] = 10, [snd_soc_dapm_dai_link] = 11, [snd_soc_dapm_clock_supply] = 12, [snd_soc_dapm_regulator_supply] = 12, @@ -419,7 +421,8 @@ static void dapm_set_path_status(struct snd_soc_dapm_widget *w, case snd_soc_dapm_clock_supply: case snd_soc_dapm_aif_in: case snd_soc_dapm_aif_out: - case snd_soc_dapm_dai: + case snd_soc_dapm_dai_in: + case snd_soc_dapm_dai_out: case snd_soc_dapm_hp: case snd_soc_dapm_mic: case snd_soc_dapm_spk: @@ -820,7 +823,7 @@ static int is_connected_output_ep(struct snd_soc_dapm_widget *widget, switch (widget->id) { case snd_soc_dapm_adc: case snd_soc_dapm_aif_out: - case snd_soc_dapm_dai: + case snd_soc_dapm_dai_out: if (widget->active) { widget->outputs = snd_soc_dapm_suspend_check(widget); return widget->outputs; @@ -916,7 +919,7 @@ static int is_connected_input_ep(struct snd_soc_dapm_widget *widget, switch (widget->id) { case snd_soc_dapm_dac: case snd_soc_dapm_aif_in: - case snd_soc_dapm_dai: + case snd_soc_dapm_dai_in: if (widget->active) { widget->inputs = snd_soc_dapm_suspend_check(widget); return widget->inputs; @@ -1135,16 +1138,6 @@ static int dapm_generic_check_power(struct snd_soc_dapm_widget *w) return out != 0 && in != 0; } -static int dapm_dai_check_power(struct snd_soc_dapm_widget *w) -{ - DAPM_UPDATE_STAT(w, power_checks); - - if (w->active) - return w->active; - - return dapm_generic_check_power(w); -} - /* Check to see if an ADC has power */ static int dapm_adc_check_power(struct snd_soc_dapm_widget *w) { @@ -2318,7 +2311,8 @@ static int snd_soc_dapm_add_route(struct snd_soc_dapm_context *dapm, case snd_soc_dapm_clock_supply: case snd_soc_dapm_aif_in: case snd_soc_dapm_aif_out: - case snd_soc_dapm_dai: + case snd_soc_dapm_dai_in: + case snd_soc_dapm_dai_out: case snd_soc_dapm_dai_link: list_add(&path->list, &dapm->card->paths); list_add(&path->list_sink, &wsink->sources); @@ -3129,10 +3123,12 @@ snd_soc_dapm_new_control(struct snd_soc_dapm_context *dapm, break; case snd_soc_dapm_adc: case snd_soc_dapm_aif_out: + case snd_soc_dapm_dai_out: w->power_check = dapm_adc_check_power; break; case snd_soc_dapm_dac: case snd_soc_dapm_aif_in: + case snd_soc_dapm_dai_in: w->power_check = dapm_dac_check_power; break; case snd_soc_dapm_pga: @@ -3152,9 +3148,6 @@ snd_soc_dapm_new_control(struct snd_soc_dapm_context *dapm, case snd_soc_dapm_clock_supply: w->power_check = dapm_supply_check_power; break; - case snd_soc_dapm_dai: - w->power_check = dapm_dai_check_power; - break; default: w->power_check = dapm_always_on_check_power; break; @@ -3375,7 +3368,7 @@ int snd_soc_dapm_new_dai_widgets(struct snd_soc_dapm_context *dapm, template.reg = SND_SOC_NOPM; if (dai->driver->playback.stream_name) { - template.id = snd_soc_dapm_dai; + template.id = snd_soc_dapm_dai_in; template.name = dai->driver->playback.stream_name; template.sname = dai->driver->playback.stream_name; @@ -3393,7 +3386,7 @@ int snd_soc_dapm_new_dai_widgets(struct snd_soc_dapm_context *dapm, } if (dai->driver->capture.stream_name) { - template.id = snd_soc_dapm_dai; + template.id = snd_soc_dapm_dai_out; template.name = dai->driver->capture.stream_name; template.sname = dai->driver->capture.stream_name; @@ -3423,8 +3416,13 @@ int snd_soc_dapm_link_dai_widgets(struct snd_soc_card *card) /* For each DAI widget... */ list_for_each_entry(dai_w, &card->widgets, list) { - if (dai_w->id != snd_soc_dapm_dai) + switch (dai_w->id) { + case snd_soc_dapm_dai_in: + case snd_soc_dapm_dai_out: + break; + default: continue; + } dai = dai_w->priv; @@ -3433,8 +3431,13 @@ int snd_soc_dapm_link_dai_widgets(struct snd_soc_card *card) if (w->dapm != dai_w->dapm) continue; - if (w->id == snd_soc_dapm_dai) + switch (w->id) { + case snd_soc_dapm_dai_in: + case snd_soc_dapm_dai_out: continue; + default: + break; + } if (!w->sname) continue; diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index a9fddf0fea19..ccb6be4d658d 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -928,8 +928,13 @@ static int dpcm_add_paths(struct snd_soc_pcm_runtime *fe, int stream, /* Create any new FE <--> BE connections */ for (i = 0; i < list->num_widgets; i++) { - if (list->widgets[i]->id != snd_soc_dapm_dai) + switch (list->widgets[i]->id) { + case snd_soc_dapm_dai_in: + case snd_soc_dapm_dai_out: + break; + default: continue; + } /* is there a valid BE rtd for this widget */ be = dpcm_get_be(card, list->widgets[i], stream); -- cgit v1.2.3-71-gd317 From d62840995a99c9766803d54e9d7923f247a1c1db Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 22 May 2013 02:41:36 -0700 Subject: trace: Allow idle-safe tracepoints to be called from irq __DECLARE_TRACE_RCU() currently creates an _rcuidle() tracepoint which may safely be invoked from what RCU considers to be an idle CPU. However, these _rcuidle() tracepoints may -not- be invoked from the handler of an irq taken from idle, because rcu_idle_enter() zeroes RCU's nesting-level counter, so that the rcu_irq_exit() returning to idle will trigger a WARN_ON_ONCE(). This commit therefore substitutes rcu_irq_enter() for rcu_idle_exit() and rcu_irq_exit() for rcu_idle_enter() in order to make the _rcuidle() tracepoints usable from irq handlers as well as from process context. Reported-by: Dave Jones Signed-off-by: Paul E. McKenney Cc: Steven Rostedt --- include/linux/tracepoint.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 2f322c38bd4d..f8e084d0fc77 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -145,8 +145,8 @@ static inline void tracepoint_synchronize_unregister(void) TP_PROTO(data_proto), \ TP_ARGS(data_args), \ TP_CONDITION(cond), \ - rcu_idle_exit(), \ - rcu_idle_enter()); \ + rcu_irq_enter(), \ + rcu_irq_exit()); \ } #else #define __DECLARE_TRACE_RCU(name, proto, args, cond, data_proto, data_args) -- cgit v1.2.3-71-gd317 From ed13998c319b050fc9abdb73915859dfdbe1fb38 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 5 Jun 2013 15:30:55 +0200 Subject: sock_diag: fix filter code sent to userspace Filters need to be translated to real BPF code for userland, like SO_GETFILTER. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- include/linux/filter.h | 1 + net/core/filter.c | 2 +- net/core/sock_diag.c | 9 +++++++-- 3 files changed, 9 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index c050dcc322a4..f65f5a69db8f 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -46,6 +46,7 @@ extern int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); extern int sk_detach_filter(struct sock *sk); extern int sk_chk_filter(struct sock_filter *filter, unsigned int flen); extern int sk_get_filter(struct sock *sk, struct sock_filter __user *filter, unsigned len); +extern void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to); #ifdef CONFIG_BPF_JIT #include diff --git a/net/core/filter.c b/net/core/filter.c index dad2a178f9f8..6438f29ff266 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -778,7 +778,7 @@ int sk_detach_filter(struct sock *sk) } EXPORT_SYMBOL_GPL(sk_detach_filter); -static void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to) +void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to) { static const u16 decodes[] = { [BPF_S_ALU_ADD_K] = BPF_ALU|BPF_ADD|BPF_K, diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index d5bef0b0f639..a0e9cf6379de 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -73,8 +73,13 @@ int sock_diag_put_filterinfo(struct user_namespace *user_ns, struct sock *sk, goto out; } - if (filter) - memcpy(nla_data(attr), filter->insns, len); + if (filter) { + struct sock_filter *fb = (struct sock_filter *)nla_data(attr); + int i; + + for (i = 0; i < filter->len; i++, fb++) + sk_decode_filter(&filter->insns[i], fb); + } out: rcu_read_unlock(); -- cgit v1.2.3-71-gd317 From 2a8fedd0c142d4328ab4667847e05afe17c3295c Mon Sep 17 00:00:00 2001 From: David Daney Date: Mon, 10 Jun 2013 12:33:47 -0700 Subject: kvm: Add definition of KVM_REG_MIPS We use 0x7000000000000000ULL as 0x6000000000000000ULL is reserved for ARM64. Signed-off-by: David Daney Signed-off-by: Gleb Natapov --- include/uapi/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index a5c86fc34a37..d88c8ee00c8b 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -783,6 +783,7 @@ struct kvm_dirty_tlb { #define KVM_REG_IA64 0x3000000000000000ULL #define KVM_REG_ARM 0x4000000000000000ULL #define KVM_REG_S390 0x5000000000000000ULL +#define KVM_REG_MIPS 0x7000000000000000ULL #define KVM_REG_SIZE_SHIFT 52 #define KVM_REG_SIZE_MASK 0x00f0000000000000ULL -- cgit v1.2.3-71-gd317 From b79462a8b9f9a452edc20c64a70a89ba3b0a6a88 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Sat, 8 Jun 2013 15:00:55 +0200 Subject: team: fix checks in team_get_first_port_txable_rcu() should be checked if "cur" is txable, not "port". Introduced by commit 6e88e1357c "team: use function team_port_txable() for determing enabled and up port" Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/if_team.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/if_team.h b/include/linux/if_team.h index 4474557904f6..16fae6436d0e 100644 --- a/include/linux/if_team.h +++ b/include/linux/if_team.h @@ -249,12 +249,12 @@ team_get_first_port_txable_rcu(struct team *team, struct team_port *port) return port; cur = port; list_for_each_entry_continue_rcu(cur, &team->port_list, list) - if (team_port_txable(port)) + if (team_port_txable(cur)) return cur; list_for_each_entry_rcu(cur, &team->port_list, list) { if (cur == port) break; - if (team_port_txable(port)) + if (team_port_txable(cur)) return cur; } return NULL; -- cgit v1.2.3-71-gd317 From 96570ffcca0b872dc8626e97569d2697f374d868 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 29 May 2013 09:51:29 +0300 Subject: Bluetooth: Fix mgmt handling of power on failures If hci_dev_open fails we need to ensure that the corresponding mgmt_set_powered command gets an appropriate response. This patch fixes the missing response by adding a new mgmt_set_powered_failed function that's used to indicate a power on failure to mgmt. Since a situation with the device being rfkilled may require special handling in user space the patch uses a new dedicated mgmt status code for this. Signed-off-by: Johan Hedberg Cc: stable@vger.kernel.org Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan Signed-off-by: John W. Linville --- include/net/bluetooth/hci_core.h | 1 + include/net/bluetooth/mgmt.h | 1 + net/bluetooth/hci_core.c | 6 +++++- net/bluetooth/mgmt.c | 21 +++++++++++++++++++++ 4 files changed, 28 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 35a57cd1704c..7cb6d360d147 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1117,6 +1117,7 @@ void hci_sock_dev_event(struct hci_dev *hdev, int event); int mgmt_control(struct sock *sk, struct msghdr *msg, size_t len); int mgmt_index_added(struct hci_dev *hdev); int mgmt_index_removed(struct hci_dev *hdev); +int mgmt_set_powered_failed(struct hci_dev *hdev, int err); int mgmt_powered(struct hci_dev *hdev, u8 powered); int mgmt_discoverable(struct hci_dev *hdev, u8 discoverable); int mgmt_connectable(struct hci_dev *hdev, u8 connectable); diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h index 22980a7c3873..9944c3e68c5d 100644 --- a/include/net/bluetooth/mgmt.h +++ b/include/net/bluetooth/mgmt.h @@ -42,6 +42,7 @@ #define MGMT_STATUS_NOT_POWERED 0x0f #define MGMT_STATUS_CANCELLED 0x10 #define MGMT_STATUS_INVALID_INDEX 0x11 +#define MGMT_STATUS_RFKILLED 0x12 struct mgmt_hdr { __le16 opcode; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 33843c5c4939..d817c932d634 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1555,11 +1555,15 @@ static const struct rfkill_ops hci_rfkill_ops = { static void hci_power_on(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, power_on); + int err; BT_DBG("%s", hdev->name); - if (hci_dev_open(hdev->id) < 0) + err = hci_dev_open(hdev->id); + if (err < 0) { + mgmt_set_powered_failed(hdev, err); return; + } if (test_bit(HCI_AUTO_OFF, &hdev->dev_flags)) queue_delayed_work(hdev->req_workqueue, &hdev->power_off, diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 5e93b24d01fd..f8ecbc70293d 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -3418,6 +3418,27 @@ new_settings: return err; } +int mgmt_set_powered_failed(struct hci_dev *hdev, int err) +{ + struct pending_cmd *cmd; + u8 status; + + cmd = mgmt_pending_find(MGMT_OP_SET_POWERED, hdev); + if (!cmd) + return -ENOENT; + + if (err == -ERFKILL) + status = MGMT_STATUS_RFKILLED; + else + status = MGMT_STATUS_FAILED; + + err = cmd_status(cmd->sk, hdev->id, MGMT_OP_SET_POWERED, status); + + mgmt_pending_remove(cmd); + + return err; +} + int mgmt_discoverable(struct hci_dev *hdev, u8 discoverable) { struct cmd_lookup match = { NULL, hdev }; -- cgit v1.2.3-71-gd317 From 16e53dbf10a2d7e228709a7286310e629ede5e45 Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat" Date: Wed, 12 Jun 2013 14:04:36 -0700 Subject: CPU hotplug: provide a generic helper to disable/enable CPU hotplug There are instances in the kernel where we would like to disable CPU hotplug (from sysfs) during some important operation. Today the freezer code depends on this and the code to do it was kinda tailor-made for that. Restructure the code and make it generic enough to be useful for other usecases too. Signed-off-by: Srivatsa S. Bhat Signed-off-by: Robin Holt Cc: H. Peter Anvin Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Russ Anderson Cc: Robin Holt Cc: Russell King Cc: Guan Xuetao Cc: Shawn Guo Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cpu.h | 4 ++++ kernel/cpu.c | 55 ++++++++++++++++++++++------------------------------- 2 files changed, 27 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index c6f6e0839b61..9f3c7e81270a 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -175,6 +175,8 @@ extern struct bus_type cpu_subsys; extern void get_online_cpus(void); extern void put_online_cpus(void); +extern void cpu_hotplug_disable(void); +extern void cpu_hotplug_enable(void); #define hotcpu_notifier(fn, pri) cpu_notifier(fn, pri) #define register_hotcpu_notifier(nb) register_cpu_notifier(nb) #define unregister_hotcpu_notifier(nb) unregister_cpu_notifier(nb) @@ -198,6 +200,8 @@ static inline void cpu_hotplug_driver_unlock(void) #define get_online_cpus() do { } while (0) #define put_online_cpus() do { } while (0) +#define cpu_hotplug_disable() do { } while (0) +#define cpu_hotplug_enable() do { } while (0) #define hotcpu_notifier(fn, pri) do { (void)(fn); } while (0) /* These aren't inline functions due to a GCC bug. */ #define register_hotcpu_notifier(nb) ({ (void)(nb); 0; }) diff --git a/kernel/cpu.c b/kernel/cpu.c index b5e4ab2d427e..198a38883e64 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -133,6 +133,27 @@ static void cpu_hotplug_done(void) mutex_unlock(&cpu_hotplug.lock); } +/* + * Wait for currently running CPU hotplug operations to complete (if any) and + * disable future CPU hotplug (from sysfs). The 'cpu_add_remove_lock' protects + * the 'cpu_hotplug_disabled' flag. The same lock is also acquired by the + * hotplug path before performing hotplug operations. So acquiring that lock + * guarantees mutual exclusion from any currently running hotplug operations. + */ +void cpu_hotplug_disable(void) +{ + cpu_maps_update_begin(); + cpu_hotplug_disabled = 1; + cpu_maps_update_done(); +} + +void cpu_hotplug_enable(void) +{ + cpu_maps_update_begin(); + cpu_hotplug_disabled = 0; + cpu_maps_update_done(); +} + #else /* #if CONFIG_HOTPLUG_CPU */ static void cpu_hotplug_begin(void) {} static void cpu_hotplug_done(void) {} @@ -540,36 +561,6 @@ static int __init alloc_frozen_cpus(void) } core_initcall(alloc_frozen_cpus); -/* - * Prevent regular CPU hotplug from racing with the freezer, by disabling CPU - * hotplug when tasks are about to be frozen. Also, don't allow the freezer - * to continue until any currently running CPU hotplug operation gets - * completed. - * To modify the 'cpu_hotplug_disabled' flag, we need to acquire the - * 'cpu_add_remove_lock'. And this same lock is also taken by the regular - * CPU hotplug path and released only after it is complete. Thus, we - * (and hence the freezer) will block here until any currently running CPU - * hotplug operation gets completed. - */ -void cpu_hotplug_disable_before_freeze(void) -{ - cpu_maps_update_begin(); - cpu_hotplug_disabled = 1; - cpu_maps_update_done(); -} - - -/* - * When tasks have been thawed, re-enable regular CPU hotplug (which had been - * disabled while beginning to freeze tasks). - */ -void cpu_hotplug_enable_after_thaw(void) -{ - cpu_maps_update_begin(); - cpu_hotplug_disabled = 0; - cpu_maps_update_done(); -} - /* * When callbacks for CPU hotplug notifications are being executed, we must * ensure that the state of the system with respect to the tasks being frozen @@ -589,12 +580,12 @@ cpu_hotplug_pm_callback(struct notifier_block *nb, case PM_SUSPEND_PREPARE: case PM_HIBERNATION_PREPARE: - cpu_hotplug_disable_before_freeze(); + cpu_hotplug_disable(); break; case PM_POST_SUSPEND: case PM_POST_HIBERNATION: - cpu_hotplug_enable_after_thaw(); + cpu_hotplug_enable(); break; default: -- cgit v1.2.3-71-gd317 From 637241a900cbd982f744d44646b48a273d609b34 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 12 Jun 2013 14:04:39 -0700 Subject: kmsg: honor dmesg_restrict sysctl on /dev/kmsg The dmesg_restrict sysctl currently covers the syslog method for access dmesg, however /dev/kmsg isn't covered by the same protections. Most people haven't noticed because util-linux dmesg(1) defaults to using the syslog method for access in older versions. With util-linux dmesg(1) defaults to reading directly from /dev/kmsg. To fix /dev/kmsg, let's compare the existing interfaces and what they allow: - /proc/kmsg allows: - open (SYSLOG_ACTION_OPEN) if CAP_SYSLOG since it uses a destructive single-reader interface (SYSLOG_ACTION_READ). - everything, after an open. - syslog syscall allows: - anything, if CAP_SYSLOG. - SYSLOG_ACTION_READ_ALL and SYSLOG_ACTION_SIZE_BUFFER, if dmesg_restrict==0. - nothing else (EPERM). The use-cases were: - dmesg(1) needs to do non-destructive SYSLOG_ACTION_READ_ALLs. - sysklog(1) needs to open /proc/kmsg, drop privs, and still issue the destructive SYSLOG_ACTION_READs. AIUI, dmesg(1) is moving to /dev/kmsg, and systemd-journald doesn't clear the ring buffer. Based on the comments in devkmsg_llseek, it sounds like actions besides reading aren't going to be supported by /dev/kmsg (i.e. SYSLOG_ACTION_CLEAR), so we have a strict subset of the non-destructive syslog syscall actions. To this end, move the check as Josh had done, but also rename the constants to reflect their new uses (SYSLOG_FROM_CALL becomes SYSLOG_FROM_READER, and SYSLOG_FROM_FILE becomes SYSLOG_FROM_PROC). SYSLOG_FROM_READER allows non-destructive actions, and SYSLOG_FROM_PROC allows destructive actions after a capabilities-constrained SYSLOG_ACTION_OPEN check. - /dev/kmsg allows: - open if CAP_SYSLOG or dmesg_restrict==0 - reading/polling, after open Addresses https://bugzilla.redhat.com/show_bug.cgi?id=903192 [akpm@linux-foundation.org: use pr_warn_once()] Signed-off-by: Kees Cook Reported-by: Christian Kujau Tested-by: Josh Boyer Cc: Kay Sievers Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/kmsg.c | 10 +++--- include/linux/syslog.h | 4 +-- kernel/printk.c | 91 +++++++++++++++++++++++++++----------------------- 3 files changed, 57 insertions(+), 48 deletions(-) (limited to 'include') diff --git a/fs/proc/kmsg.c b/fs/proc/kmsg.c index bd4b5a740ff1..bdfabdaefdce 100644 --- a/fs/proc/kmsg.c +++ b/fs/proc/kmsg.c @@ -21,12 +21,12 @@ extern wait_queue_head_t log_wait; static int kmsg_open(struct inode * inode, struct file * file) { - return do_syslog(SYSLOG_ACTION_OPEN, NULL, 0, SYSLOG_FROM_FILE); + return do_syslog(SYSLOG_ACTION_OPEN, NULL, 0, SYSLOG_FROM_PROC); } static int kmsg_release(struct inode * inode, struct file * file) { - (void) do_syslog(SYSLOG_ACTION_CLOSE, NULL, 0, SYSLOG_FROM_FILE); + (void) do_syslog(SYSLOG_ACTION_CLOSE, NULL, 0, SYSLOG_FROM_PROC); return 0; } @@ -34,15 +34,15 @@ static ssize_t kmsg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { if ((file->f_flags & O_NONBLOCK) && - !do_syslog(SYSLOG_ACTION_SIZE_UNREAD, NULL, 0, SYSLOG_FROM_FILE)) + !do_syslog(SYSLOG_ACTION_SIZE_UNREAD, NULL, 0, SYSLOG_FROM_PROC)) return -EAGAIN; - return do_syslog(SYSLOG_ACTION_READ, buf, count, SYSLOG_FROM_FILE); + return do_syslog(SYSLOG_ACTION_READ, buf, count, SYSLOG_FROM_PROC); } static unsigned int kmsg_poll(struct file *file, poll_table *wait) { poll_wait(file, &log_wait, wait); - if (do_syslog(SYSLOG_ACTION_SIZE_UNREAD, NULL, 0, SYSLOG_FROM_FILE)) + if (do_syslog(SYSLOG_ACTION_SIZE_UNREAD, NULL, 0, SYSLOG_FROM_PROC)) return POLLIN | POLLRDNORM; return 0; } diff --git a/include/linux/syslog.h b/include/linux/syslog.h index 38911391a139..98a3153c0f96 100644 --- a/include/linux/syslog.h +++ b/include/linux/syslog.h @@ -44,8 +44,8 @@ /* Return size of the log buffer */ #define SYSLOG_ACTION_SIZE_BUFFER 10 -#define SYSLOG_FROM_CALL 0 -#define SYSLOG_FROM_FILE 1 +#define SYSLOG_FROM_READER 0 +#define SYSLOG_FROM_PROC 1 int do_syslog(int type, char __user *buf, int count, bool from_file); diff --git a/kernel/printk.c b/kernel/printk.c index fa36e1494420..8212c1aef125 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -363,6 +363,53 @@ static void log_store(int facility, int level, log_next_seq++; } +#ifdef CONFIG_SECURITY_DMESG_RESTRICT +int dmesg_restrict = 1; +#else +int dmesg_restrict; +#endif + +static int syslog_action_restricted(int type) +{ + if (dmesg_restrict) + return 1; + /* + * Unless restricted, we allow "read all" and "get buffer size" + * for everybody. + */ + return type != SYSLOG_ACTION_READ_ALL && + type != SYSLOG_ACTION_SIZE_BUFFER; +} + +static int check_syslog_permissions(int type, bool from_file) +{ + /* + * If this is from /proc/kmsg and we've already opened it, then we've + * already done the capabilities checks at open time. + */ + if (from_file && type != SYSLOG_ACTION_OPEN) + return 0; + + if (syslog_action_restricted(type)) { + if (capable(CAP_SYSLOG)) + return 0; + /* + * For historical reasons, accept CAP_SYS_ADMIN too, with + * a warning. + */ + if (capable(CAP_SYS_ADMIN)) { + pr_warn_once("%s (%d): Attempt to access syslog with " + "CAP_SYS_ADMIN but no CAP_SYSLOG " + "(deprecated).\n", + current->comm, task_pid_nr(current)); + return 0; + } + return -EPERM; + } + return security_syslog(type); +} + + /* /dev/kmsg - userspace message inject/listen interface */ struct devkmsg_user { u64 seq; @@ -620,7 +667,8 @@ static int devkmsg_open(struct inode *inode, struct file *file) if ((file->f_flags & O_ACCMODE) == O_WRONLY) return 0; - err = security_syslog(SYSLOG_ACTION_READ_ALL); + err = check_syslog_permissions(SYSLOG_ACTION_READ_ALL, + SYSLOG_FROM_READER); if (err) return err; @@ -813,45 +861,6 @@ static inline void boot_delay_msec(int level) } #endif -#ifdef CONFIG_SECURITY_DMESG_RESTRICT -int dmesg_restrict = 1; -#else -int dmesg_restrict; -#endif - -static int syslog_action_restricted(int type) -{ - if (dmesg_restrict) - return 1; - /* Unless restricted, we allow "read all" and "get buffer size" for everybody */ - return type != SYSLOG_ACTION_READ_ALL && type != SYSLOG_ACTION_SIZE_BUFFER; -} - -static int check_syslog_permissions(int type, bool from_file) -{ - /* - * If this is from /proc/kmsg and we've already opened it, then we've - * already done the capabilities checks at open time. - */ - if (from_file && type != SYSLOG_ACTION_OPEN) - return 0; - - if (syslog_action_restricted(type)) { - if (capable(CAP_SYSLOG)) - return 0; - /* For historical reasons, accept CAP_SYS_ADMIN too, with a warning */ - if (capable(CAP_SYS_ADMIN)) { - printk_once(KERN_WARNING "%s (%d): " - "Attempt to access syslog with CAP_SYS_ADMIN " - "but no CAP_SYSLOG (deprecated).\n", - current->comm, task_pid_nr(current)); - return 0; - } - return -EPERM; - } - return 0; -} - #if defined(CONFIG_PRINTK_TIME) static bool printk_time = 1; #else @@ -1249,7 +1258,7 @@ out: SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len) { - return do_syslog(type, buf, len, SYSLOG_FROM_CALL); + return do_syslog(type, buf, len, SYSLOG_FROM_READER); } /* -- cgit v1.2.3-71-gd317 From 30dad30922ccc733cfdbfe232090cf674dc374dc Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Wed, 12 Jun 2013 14:05:04 -0700 Subject: mm: migration: add migrate_entry_wait_huge() When we have a page fault for the address which is backed by a hugepage under migration, the kernel can't wait correctly and do busy looping on hugepage fault until the migration finishes. As a result, users who try to kick hugepage migration (via soft offlining, for example) occasionally experience long delay or soft lockup. This is because pte_offset_map_lock() can't get a correct migration entry or a correct page table lock for hugepage. This patch introduces migration_entry_wait_huge() to solve this. Signed-off-by: Naoya Horiguchi Reviewed-by: Rik van Riel Reviewed-by: Wanpeng Li Reviewed-by: Michal Hocko Cc: Mel Gorman Cc: Andi Kleen Cc: KOSAKI Motohiro Cc: [2.6.35+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swapops.h | 3 +++ mm/hugetlb.c | 2 +- mm/migrate.c | 23 ++++++++++++++++++----- 3 files changed, 22 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/swapops.h b/include/linux/swapops.h index 47ead515c811..c5fd30d2a415 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -137,6 +137,7 @@ static inline void make_migration_entry_read(swp_entry_t *entry) extern void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, unsigned long address); +extern void migration_entry_wait_huge(struct mm_struct *mm, pte_t *pte); #else #define make_migration_entry(page, write) swp_entry(0, 0) @@ -148,6 +149,8 @@ static inline int is_migration_entry(swp_entry_t swp) static inline void make_migration_entry_read(swp_entry_t *entryp) { } static inline void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, unsigned long address) { } +static inline void migration_entry_wait_huge(struct mm_struct *mm, + pte_t *pte) { } static inline int is_write_migration_entry(swp_entry_t entry) { return 0; diff --git a/mm/hugetlb.c b/mm/hugetlb.c index f8feeeca6686..e2bfbf73a551 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2839,7 +2839,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, if (ptep) { entry = huge_ptep_get(ptep); if (unlikely(is_hugetlb_entry_migration(entry))) { - migration_entry_wait(mm, (pmd_t *)ptep, address); + migration_entry_wait_huge(mm, ptep); return 0; } else if (unlikely(is_hugetlb_entry_hwpoisoned(entry))) return VM_FAULT_HWPOISON_LARGE | diff --git a/mm/migrate.c b/mm/migrate.c index b1f57501de9c..6f0c24438bba 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -200,15 +200,14 @@ static void remove_migration_ptes(struct page *old, struct page *new) * get to the page and wait until migration is finished. * When we return from this function the fault will be retried. */ -void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, - unsigned long address) +static void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep, + spinlock_t *ptl) { - pte_t *ptep, pte; - spinlock_t *ptl; + pte_t pte; swp_entry_t entry; struct page *page; - ptep = pte_offset_map_lock(mm, pmd, address, &ptl); + spin_lock(ptl); pte = *ptep; if (!is_swap_pte(pte)) goto out; @@ -236,6 +235,20 @@ out: pte_unmap_unlock(ptep, ptl); } +void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, + unsigned long address) +{ + spinlock_t *ptl = pte_lockptr(mm, pmd); + pte_t *ptep = pte_offset_map(pmd, address); + __migration_entry_wait(mm, ptep, ptl); +} + +void migration_entry_wait_huge(struct mm_struct *mm, pte_t *pte) +{ + spinlock_t *ptl = &(mm)->page_table_lock; + __migration_entry_wait(mm, pte, ptl); +} + #ifdef CONFIG_BLOCK /* Returns true if all buffers are successfully locked */ static bool buffer_migrate_lock_buffers(struct buffer_head *head, -- cgit v1.2.3-71-gd317 From c2853c8df57f49620d26f317d7d43347c29bfc2e Mon Sep 17 00:00:00 2001 From: Alex Shi Date: Wed, 12 Jun 2013 14:05:10 -0700 Subject: include/linux/math64.h: add div64_ul() There is div64_long() to handle the s64/long division, but no mocro do u64/ul division. It is necessary in some scenarios, so add this function. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Alex Shi Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/math64.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/math64.h b/include/linux/math64.h index b8ba85544721..2913b86eb12a 100644 --- a/include/linux/math64.h +++ b/include/linux/math64.h @@ -6,7 +6,8 @@ #if BITS_PER_LONG == 64 -#define div64_long(x,y) div64_s64((x),(y)) +#define div64_long(x, y) div64_s64((x), (y)) +#define div64_ul(x, y) div64_u64((x), (y)) /** * div_u64_rem - unsigned 64bit divide with 32bit divisor with remainder @@ -47,7 +48,8 @@ static inline s64 div64_s64(s64 dividend, s64 divisor) #elif BITS_PER_LONG == 32 -#define div64_long(x,y) div_s64((x),(y)) +#define div64_long(x, y) div_s64((x), (y)) +#define div64_ul(x, y) div_u64((x), (y)) #ifndef div_u64_rem static inline u64 div_u64_rem(u64 dividend, u32 divisor, u32 *remainder) -- cgit v1.2.3-71-gd317 From d3b6f6141831b6e2d414edea6cc7af5b9bc6fac2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 7 Jun 2013 13:26:05 -0700 Subject: ip_tunnel: remove __net_init/exit from exported functions If CONFIG_NET_NS is not set then __net_init is the same as __init and __net_exit is the same as __exit. These functions will be removed from memory after the module loads or is removed. Functions that are exported for use by other functions should never be labeled for removal. Bug introduced by commit c54419321455631079c ("GRE: Refactor GRE tunneling code.") Reported-by: Steinar H. Gunderson Signed-off-by: Steven Rostedt Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/ip_tunnels.h | 6 +++--- net/ipv4/ip_tunnel.c | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 4b6f0b28f41f..09b1360e10bf 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -95,10 +95,10 @@ struct ip_tunnel_net { int ip_tunnel_init(struct net_device *dev); void ip_tunnel_uninit(struct net_device *dev); void ip_tunnel_dellink(struct net_device *dev, struct list_head *head); -int __net_init ip_tunnel_init_net(struct net *net, int ip_tnl_net_id, - struct rtnl_link_ops *ops, char *devname); +int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id, + struct rtnl_link_ops *ops, char *devname); -void __net_exit ip_tunnel_delete_net(struct ip_tunnel_net *itn); +void ip_tunnel_delete_net(struct ip_tunnel_net *itn); void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, const struct iphdr *tnl_params); diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index be2f8da0ae8e..7fa8f08fa7ae 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -853,7 +853,7 @@ void ip_tunnel_dellink(struct net_device *dev, struct list_head *head) } EXPORT_SYMBOL_GPL(ip_tunnel_dellink); -int __net_init ip_tunnel_init_net(struct net *net, int ip_tnl_net_id, +int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id, struct rtnl_link_ops *ops, char *devname) { struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id); @@ -899,7 +899,7 @@ static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head) unregister_netdevice_queue(itn->fb_tunnel_dev, head); } -void __net_exit ip_tunnel_delete_net(struct ip_tunnel_net *itn) +void ip_tunnel_delete_net(struct ip_tunnel_net *itn) { LIST_HEAD(list); -- cgit v1.2.3-71-gd317 From f21afc25f9ed45b8ffe200d0f071b0caec3ed2ef Mon Sep 17 00:00:00 2001 From: David Daney Date: Fri, 14 Jun 2013 11:13:59 -0700 Subject: smp.h: Use local_irq_{save,restore}() in !SMP version of on_each_cpu(). Thanks to commit f91eb62f71b3 ("init: scream bloody murder if interrupts are enabled too early"), "bloody murder" is now being screamed. With a MIPS OCTEON config, we use on_each_cpu() in our irq_chip.irq_bus_sync_unlock() function. This gets called in early as a result of the time_init() call. Because the !SMP version of on_each_cpu() unconditionally enables irqs, we get: WARNING: at init/main.c:560 start_kernel+0x250/0x410() Interrupts were enabled early CPU: 0 PID: 0 Comm: swapper Not tainted 3.10.0-rc5-Cavium-Octeon+ #801 Call Trace: show_stack+0x68/0x80 warn_slowpath_common+0x78/0xb0 warn_slowpath_fmt+0x38/0x48 start_kernel+0x250/0x410 Suggested fix: Do what we already do in the SMP version of on_each_cpu(), and use local_irq_save/local_irq_restore. Because we need a flags variable, make it a static inline to avoid name space issues. [ Change from v1: Convert on_each_cpu to a static inline function, add #include to avoid build breakage on some files. on_each_cpu_mask() and on_each_cpu_cond() suffer the same problem as on_each_cpu(), but they are not causing !SMP bugs for me, so I will defer changing them to a less urgent patch. ] Signed-off-by: David Daney Cc: Ralf Baechle Cc: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/smp.h | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/smp.h b/include/linux/smp.h index e6564c1dc552..c8488763277f 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -11,6 +11,7 @@ #include #include #include +#include extern void cpu_idle(void); @@ -139,13 +140,17 @@ static inline int up_smp_call_function(smp_call_func_t func, void *info) } #define smp_call_function(func, info, wait) \ (up_smp_call_function(func, info)) -#define on_each_cpu(func,info,wait) \ - ({ \ - local_irq_disable(); \ - func(info); \ - local_irq_enable(); \ - 0; \ - }) + +static inline int on_each_cpu(smp_call_func_t func, void *info, int wait) +{ + unsigned long flags; + + local_irq_save(flags); + func(info); + local_irq_restore(flags); + return 0; +} + /* * Note we still need to test the mask even for UP * because we actually can get an empty mask from -- cgit v1.2.3-71-gd317