From 546125d1614264d26080817d0c8cddb9b25081fa Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Thu, 5 Jan 2017 16:34:51 -0500 Subject: sunrpc: don't call sleeping functions from the notifier block callbacks The inet6addr_chain is an atomic notifier chain, so we can't call anything that might sleep (like lock_sock)... instead of closing the socket from svc_age_temp_xprts_now (which is called by the notifier function), just have the rpc service threads do it instead. Cc: stable@vger.kernel.org Fixes: c3d4879e01be "sunrpc: Add a function to close..." Signed-off-by: Scott Mayhew Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_xprt.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index e5d193440374..7440290f64ac 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -66,6 +66,7 @@ struct svc_xprt { #define XPT_LISTENER 10 /* listening endpoint */ #define XPT_CACHE_AUTH 11 /* cache auth info */ #define XPT_LOCAL 12 /* connection from loopback interface */ +#define XPT_KILL_TEMP 13 /* call xpo_kill_temp_xprt before closing */ struct svc_serv *xpt_server; /* service for transport */ atomic_t xpt_reserved; /* space on outq that is rsvd */ -- cgit v1.2.3-71-gd317 From 003c941057eaa868ca6fedd29a274c863167230d Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Thu, 12 Jan 2017 14:24:58 -0800 Subject: tcp: fix tcp_fastopen unaligned access complaints on sparc Fix up a data alignment issue on sparc by swapping the order of the cookie byte array field with the length field in struct tcp_fastopen_cookie, and making it a proper union to clean up the typecasting. This addresses log complaints like these: log_unaligned: 113 callbacks suppressed Kernel unaligned access at TPC[976490] tcp_try_fastopen+0x2d0/0x360 Kernel unaligned access at TPC[9764ac] tcp_try_fastopen+0x2ec/0x360 Kernel unaligned access at TPC[9764c8] tcp_try_fastopen+0x308/0x360 Kernel unaligned access at TPC[9764e4] tcp_try_fastopen+0x324/0x360 Kernel unaligned access at TPC[976490] tcp_try_fastopen+0x2d0/0x360 Cc: Eric Dumazet Signed-off-by: Shannon Nelson Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 7 ++++++- net/ipv4/tcp_fastopen.c | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index fc5848dad7a4..c93f4b3a59cb 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -62,8 +62,13 @@ static inline unsigned int tcp_optlen(const struct sk_buff *skb) /* TCP Fast Open Cookie as stored in memory */ struct tcp_fastopen_cookie { + union { + u8 val[TCP_FASTOPEN_COOKIE_MAX]; +#if IS_ENABLED(CONFIG_IPV6) + struct in6_addr addr; +#endif + }; s8 len; - u8 val[TCP_FASTOPEN_COOKIE_MAX]; bool exp; /* In RFC6994 experimental option format */ }; diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 4e777a3243f9..f51919535ca7 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -113,7 +113,7 @@ static bool tcp_fastopen_cookie_gen(struct request_sock *req, struct tcp_fastopen_cookie tmp; if (__tcp_fastopen_cookie_gen(&ip6h->saddr, &tmp)) { - struct in6_addr *buf = (struct in6_addr *) tmp.val; + struct in6_addr *buf = &tmp.addr; int i; for (i = 0; i < 4; i++) -- cgit v1.2.3-71-gd317 From 52d7e48b86fc108e45a656d8e53e4237993c481d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 10 Jan 2017 02:28:26 -0800 Subject: rcu: Narrow early boot window of illegal synchronous grace periods The current preemptible RCU implementation goes through three phases during bootup. In the first phase, there is only one CPU that is running with preemption disabled, so that a no-op is a synchronous grace period. In the second mid-boot phase, the scheduler is running, but RCU has not yet gotten its kthreads spawned (and, for expedited grace periods, workqueues are not yet running. During this time, any attempt to do a synchronous grace period will hang the system (or complain bitterly, depending). In the third and final phase, RCU is fully operational and everything works normally. This has been OK for some time, but there has recently been some synchronous grace periods showing up during the second mid-boot phase. This code worked "by accident" for awhile, but started failing as soon as expedited RCU grace periods switched over to workqueues in commit 8b355e3bc140 ("rcu: Drive expedited grace periods from workqueue"). Note that the code was buggy even before this commit, as it was subject to failure on real-time systems that forced all expedited grace periods to run as normal grace periods (for example, using the rcu_normal ksysfs parameter). The callchain from the failure case is as follows: early_amd_iommu_init() |-> acpi_put_table(ivrs_base); |-> acpi_tb_put_table(table_desc); |-> acpi_tb_invalidate_table(table_desc); |-> acpi_tb_release_table(...) |-> acpi_os_unmap_memory |-> acpi_os_unmap_iomem |-> acpi_os_map_cleanup |-> synchronize_rcu_expedited The kernel showing this callchain was built with CONFIG_PREEMPT_RCU=y, which caused the code to try using workqueues before they were initialized, which did not go well. This commit therefore reworks RCU to permit synchronous grace periods to proceed during this mid-boot phase. This commit is therefore a fix to a regression introduced in v4.9, and is therefore being put forward post-merge-window in v4.10. This commit sets a flag from the existing rcu_scheduler_starting() function which causes all synchronous grace periods to take the expedited path. The expedited path now checks this flag, using the requesting task to drive the expedited grace period forward during the mid-boot phase. Finally, this flag is updated by a core_initcall() function named rcu_exp_runtime_mode(), which causes the runtime codepaths to be used. Note that this arrangement assumes that tasks are not sent POSIX signals (or anything similar) from the time that the first task is spawned through core_initcall() time. Fixes: 8b355e3bc140 ("rcu: Drive expedited grace periods from workqueue") Reported-by: "Zheng, Lv" Reported-by: Borislav Petkov Signed-off-by: Paul E. McKenney Tested-by: Stan Kain Tested-by: Ivan Tested-by: Emanuel Castelo Tested-by: Bruno Pesavento Tested-by: Borislav Petkov Tested-by: Frederic Bezies Cc: # 4.9.0- --- include/linux/rcupdate.h | 4 ++++ kernel/rcu/rcu.h | 1 + kernel/rcu/tiny_plugin.h | 9 +++++++-- kernel/rcu/tree.c | 33 ++++++++++++++++++------------ kernel/rcu/tree_exp.h | 52 ++++++++++++++++++++++++++++++++++++++---------- kernel/rcu/tree_plugin.h | 2 +- kernel/rcu/update.c | 38 +++++++++++++++++++++++++++-------- 7 files changed, 104 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 321f9ed552a9..01f71e1d2e94 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -444,6 +444,10 @@ bool __rcu_is_watching(void); #error "Unknown RCU implementation specified to kernel configuration" #endif +#define RCU_SCHEDULER_INACTIVE 0 +#define RCU_SCHEDULER_INIT 1 +#define RCU_SCHEDULER_RUNNING 2 + /* * init_rcu_head_on_stack()/destroy_rcu_head_on_stack() are needed for dynamic * initialization and destruction of rcu_head on the stack. rcu_head structures diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 80adef7d4c3d..0d6ff3e471be 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -136,6 +136,7 @@ int rcu_jiffies_till_stall_check(void); #define TPS(x) tracepoint_string(x) void rcu_early_boot_tests(void); +void rcu_test_sync_prims(void); /* * This function really isn't for public consumption, but RCU is special in diff --git a/kernel/rcu/tiny_plugin.h b/kernel/rcu/tiny_plugin.h index 196f0302e2f4..c64b827ecbca 100644 --- a/kernel/rcu/tiny_plugin.h +++ b/kernel/rcu/tiny_plugin.h @@ -60,12 +60,17 @@ EXPORT_SYMBOL_GPL(rcu_scheduler_active); /* * During boot, we forgive RCU lockdep issues. After this function is - * invoked, we start taking RCU lockdep issues seriously. + * invoked, we start taking RCU lockdep issues seriously. Note that unlike + * Tree RCU, Tiny RCU transitions directly from RCU_SCHEDULER_INACTIVE + * to RCU_SCHEDULER_RUNNING, skipping the RCU_SCHEDULER_INIT stage. + * The reason for this is that Tiny RCU does not need kthreads, so does + * not have to care about the fact that the scheduler is half-initialized + * at a certain phase of the boot process. */ void __init rcu_scheduler_starting(void) { WARN_ON(nr_context_switches() > 0); - rcu_scheduler_active = 1; + rcu_scheduler_active = RCU_SCHEDULER_RUNNING; } #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 96c52e43f7ca..cb4e2056ccf3 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -127,13 +127,16 @@ int rcu_num_nodes __read_mostly = NUM_RCU_NODES; /* Total # rcu_nodes in use. */ int sysctl_panic_on_rcu_stall __read_mostly; /* - * The rcu_scheduler_active variable transitions from zero to one just - * before the first task is spawned. So when this variable is zero, RCU - * can assume that there is but one task, allowing RCU to (for example) + * The rcu_scheduler_active variable is initialized to the value + * RCU_SCHEDULER_INACTIVE and transitions RCU_SCHEDULER_INIT just before the + * first task is spawned. So when this variable is RCU_SCHEDULER_INACTIVE, + * RCU can assume that there is but one task, allowing RCU to (for example) * optimize synchronize_rcu() to a simple barrier(). When this variable - * is one, RCU must actually do all the hard work required to detect real - * grace periods. This variable is also used to suppress boot-time false - * positives from lockdep-RCU error checking. + * is RCU_SCHEDULER_INIT, RCU must actually do all the hard work required + * to detect real grace periods. This variable is also used to suppress + * boot-time false positives from lockdep-RCU error checking. Finally, it + * transitions from RCU_SCHEDULER_INIT to RCU_SCHEDULER_RUNNING after RCU + * is fully initialized, including all of its kthreads having been spawned. */ int rcu_scheduler_active __read_mostly; EXPORT_SYMBOL_GPL(rcu_scheduler_active); @@ -3980,18 +3983,22 @@ static int __init rcu_spawn_gp_kthread(void) early_initcall(rcu_spawn_gp_kthread); /* - * This function is invoked towards the end of the scheduler's initialization - * process. Before this is called, the idle task might contain - * RCU read-side critical sections (during which time, this idle - * task is booting the system). After this function is called, the - * idle tasks are prohibited from containing RCU read-side critical - * sections. This function also enables RCU lockdep checking. + * This function is invoked towards the end of the scheduler's + * initialization process. Before this is called, the idle task might + * contain synchronous grace-period primitives (during which time, this idle + * task is booting the system, and such primitives are no-ops). After this + * function is called, any synchronous grace-period primitives are run as + * expedited, with the requesting task driving the grace period forward. + * A later core_initcall() rcu_exp_runtime_mode() will switch to full + * runtime RCU functionality. */ void rcu_scheduler_starting(void) { WARN_ON(num_online_cpus() != 1); WARN_ON(nr_context_switches() > 0); - rcu_scheduler_active = 1; + rcu_test_sync_prims(); + rcu_scheduler_active = RCU_SCHEDULER_INIT; + rcu_test_sync_prims(); } /* diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index d3053e99fdb6..e59e1849b89a 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -531,6 +531,20 @@ struct rcu_exp_work { struct work_struct rew_work; }; +/* + * Common code to drive an expedited grace period forward, used by + * workqueues and mid-boot-time tasks. + */ +static void rcu_exp_sel_wait_wake(struct rcu_state *rsp, + smp_call_func_t func, unsigned long s) +{ + /* Initialize the rcu_node tree in preparation for the wait. */ + sync_rcu_exp_select_cpus(rsp, func); + + /* Wait and clean up, including waking everyone. */ + rcu_exp_wait_wake(rsp, s); +} + /* * Work-queue handler to drive an expedited grace period forward. */ @@ -538,12 +552,8 @@ static void wait_rcu_exp_gp(struct work_struct *wp) { struct rcu_exp_work *rewp; - /* Initialize the rcu_node tree in preparation for the wait. */ rewp = container_of(wp, struct rcu_exp_work, rew_work); - sync_rcu_exp_select_cpus(rewp->rew_rsp, rewp->rew_func); - - /* Wait and clean up, including waking everyone. */ - rcu_exp_wait_wake(rewp->rew_rsp, rewp->rew_s); + rcu_exp_sel_wait_wake(rewp->rew_rsp, rewp->rew_func, rewp->rew_s); } /* @@ -569,12 +579,18 @@ static void _synchronize_rcu_expedited(struct rcu_state *rsp, if (exp_funnel_lock(rsp, s)) return; /* Someone else did our work for us. */ - /* Marshall arguments and schedule the expedited grace period. */ - rew.rew_func = func; - rew.rew_rsp = rsp; - rew.rew_s = s; - INIT_WORK_ONSTACK(&rew.rew_work, wait_rcu_exp_gp); - schedule_work(&rew.rew_work); + /* Ensure that load happens before action based on it. */ + if (unlikely(rcu_scheduler_active == RCU_SCHEDULER_INIT)) { + /* Direct call during scheduler init and early_initcalls(). */ + rcu_exp_sel_wait_wake(rsp, func, s); + } else { + /* Marshall arguments & schedule the expedited grace period. */ + rew.rew_func = func; + rew.rew_rsp = rsp; + rew.rew_s = s; + INIT_WORK_ONSTACK(&rew.rew_work, wait_rcu_exp_gp); + schedule_work(&rew.rew_work); + } /* Wait for expedited grace period to complete. */ rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id()); @@ -676,6 +692,8 @@ void synchronize_rcu_expedited(void) { struct rcu_state *rsp = rcu_state_p; + if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE) + return; _synchronize_rcu_expedited(rsp, sync_rcu_exp_handler); } EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); @@ -693,3 +711,15 @@ void synchronize_rcu_expedited(void) EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ + +/* + * Switch to run-time mode once Tree RCU has fully initialized. + */ +static int __init rcu_exp_runtime_mode(void) +{ + rcu_test_sync_prims(); + rcu_scheduler_active = RCU_SCHEDULER_RUNNING; + rcu_test_sync_prims(); + return 0; +} +core_initcall(rcu_exp_runtime_mode); diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 85c5a883c6e3..56583e764ebf 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -670,7 +670,7 @@ void synchronize_rcu(void) lock_is_held(&rcu_lock_map) || lock_is_held(&rcu_sched_lock_map), "Illegal synchronize_rcu() in RCU read-side critical section"); - if (!rcu_scheduler_active) + if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE) return; if (rcu_gp_is_expedited()) synchronize_rcu_expedited(); diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index f19271dce0a9..4f6db7e6a117 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -121,11 +121,14 @@ EXPORT_SYMBOL(rcu_read_lock_sched_held); * Should expedited grace-period primitives always fall back to their * non-expedited counterparts? Intended for use within RCU. Note * that if the user specifies both rcu_expedited and rcu_normal, then - * rcu_normal wins. + * rcu_normal wins. (Except during the time period during boot from + * when the first task is spawned until the rcu_exp_runtime_mode() + * core_initcall() is invoked, at which point everything is expedited.) */ bool rcu_gp_is_normal(void) { - return READ_ONCE(rcu_normal); + return READ_ONCE(rcu_normal) && + rcu_scheduler_active != RCU_SCHEDULER_INIT; } EXPORT_SYMBOL_GPL(rcu_gp_is_normal); @@ -135,13 +138,14 @@ static atomic_t rcu_expedited_nesting = /* * Should normal grace-period primitives be expedited? Intended for * use within RCU. Note that this function takes the rcu_expedited - * sysfs/boot variable into account as well as the rcu_expedite_gp() - * nesting. So looping on rcu_unexpedite_gp() until rcu_gp_is_expedited() - * returns false is a -really- bad idea. + * sysfs/boot variable and rcu_scheduler_active into account as well + * as the rcu_expedite_gp() nesting. So looping on rcu_unexpedite_gp() + * until rcu_gp_is_expedited() returns false is a -really- bad idea. */ bool rcu_gp_is_expedited(void) { - return rcu_expedited || atomic_read(&rcu_expedited_nesting); + return rcu_expedited || atomic_read(&rcu_expedited_nesting) || + rcu_scheduler_active == RCU_SCHEDULER_INIT; } EXPORT_SYMBOL_GPL(rcu_gp_is_expedited); @@ -257,7 +261,7 @@ EXPORT_SYMBOL_GPL(rcu_callback_map); int notrace debug_lockdep_rcu_enabled(void) { - return rcu_scheduler_active && debug_locks && + return rcu_scheduler_active != RCU_SCHEDULER_INACTIVE && debug_locks && current->lockdep_recursion == 0; } EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled); @@ -591,7 +595,7 @@ EXPORT_SYMBOL_GPL(call_rcu_tasks); void synchronize_rcu_tasks(void) { /* Complain if the scheduler has not started. */ - RCU_LOCKDEP_WARN(!rcu_scheduler_active, + RCU_LOCKDEP_WARN(rcu_scheduler_active == RCU_SCHEDULER_INACTIVE, "synchronize_rcu_tasks called too soon"); /* Wait for the grace period. */ @@ -813,6 +817,23 @@ static void rcu_spawn_tasks_kthread(void) #endif /* #ifdef CONFIG_TASKS_RCU */ +/* + * Test each non-SRCU synchronous grace-period wait API. This is + * useful just after a change in mode for these primitives, and + * during early boot. + */ +void rcu_test_sync_prims(void) +{ + if (!IS_ENABLED(CONFIG_PROVE_RCU)) + return; + synchronize_rcu(); + synchronize_rcu_bh(); + synchronize_sched(); + synchronize_rcu_expedited(); + synchronize_rcu_bh_expedited(); + synchronize_sched_expedited(); +} + #ifdef CONFIG_PROVE_RCU /* @@ -865,6 +886,7 @@ void rcu_early_boot_tests(void) early_boot_test_call_rcu_bh(); if (rcu_self_test_sched) early_boot_test_call_rcu_sched(); + rcu_test_sync_prims(); } static int rcu_verify_early_boot_tests(void) -- cgit v1.2.3-71-gd317 From 4205e4786d0b9fc3b4fec7b1910cf645a0468307 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 10 Jan 2017 14:01:05 +0100 Subject: cpu/hotplug: Provide dynamic range for prepare stage Mathieu reported that the LTTNG modules are broken as of 4.10-rc1 due to the removal of the cpu hotplug notifiers. Usually I don't care much about out of tree modules, but LTTNG is widely used in distros. There are two ways to solve that: 1) Reserve a hotplug state for LTTNG 2) Add a dynamic range for the prepare states. While #1 is the simplest solution, #2 is the proper one as we can convert in tree users, which do not care about ordering, to the dynamic range as well. Add a dynamic range which allows LTTNG to request states in the prepare stage. Reported-and-tested-by: Mathieu Desnoyers Signed-off-by: Thomas Gleixner Reviewed-by: Mathieu Desnoyers Cc: Peter Zijlstra Cc: Sebastian Sewior Cc: Steven Rostedt Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1701101353010.3401@nanos Signed-off-by: Thomas Gleixner --- include/linux/cpuhotplug.h | 2 ++ kernel/cpu.c | 22 ++++++++++++++++++---- 2 files changed, 20 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 20bfefbe7594..d936a0021839 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -74,6 +74,8 @@ enum cpuhp_state { CPUHP_ZCOMP_PREPARE, CPUHP_TIMERS_DEAD, CPUHP_MIPS_SOC_PREPARE, + CPUHP_BP_PREPARE_DYN, + CPUHP_BP_PREPARE_DYN_END = CPUHP_BP_PREPARE_DYN + 20, CPUHP_BRINGUP_CPU, CPUHP_AP_IDLE_DEAD, CPUHP_AP_OFFLINE, diff --git a/kernel/cpu.c b/kernel/cpu.c index f75c4d031eeb..c47506357519 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -1302,10 +1302,24 @@ static int cpuhp_cb_check(enum cpuhp_state state) */ static int cpuhp_reserve_state(enum cpuhp_state state) { - enum cpuhp_state i; + enum cpuhp_state i, end; + struct cpuhp_step *step; - for (i = CPUHP_AP_ONLINE_DYN; i <= CPUHP_AP_ONLINE_DYN_END; i++) { - if (!cpuhp_ap_states[i].name) + switch (state) { + case CPUHP_AP_ONLINE_DYN: + step = cpuhp_ap_states + CPUHP_AP_ONLINE_DYN; + end = CPUHP_AP_ONLINE_DYN_END; + break; + case CPUHP_BP_PREPARE_DYN: + step = cpuhp_bp_states + CPUHP_BP_PREPARE_DYN; + end = CPUHP_BP_PREPARE_DYN_END; + break; + default: + return -EINVAL; + } + + for (i = state; i <= end; i++, step++) { + if (!step->name) return i; } WARN(1, "No more dynamic states available for CPU hotplug\n"); @@ -1323,7 +1337,7 @@ static int cpuhp_store_callbacks(enum cpuhp_state state, const char *name, mutex_lock(&cpuhp_state_mutex); - if (state == CPUHP_AP_ONLINE_DYN) { + if (state == CPUHP_AP_ONLINE_DYN || state == CPUHP_BP_PREPARE_DYN) { ret = cpuhp_reserve_state(state); if (ret < 0) goto out; -- cgit v1.2.3-71-gd317 From f1f7714ea51c56b7163fb1a5acf39c6a204dd758 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 13 Jan 2017 23:38:15 +0100 Subject: bpf: rework prog_digest into prog_tag Commit 7bd509e311f4 ("bpf: add prog_digest and expose it via fdinfo/netlink") was recently discussed, partially due to admittedly suboptimal name of "prog_digest" in combination with sha1 hash usage, thus inevitably and rightfully concerns about its security in terms of collision resistance were raised with regards to use-cases. The intended use cases are for debugging resp. introspection only for providing a stable "tag" over the instruction sequence that both kernel and user space can calculate independently. It's not usable at all for making a security relevant decision. So collisions where two different instruction sequences generate the same tag can happen, but ideally at a rather low rate. The "tag" will be dumped in hex and is short enough to introspect in tracepoints or kallsyms output along with other data such as stack trace, etc. Thus, this patch performs a rename into prog_tag and truncates the tag to a short output (64 bits) to make it obvious it's not collision-free. Should in future a hash or facility be needed with a security relevant focus, then we can think about requirements, constraints, etc that would fit to that situation. For now, rework the exposed parts for the current use cases as long as nothing has been released yet. Tested on x86_64 and s390x. Fixes: 7bd509e311f4 ("bpf: add prog_digest and expose it via fdinfo/netlink") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Cc: Andy Lutomirski Signed-off-by: David S. Miller --- include/linux/bpf.h | 2 +- include/linux/filter.h | 6 ++++-- include/uapi/linux/pkt_cls.h | 2 +- include/uapi/linux/tc_act/tc_bpf.h | 2 +- kernel/bpf/core.c | 14 ++++++++------ kernel/bpf/syscall.c | 8 ++++---- kernel/bpf/verifier.c | 2 +- net/sched/act_bpf.c | 5 ++--- net/sched/cls_bpf.c | 4 ++-- 9 files changed, 24 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index f74ae68086dc..05cf951df3fe 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -216,7 +216,7 @@ u64 bpf_tail_call(u64 ctx, u64 r2, u64 index, u64 r4, u64 r5); u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp); -int bpf_prog_calc_digest(struct bpf_prog *fp); +int bpf_prog_calc_tag(struct bpf_prog *fp); const struct bpf_func_proto *bpf_get_trace_printk_proto(void); diff --git a/include/linux/filter.h b/include/linux/filter.h index a0934e6c9bab..e4eb2546339a 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -57,6 +57,8 @@ struct bpf_prog_aux; /* BPF program can access up to 512 bytes of stack space. */ #define MAX_BPF_STACK 512 +#define BPF_TAG_SIZE 8 + /* Helper macros for filter block array initializers. */ /* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */ @@ -408,7 +410,7 @@ struct bpf_prog { kmemcheck_bitfield_end(meta); enum bpf_prog_type type; /* Type of BPF program */ u32 len; /* Number of filter blocks */ - u32 digest[SHA_DIGEST_WORDS]; /* Program digest */ + u8 tag[BPF_TAG_SIZE]; struct bpf_prog_aux *aux; /* Auxiliary fields */ struct sock_fprog_kern *orig_prog; /* Original BPF program */ unsigned int (*bpf_func)(const void *ctx, @@ -519,7 +521,7 @@ static inline u32 bpf_prog_insn_size(const struct bpf_prog *prog) return prog->len * sizeof(struct bpf_insn); } -static inline u32 bpf_prog_digest_scratch_size(const struct bpf_prog *prog) +static inline u32 bpf_prog_tag_scratch_size(const struct bpf_prog *prog) { return round_up(bpf_prog_insn_size(prog) + sizeof(__be64) + 1, SHA_MESSAGE_BYTES); diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index cb4bcdc58543..a4dcd88ec271 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -397,7 +397,7 @@ enum { TCA_BPF_NAME, TCA_BPF_FLAGS, TCA_BPF_FLAGS_GEN, - TCA_BPF_DIGEST, + TCA_BPF_TAG, __TCA_BPF_MAX, }; diff --git a/include/uapi/linux/tc_act/tc_bpf.h b/include/uapi/linux/tc_act/tc_bpf.h index a6b88a6f7f71..975b50dc8d1d 100644 --- a/include/uapi/linux/tc_act/tc_bpf.h +++ b/include/uapi/linux/tc_act/tc_bpf.h @@ -27,7 +27,7 @@ enum { TCA_ACT_BPF_FD, TCA_ACT_BPF_NAME, TCA_ACT_BPF_PAD, - TCA_ACT_BPF_DIGEST, + TCA_ACT_BPF_TAG, __TCA_ACT_BPF_MAX, }; #define TCA_ACT_BPF_MAX (__TCA_ACT_BPF_MAX - 1) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 1eb4f1303756..503d4211988a 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -146,10 +146,11 @@ void __bpf_prog_free(struct bpf_prog *fp) vfree(fp); } -int bpf_prog_calc_digest(struct bpf_prog *fp) +int bpf_prog_calc_tag(struct bpf_prog *fp) { const u32 bits_offset = SHA_MESSAGE_BYTES - sizeof(__be64); - u32 raw_size = bpf_prog_digest_scratch_size(fp); + u32 raw_size = bpf_prog_tag_scratch_size(fp); + u32 digest[SHA_DIGEST_WORDS]; u32 ws[SHA_WORKSPACE_WORDS]; u32 i, bsize, psize, blocks; struct bpf_insn *dst; @@ -162,7 +163,7 @@ int bpf_prog_calc_digest(struct bpf_prog *fp) if (!raw) return -ENOMEM; - sha_init(fp->digest); + sha_init(digest); memset(ws, 0, sizeof(ws)); /* We need to take out the map fd for the digest calculation @@ -204,13 +205,14 @@ int bpf_prog_calc_digest(struct bpf_prog *fp) *bits = cpu_to_be64((psize - 1) << 3); while (blocks--) { - sha_transform(fp->digest, todo, ws); + sha_transform(digest, todo, ws); todo += SHA_MESSAGE_BYTES; } - result = (__force __be32 *)fp->digest; + result = (__force __be32 *)digest; for (i = 0; i < SHA_DIGEST_WORDS; i++) - result[i] = cpu_to_be32(fp->digest[i]); + result[i] = cpu_to_be32(digest[i]); + memcpy(fp->tag, result, sizeof(fp->tag)); vfree(raw); return 0; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index e89acea22ecf..1d6b29e4e2c3 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -688,17 +688,17 @@ static int bpf_prog_release(struct inode *inode, struct file *filp) static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp) { const struct bpf_prog *prog = filp->private_data; - char prog_digest[sizeof(prog->digest) * 2 + 1] = { }; + char prog_tag[sizeof(prog->tag) * 2 + 1] = { }; - bin2hex(prog_digest, prog->digest, sizeof(prog->digest)); + bin2hex(prog_tag, prog->tag, sizeof(prog->tag)); seq_printf(m, "prog_type:\t%u\n" "prog_jited:\t%u\n" - "prog_digest:\t%s\n" + "prog_tag:\t%s\n" "memlock:\t%llu\n", prog->type, prog->jited, - prog_digest, + prog_tag, prog->pages * 1ULL << PAGE_SHIFT); } #endif diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 83ed2f8f6f22..cdc43b899f28 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2936,7 +2936,7 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env) int insn_cnt = env->prog->len; int i, j, err; - err = bpf_prog_calc_digest(env->prog); + err = bpf_prog_calc_tag(env->prog); if (err) return err; diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index 1c60317f0121..520baa41cba3 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -123,12 +123,11 @@ static int tcf_bpf_dump_ebpf_info(const struct tcf_bpf *prog, nla_put_string(skb, TCA_ACT_BPF_NAME, prog->bpf_name)) return -EMSGSIZE; - nla = nla_reserve(skb, TCA_ACT_BPF_DIGEST, - sizeof(prog->filter->digest)); + nla = nla_reserve(skb, TCA_ACT_BPF_TAG, sizeof(prog->filter->tag)); if (nla == NULL) return -EMSGSIZE; - memcpy(nla_data(nla), prog->filter->digest, nla_len(nla)); + memcpy(nla_data(nla), prog->filter->tag, nla_len(nla)); return 0; } diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index adc776048d1a..d9c97018317d 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -555,11 +555,11 @@ static int cls_bpf_dump_ebpf_info(const struct cls_bpf_prog *prog, nla_put_string(skb, TCA_BPF_NAME, prog->bpf_name)) return -EMSGSIZE; - nla = nla_reserve(skb, TCA_BPF_DIGEST, sizeof(prog->filter->digest)); + nla = nla_reserve(skb, TCA_BPF_TAG, sizeof(prog->filter->tag)); if (nla == NULL) return -EMSGSIZE; - memcpy(nla_data(nla), prog->filter->digest, nla_len(nla)); + memcpy(nla_data(nla), prog->filter->tag, nla_len(nla)); return 0; } -- cgit v1.2.3-71-gd317 From 5eb7c0d04f04a667c049fe090a95494a8de2955c Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Sun, 1 Jan 2017 20:25:25 -0600 Subject: taint/module: Fix problems when out-of-kernel driver defines true or false Commit 7fd8329ba502 ("taint/module: Clean up global and module taint flags handling") used the key words true and false as character members of a new struct. These names cause problems when out-of-kernel modules such as VirtualBox include their own definitions of true and false. Fixes: 7fd8329ba502 ("taint/module: Clean up global and module taint flags handling") Signed-off-by: Larry Finger Cc: Petr Mladek Cc: Jessica Yu Cc: Rusty Russell Reported-by: Valdis Kletnieks Reviewed-by: Petr Mladek Acked-by: Rusty Russell Signed-off-by: Jessica Yu --- include/linux/kernel.h | 4 ++-- kernel/module.c | 2 +- kernel/panic.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 56aec84237ad..cb09238f6d32 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -514,8 +514,8 @@ extern enum system_states { #define TAINT_FLAGS_COUNT 16 struct taint_flag { - char true; /* character printed when tainted */ - char false; /* character printed when not tainted */ + char c_true; /* character printed when tainted */ + char c_false; /* character printed when not tainted */ bool module; /* also show as a per-module taint flag */ }; diff --git a/kernel/module.c b/kernel/module.c index 5088784c0cf9..38d4270925d4 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1145,7 +1145,7 @@ static size_t module_flags_taint(struct module *mod, char *buf) for (i = 0; i < TAINT_FLAGS_COUNT; i++) { if (taint_flags[i].module && test_bit(i, &mod->taints)) - buf[l++] = taint_flags[i].true; + buf[l++] = taint_flags[i].c_true; } return l; diff --git a/kernel/panic.c b/kernel/panic.c index c51edaa04fce..901c4fb46002 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -355,7 +355,7 @@ const char *print_tainted(void) for (i = 0; i < TAINT_FLAGS_COUNT; i++) { const struct taint_flag *t = &taint_flags[i]; *s++ = test_bit(i, &tainted_mask) ? - t->true : t->false; + t->c_true : t->c_false; } *s = 0; } else -- cgit v1.2.3-71-gd317 From 5c34153704898ed7ec0f8c0dceb651cbe4b713fd Mon Sep 17 00:00:00 2001 From: Vijaya Kumar K Date: Thu, 26 Jan 2017 19:50:49 +0530 Subject: irqchip/gic-v3: Add missing system register definitions Define register definitions for ICH_VMCR_EL2, ICC_CTLR_EL1 and ICH_VTR_EL2, ICC_BPR0_EL1, ICC_BPR1_EL1 registers. Signed-off-by: Vijaya Kumar K Reviewed-by: Eric Auger Signed-off-by: Marc Zyngier --- include/linux/irqchip/arm-gic-v3.h | 43 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 41 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index e808f8ae6f14..7f6d904a62c6 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -352,8 +352,30 @@ /* * CPU interface registers */ -#define ICC_CTLR_EL1_EOImode_drop_dir (0U << 1) -#define ICC_CTLR_EL1_EOImode_drop (1U << 1) +#define ICC_CTLR_EL1_EOImode_SHIFT (1) +#define ICC_CTLR_EL1_EOImode_drop_dir (0U << ICC_CTLR_EL1_EOImode_SHIFT) +#define ICC_CTLR_EL1_EOImode_drop (1U << ICC_CTLR_EL1_EOImode_SHIFT) +#define ICC_CTLR_EL1_EOImode_MASK (1 << ICC_CTLR_EL1_EOImode_SHIFT) +#define ICC_CTLR_EL1_CBPR_SHIFT 0 +#define ICC_CTLR_EL1_CBPR_MASK (1 << ICC_CTLR_EL1_CBPR_SHIFT) +#define ICC_CTLR_EL1_PRI_BITS_SHIFT 8 +#define ICC_CTLR_EL1_PRI_BITS_MASK (0x7 << ICC_CTLR_EL1_PRI_BITS_SHIFT) +#define ICC_CTLR_EL1_ID_BITS_SHIFT 11 +#define ICC_CTLR_EL1_ID_BITS_MASK (0x7 << ICC_CTLR_EL1_ID_BITS_SHIFT) +#define ICC_CTLR_EL1_SEIS_SHIFT 14 +#define ICC_CTLR_EL1_SEIS_MASK (0x1 << ICC_CTLR_EL1_SEIS_SHIFT) +#define ICC_CTLR_EL1_A3V_SHIFT 15 +#define ICC_CTLR_EL1_A3V_MASK (0x1 << ICC_CTLR_EL1_A3V_SHIFT) +#define ICC_PMR_EL1_SHIFT 0 +#define ICC_PMR_EL1_MASK (0xff << ICC_PMR_EL1_SHIFT) +#define ICC_BPR0_EL1_SHIFT 0 +#define ICC_BPR0_EL1_MASK (0x7 << ICC_BPR0_EL1_SHIFT) +#define ICC_BPR1_EL1_SHIFT 0 +#define ICC_BPR1_EL1_MASK (0x7 << ICC_BPR1_EL1_SHIFT) +#define ICC_IGRPEN0_EL1_SHIFT 0 +#define ICC_IGRPEN0_EL1_MASK (1 << ICC_IGRPEN0_EL1_SHIFT) +#define ICC_IGRPEN1_EL1_SHIFT 0 +#define ICC_IGRPEN1_EL1_MASK (1 << ICC_IGRPEN1_EL1_SHIFT) #define ICC_SRE_EL1_SRE (1U << 0) /* @@ -384,12 +406,29 @@ #define ICH_VMCR_CTLR_SHIFT 0 #define ICH_VMCR_CTLR_MASK (0x21f << ICH_VMCR_CTLR_SHIFT) +#define ICH_VMCR_CBPR_SHIFT 4 +#define ICH_VMCR_CBPR_MASK (1 << ICH_VMCR_CBPR_SHIFT) +#define ICH_VMCR_EOIM_SHIFT 9 +#define ICH_VMCR_EOIM_MASK (1 << ICH_VMCR_EOIM_SHIFT) #define ICH_VMCR_BPR1_SHIFT 18 #define ICH_VMCR_BPR1_MASK (7 << ICH_VMCR_BPR1_SHIFT) #define ICH_VMCR_BPR0_SHIFT 21 #define ICH_VMCR_BPR0_MASK (7 << ICH_VMCR_BPR0_SHIFT) #define ICH_VMCR_PMR_SHIFT 24 #define ICH_VMCR_PMR_MASK (0xffUL << ICH_VMCR_PMR_SHIFT) +#define ICH_VMCR_ENG0_SHIFT 0 +#define ICH_VMCR_ENG0_MASK (1 << ICH_VMCR_ENG0_SHIFT) +#define ICH_VMCR_ENG1_SHIFT 1 +#define ICH_VMCR_ENG1_MASK (1 << ICH_VMCR_ENG1_SHIFT) + +#define ICH_VTR_PRI_BITS_SHIFT 29 +#define ICH_VTR_PRI_BITS_MASK (7 << ICH_VTR_PRI_BITS_SHIFT) +#define ICH_VTR_ID_BITS_SHIFT 23 +#define ICH_VTR_ID_BITS_MASK (7 << ICH_VTR_ID_BITS_SHIFT) +#define ICH_VTR_SEIS_SHIFT 22 +#define ICH_VTR_SEIS_MASK (1 << ICH_VTR_SEIS_SHIFT) +#define ICH_VTR_A3V_SHIFT 21 +#define ICH_VTR_A3V_MASK (1 << ICH_VTR_A3V_SHIFT) #define ICC_IAR1_EL1_SPURIOUS 0x3ff -- cgit v1.2.3-71-gd317 From 5fb247d79c04240dce86c842976cde1edde7f7ed Mon Sep 17 00:00:00 2001 From: Vijaya Kumar K Date: Thu, 26 Jan 2017 19:50:50 +0530 Subject: KVM: arm/arm64: vgic: Introduce VENG0 and VENG1 fields to vmcr struct ICC_VMCR_EL2 supports virtual access to ICC_IGRPEN1_EL1.Enable and ICC_IGRPEN0_EL1.Enable fields. Add grpen0 and grpen1 member variables to struct vmcr to support read and write of these fields. Also refactor vgic_set_vmcr and vgic_get_vmcr() code. Drop ICH_VMCR_CTLR_SHIFT and ICH_VMCR_CTLR_MASK macros and instead use ICH_VMCR_EOI* and ICH_VMCR_CBPR* macros. Signed-off-by: Vijaya Kumar K Reviewed-by: Christoffer Dall Reviewed-by: Eric Auger Signed-off-by: Marc Zyngier --- include/linux/irqchip/arm-gic-v3.h | 2 -- virt/kvm/arm/vgic/vgic-mmio-v2.c | 16 ---------------- virt/kvm/arm/vgic/vgic-mmio.c | 16 ++++++++++++++++ virt/kvm/arm/vgic/vgic-v3.c | 20 ++++++++++++++++++-- virt/kvm/arm/vgic/vgic.h | 5 +++++ 5 files changed, 39 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 7f6d904a62c6..170e00a40826 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -404,8 +404,6 @@ #define ICH_HCR_EN (1 << 0) #define ICH_HCR_UIE (1 << 1) -#define ICH_VMCR_CTLR_SHIFT 0 -#define ICH_VMCR_CTLR_MASK (0x21f << ICH_VMCR_CTLR_SHIFT) #define ICH_VMCR_CBPR_SHIFT 4 #define ICH_VMCR_CBPR_MASK (1 << ICH_VMCR_CBPR_SHIFT) #define ICH_VMCR_EOIM_SHIFT 9 diff --git a/virt/kvm/arm/vgic/vgic-mmio-v2.c b/virt/kvm/arm/vgic/vgic-mmio-v2.c index fa68dd41756e..a3ad7ff95c9b 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v2.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v2.c @@ -213,22 +213,6 @@ static void vgic_mmio_write_sgipends(struct kvm_vcpu *vcpu, } } -static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) -{ - if (kvm_vgic_global_state.type == VGIC_V2) - vgic_v2_set_vmcr(vcpu, vmcr); - else - vgic_v3_set_vmcr(vcpu, vmcr); -} - -static void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) -{ - if (kvm_vgic_global_state.type == VGIC_V2) - vgic_v2_get_vmcr(vcpu, vmcr); - else - vgic_v3_get_vmcr(vcpu, vmcr); -} - #define GICC_ARCH_VERSION_V2 0x2 /* These are for userland accesses only, there is no guest-facing emulation. */ diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c index 746c8afca718..1d1886e7d640 100644 --- a/virt/kvm/arm/vgic/vgic-mmio.c +++ b/virt/kvm/arm/vgic/vgic-mmio.c @@ -385,6 +385,22 @@ vgic_find_mmio_region(const struct vgic_register_region *region, int nr_regions, sizeof(region[0]), match_region); } +void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) +{ + if (kvm_vgic_global_state.type == VGIC_V2) + vgic_v2_set_vmcr(vcpu, vmcr); + else + vgic_v3_set_vmcr(vcpu, vmcr); +} + +void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) +{ + if (kvm_vgic_global_state.type == VGIC_V2) + vgic_v2_get_vmcr(vcpu, vmcr); + else + vgic_v3_get_vmcr(vcpu, vmcr); +} + /* * kvm_mmio_read_buf() returns a value in a format where it can be converted * to a byte array and be directly observed as the guest wanted it to appear diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index 679ba935698f..42ff9c9826d3 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -175,10 +175,18 @@ void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) { u32 vmcr; - vmcr = (vmcrp->ctlr << ICH_VMCR_CTLR_SHIFT) & ICH_VMCR_CTLR_MASK; + /* + * Ignore the FIQen bit, because GIC emulation always implies + * SRE=1 which means the vFIQEn bit is also RES1. + */ + vmcr = ((vmcrp->ctlr >> ICC_CTLR_EL1_EOImode_SHIFT) << + ICH_VMCR_EOIM_SHIFT) & ICH_VMCR_EOIM_MASK; + vmcr |= (vmcrp->ctlr << ICH_VMCR_CBPR_SHIFT) & ICH_VMCR_CBPR_MASK; vmcr |= (vmcrp->abpr << ICH_VMCR_BPR1_SHIFT) & ICH_VMCR_BPR1_MASK; vmcr |= (vmcrp->bpr << ICH_VMCR_BPR0_SHIFT) & ICH_VMCR_BPR0_MASK; vmcr |= (vmcrp->pmr << ICH_VMCR_PMR_SHIFT) & ICH_VMCR_PMR_MASK; + vmcr |= (vmcrp->grpen0 << ICH_VMCR_ENG0_SHIFT) & ICH_VMCR_ENG0_MASK; + vmcr |= (vmcrp->grpen1 << ICH_VMCR_ENG1_SHIFT) & ICH_VMCR_ENG1_MASK; vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr = vmcr; } @@ -187,10 +195,18 @@ void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) { u32 vmcr = vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr; - vmcrp->ctlr = (vmcr & ICH_VMCR_CTLR_MASK) >> ICH_VMCR_CTLR_SHIFT; + /* + * Ignore the FIQen bit, because GIC emulation always implies + * SRE=1 which means the vFIQEn bit is also RES1. + */ + vmcrp->ctlr = ((vmcr >> ICH_VMCR_EOIM_SHIFT) << + ICC_CTLR_EL1_EOImode_SHIFT) & ICC_CTLR_EL1_EOImode_MASK; + vmcrp->ctlr |= (vmcr & ICH_VMCR_CBPR_MASK) >> ICH_VMCR_CBPR_SHIFT; vmcrp->abpr = (vmcr & ICH_VMCR_BPR1_MASK) >> ICH_VMCR_BPR1_SHIFT; vmcrp->bpr = (vmcr & ICH_VMCR_BPR0_MASK) >> ICH_VMCR_BPR0_SHIFT; vmcrp->pmr = (vmcr & ICH_VMCR_PMR_MASK) >> ICH_VMCR_PMR_SHIFT; + vmcrp->grpen0 = (vmcr & ICH_VMCR_ENG0_MASK) >> ICH_VMCR_ENG0_SHIFT; + vmcrp->grpen1 = (vmcr & ICH_VMCR_ENG1_MASK) >> ICH_VMCR_ENG1_SHIFT; } #define INITIAL_PENDBASER_VALUE \ diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h index 4505fd4919fd..ecfe1a6c841a 100644 --- a/virt/kvm/arm/vgic/vgic.h +++ b/virt/kvm/arm/vgic/vgic.h @@ -65,6 +65,9 @@ struct vgic_vmcr { u32 abpr; u32 bpr; u32 pmr; + /* Below member variable are valid only for GICv3 */ + u32 grpen0; + u32 grpen1; }; struct vgic_reg_attr { @@ -137,6 +140,8 @@ int vgic_v3_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write, int vgic_v3_redist_uaccess(struct kvm_vcpu *vcpu, bool is_write, int offset, u32 *val); int kvm_register_vgic_device(unsigned long type); +void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); +void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); int vgic_lazy_init(struct kvm *kvm); int vgic_init(struct kvm *kvm); -- cgit v1.2.3-71-gd317 From 9d93dc1c96ec446bef9c34a189ea24556f1af89a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 25 Jan 2017 13:47:43 +0000 Subject: arm/arm64: KVM: Get rid of KVM_MEMSLOT_INCOHERENT KVM_MEMSLOT_INCOHERENT is not used anymore, as we've killed its only use in the arm/arm64 MMU code. Let's remove the last artifacts. Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier --- arch/arm/kvm/mmu.c | 9 --------- include/linux/kvm_host.h | 1 - 2 files changed, 10 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 5cc35080cbf7..962616fd4ddd 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -1876,15 +1876,6 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, unsigned long npages) { - /* - * Readonly memslots are not incoherent with the caches by definition, - * but in practice, they are used mostly to emulate ROMs or NOR flashes - * that the guest may consider devices and hence map as uncached. - * To prevent incoherency issues in these cases, tag all readonly - * regions as incoherent. - */ - if (slot->flags & KVM_MEM_READONLY) - slot->flags |= KVM_MEMSLOT_INCOHERENT; return 0; } diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 1c5190dab2c1..cda457bcedc1 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -45,7 +45,6 @@ * include/linux/kvm_h. */ #define KVM_MEMSLOT_INVALID (1UL << 16) -#define KVM_MEMSLOT_INCOHERENT (1UL << 17) /* Two fragments for cross MMIO pages. */ #define KVM_MAX_MMIO_FRAGMENTS 2 -- cgit v1.2.3-71-gd317