From c3a6cf19e695c8b0a9bf8b5933f863e12d878b7c Mon Sep 17 00:00:00 2001
From: Masahiro Yamada
Date: Fri, 18 Oct 2019 10:31:43 +0100
Subject: export: avoid code duplication in include/linux/export.h

include/linux/export.h has lots of code duplication between EXPORT_SYMBOL and
EXPORT_SYMBOL_NS. To improve maintainability and readability, unify the
implementation. When the symbol has no namespace, pass the empty string "" to
the 'ns' parameter.

The drawback of this change is that it grows the code size. When the symbol
has no namespace, sym->namespace was previously NULL, but it is now an empty
string "". So it adds one byte for every EXPORT_SYMBOL without a namespace. A
typical kernel configuration has 10K exported symbols, so the increase is
roughly 10KB. I did not come up with a good way to refactor it without
increasing the code size. I am not sure how big a deal it is, but at least
include/linux/export.h looks nicer.

Reviewed-by: Greg Kroah-Hartman
Signed-off-by: Masahiro Yamada
[maennich: rebase on top of 3 fixes for the namespace feature]
Signed-off-by: Matthias Maennich
Signed-off-by: Jessica Yu
---
 kernel/module.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/module.c b/kernel/module.c
index ff2d7359a418..26c13173da3d 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1400,7 +1400,7 @@ static int verify_namespace_is_imported(const struct load_info *info,
 	char *imported_namespace;

 	namespace = kernel_symbol_namespace(sym);
-	if (namespace) {
+	if (namespace && namespace[0]) {
 		imported_namespace = get_modinfo(info, "import_ns");
 		while (imported_namespace) {
 			if (strcmp(namespace, imported_namespace) == 0)
-- cgit v1.2.3-71-gd317

From 56144737e67329c9aaed15f942d46a6302e2e3d8 Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Wed, 6 Nov 2019 09:48:04 -0800
Subject: hrtimer: Annotate lockless access to timer->state

syzbot reported various data races caused by hrtimer_is_queued() reading
timer->state. A READ_ONCE() is required there to silence the warning. Also add
the corresponding WRITE_ONCE() when timer->state is set. In remove_hrtimer()
the hrtimer_is_queued() helper is open coded to avoid loading timer->state
twice.
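To illustrate the pattern being annotated, here is a minimal sketch (made-up names, not code from this patch) of a writer and a lockless reader pairing WRITE_ONCE() with READ_ONCE(), so the compiler can neither tear nor re-fetch the access:

    #include <linux/compiler.h>
    #include <linux/types.h>

    /* Illustrative only: 'sample_timer' is a hypothetical structure. */
    struct sample_timer {
            u8 state;       /* written under the base lock, read locklessly */
    };

    #define SAMPLE_STATE_ENQUEUED   0x01

    /* Writer side: runs with the base lock held. */
    static void sample_set_state(struct sample_timer *t, u8 newstate)
    {
            /* Pairs with the lockless READ_ONCE() in sample_is_queued() */
            WRITE_ONCE(t->state, newstate);
    }

    /* Reader side: may run without the lock; the result is only a snapshot. */
    static bool sample_is_queued(struct sample_timer *t)
    {
            return !!(READ_ONCE(t->state) & SAMPLE_STATE_ENQUEUED);
    }

KCSAN flags plain loads that race with plain stores; once both sides are marked like this, the accesses are treated as intentionally concurrent and reports such as the ones below go away.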
KCSAN reported these cases: BUG: KCSAN: data-race in __remove_hrtimer / tcp_pacing_check write to 0xffff8880b2a7d388 of 1 bytes by interrupt on cpu 0: __remove_hrtimer+0x52/0x130 kernel/time/hrtimer.c:991 __run_hrtimer kernel/time/hrtimer.c:1496 [inline] __hrtimer_run_queues+0x250/0x600 kernel/time/hrtimer.c:1576 hrtimer_run_softirq+0x10e/0x150 kernel/time/hrtimer.c:1593 __do_softirq+0x115/0x33f kernel/softirq.c:292 run_ksoftirqd+0x46/0x60 kernel/softirq.c:603 smpboot_thread_fn+0x37d/0x4a0 kernel/smpboot.c:165 kthread+0x1d4/0x200 drivers/block/aoe/aoecmd.c:1253 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:352 read to 0xffff8880b2a7d388 of 1 bytes by task 24652 on cpu 1: tcp_pacing_check net/ipv4/tcp_output.c:2235 [inline] tcp_pacing_check+0xba/0x130 net/ipv4/tcp_output.c:2225 tcp_xmit_retransmit_queue+0x32c/0x5a0 net/ipv4/tcp_output.c:3044 tcp_xmit_recovery+0x7c/0x120 net/ipv4/tcp_input.c:3558 tcp_ack+0x17b6/0x3170 net/ipv4/tcp_input.c:3717 tcp_rcv_established+0x37e/0xf50 net/ipv4/tcp_input.c:5696 tcp_v4_do_rcv+0x381/0x4e0 net/ipv4/tcp_ipv4.c:1561 sk_backlog_rcv include/net/sock.h:945 [inline] __release_sock+0x135/0x1e0 net/core/sock.c:2435 release_sock+0x61/0x160 net/core/sock.c:2951 sk_stream_wait_memory+0x3d7/0x7c0 net/core/stream.c:145 tcp_sendmsg_locked+0xb47/0x1f30 net/ipv4/tcp.c:1393 tcp_sendmsg+0x39/0x60 net/ipv4/tcp.c:1434 inet_sendmsg+0x6d/0x90 net/ipv4/af_inet.c:807 sock_sendmsg_nosec net/socket.c:637 [inline] sock_sendmsg+0x9f/0xc0 net/socket.c:657 BUG: KCSAN: data-race in __remove_hrtimer / __tcp_ack_snd_check write to 0xffff8880a3a65588 of 1 bytes by interrupt on cpu 0: __remove_hrtimer+0x52/0x130 kernel/time/hrtimer.c:991 __run_hrtimer kernel/time/hrtimer.c:1496 [inline] __hrtimer_run_queues+0x250/0x600 kernel/time/hrtimer.c:1576 hrtimer_run_softirq+0x10e/0x150 kernel/time/hrtimer.c:1593 __do_softirq+0x115/0x33f kernel/softirq.c:292 invoke_softirq kernel/softirq.c:373 [inline] irq_exit+0xbb/0xe0 kernel/softirq.c:413 exiting_irq arch/x86/include/asm/apic.h:536 [inline] smp_apic_timer_interrupt+0xe6/0x280 arch/x86/kernel/apic/apic.c:1137 apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:830 read to 0xffff8880a3a65588 of 1 bytes by task 22891 on cpu 1: __tcp_ack_snd_check+0x415/0x4f0 net/ipv4/tcp_input.c:5265 tcp_ack_snd_check net/ipv4/tcp_input.c:5287 [inline] tcp_rcv_established+0x750/0xf50 net/ipv4/tcp_input.c:5708 tcp_v4_do_rcv+0x381/0x4e0 net/ipv4/tcp_ipv4.c:1561 sk_backlog_rcv include/net/sock.h:945 [inline] __release_sock+0x135/0x1e0 net/core/sock.c:2435 release_sock+0x61/0x160 net/core/sock.c:2951 sk_stream_wait_memory+0x3d7/0x7c0 net/core/stream.c:145 tcp_sendmsg_locked+0xb47/0x1f30 net/ipv4/tcp.c:1393 tcp_sendmsg+0x39/0x60 net/ipv4/tcp.c:1434 inet_sendmsg+0x6d/0x90 net/ipv4/af_inet.c:807 sock_sendmsg_nosec net/socket.c:637 [inline] sock_sendmsg+0x9f/0xc0 net/socket.c:657 __sys_sendto+0x21f/0x320 net/socket.c:1952 __do_sys_sendto net/socket.c:1964 [inline] __se_sys_sendto net/socket.c:1960 [inline] __x64_sys_sendto+0x89/0xb0 net/socket.c:1960 do_syscall_64+0xcc/0x370 arch/x86/entry/common.c:290 Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 24652 Comm: syz-executor.3 Not tainted 5.4.0-rc3+ #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 [ tglx: Added comments ] Reported-by: syzbot Signed-off-by: Eric Dumazet Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20191106174804.74723-1-edumazet@google.com --- include/linux/hrtimer.h | 14 ++++++++++---- kernel/time/hrtimer.c | 11 +++++++---- 2 
files changed, 17 insertions(+), 8 deletions(-) (limited to 'kernel') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 1b9a51a1bccb..1f98b52118f0 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -456,12 +456,18 @@ extern u64 hrtimer_next_event_without(const struct hrtimer *exclude); extern bool hrtimer_active(const struct hrtimer *timer); -/* - * Helper function to check, whether the timer is on one of the queues +/** + * hrtimer_is_queued = check, whether the timer is on one of the queues + * @timer: Timer to check + * + * Returns: True if the timer is queued, false otherwise + * + * The function can be used lockless, but it gives only a current snapshot. */ -static inline int hrtimer_is_queued(struct hrtimer *timer) +static inline bool hrtimer_is_queued(struct hrtimer *timer) { - return timer->state & HRTIMER_STATE_ENQUEUED; + /* The READ_ONCE pairs with the update functions of timer->state */ + return !!(READ_ONCE(timer->state) & HRTIMER_STATE_ENQUEUED); } /* diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 65605530ee34..7f31932216a1 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -966,7 +966,8 @@ static int enqueue_hrtimer(struct hrtimer *timer, base->cpu_base->active_bases |= 1 << base->index; - timer->state = HRTIMER_STATE_ENQUEUED; + /* Pairs with the lockless read in hrtimer_is_queued() */ + WRITE_ONCE(timer->state, HRTIMER_STATE_ENQUEUED); return timerqueue_add(&base->active, &timer->node); } @@ -988,7 +989,8 @@ static void __remove_hrtimer(struct hrtimer *timer, struct hrtimer_cpu_base *cpu_base = base->cpu_base; u8 state = timer->state; - timer->state = newstate; + /* Pairs with the lockless read in hrtimer_is_queued() */ + WRITE_ONCE(timer->state, newstate); if (!(state & HRTIMER_STATE_ENQUEUED)) return; @@ -1013,8 +1015,9 @@ static void __remove_hrtimer(struct hrtimer *timer, static inline int remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, bool restart) { - if (hrtimer_is_queued(timer)) { - u8 state = timer->state; + u8 state = timer->state; + + if (state & HRTIMER_STATE_ENQUEUED) { int reprogram; /* -- cgit v1.2.3-71-gd317 From 3f6ec871e1c2b360aaf022e90bb99dcc016b3874 Mon Sep 17 00:00:00 2001 From: Amit Kucheria Date: Mon, 21 Oct 2019 17:45:12 +0530 Subject: cpufreq: Initialize the governors in core_initcall Initialize the cpufreq governors earlier to allow for earlier performance control during the boot process. Signed-off-by: Amit Kucheria Acked-by: Viresh Kumar Reviewed-by: Rafael J. 
Wysocki Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/b98eae9b44eb2f034d7f5d12a161f5f831be1eb7.1571656015.git.amit.kucheria@linaro.org --- drivers/cpufreq/cpufreq_conservative.c | 2 +- drivers/cpufreq/cpufreq_ondemand.c | 2 +- drivers/cpufreq/cpufreq_performance.c | 2 +- drivers/cpufreq/cpufreq_powersave.c | 2 +- drivers/cpufreq/cpufreq_userspace.c | 2 +- kernel/sched/cpufreq_schedutil.c | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index b66e81c06a57..737ff3b9c2c0 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -346,7 +346,7 @@ struct cpufreq_governor *cpufreq_default_governor(void) return CPU_FREQ_GOV_CONSERVATIVE; } -fs_initcall(cpufreq_gov_dbs_init); +core_initcall(cpufreq_gov_dbs_init); #else module_init(cpufreq_gov_dbs_init); #endif diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index dced033875bf..82a4d37ddecb 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -483,7 +483,7 @@ struct cpufreq_governor *cpufreq_default_governor(void) return CPU_FREQ_GOV_ONDEMAND; } -fs_initcall(cpufreq_gov_dbs_init); +core_initcall(cpufreq_gov_dbs_init); #else module_init(cpufreq_gov_dbs_init); #endif diff --git a/drivers/cpufreq/cpufreq_performance.c b/drivers/cpufreq/cpufreq_performance.c index aaa04dfcacd9..def9afe0f5b8 100644 --- a/drivers/cpufreq/cpufreq_performance.c +++ b/drivers/cpufreq/cpufreq_performance.c @@ -50,5 +50,5 @@ MODULE_AUTHOR("Dominik Brodowski "); MODULE_DESCRIPTION("CPUfreq policy governor 'performance'"); MODULE_LICENSE("GPL"); -fs_initcall(cpufreq_gov_performance_init); +core_initcall(cpufreq_gov_performance_init); module_exit(cpufreq_gov_performance_exit); diff --git a/drivers/cpufreq/cpufreq_powersave.c b/drivers/cpufreq/cpufreq_powersave.c index c143dc237d87..1ae66019eb83 100644 --- a/drivers/cpufreq/cpufreq_powersave.c +++ b/drivers/cpufreq/cpufreq_powersave.c @@ -43,7 +43,7 @@ struct cpufreq_governor *cpufreq_default_governor(void) return &cpufreq_gov_powersave; } -fs_initcall(cpufreq_gov_powersave_init); +core_initcall(cpufreq_gov_powersave_init); #else module_init(cpufreq_gov_powersave_init); #endif diff --git a/drivers/cpufreq/cpufreq_userspace.c b/drivers/cpufreq/cpufreq_userspace.c index cbd81c58cb8f..b43e7cd502c5 100644 --- a/drivers/cpufreq/cpufreq_userspace.c +++ b/drivers/cpufreq/cpufreq_userspace.c @@ -147,7 +147,7 @@ struct cpufreq_governor *cpufreq_default_governor(void) return &cpufreq_gov_userspace; } -fs_initcall(cpufreq_gov_userspace_init); +core_initcall(cpufreq_gov_userspace_init); #else module_init(cpufreq_gov_userspace_init); #endif diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 86800b4d5453..322ca8860f54 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -915,7 +915,7 @@ static int __init sugov_register(void) { return cpufreq_register_governor(&schedutil_gov); } -fs_initcall(sugov_register); +core_initcall(sugov_register); #ifdef CONFIG_ENERGY_MODEL extern bool sched_energy_update; -- cgit v1.2.3-71-gd317 From 153bedbac2ebd475e1c7c2d2fa0c042f5525927d Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 8 Nov 2019 17:08:55 +0100 Subject: irq_work: Convert flags to atomic_t We need to convert flags to atomic_t in order to later fix an ordering issue on atomic_cmpxchg() failure. 
This will allow us to use atomic_fetch_or(). Also clarify the nature of those flags. [ mingo: Converted two more usage site the original patch missed. ] Signed-off-by: Frederic Weisbecker Cc: Linus Torvalds Cc: Paul E . McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Link: https://lkml.kernel.org/r/20191108160858.31665-2-frederic@kernel.org Signed-off-by: Ingo Molnar --- include/linux/irq_work.h | 10 +++++++--- kernel/bpf/stackmap.c | 2 +- kernel/irq_work.c | 18 +++++++++--------- kernel/printk/printk.c | 2 +- kernel/trace/bpf_trace.c | 2 +- 5 files changed, 19 insertions(+), 15 deletions(-) (limited to 'kernel') diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h index b11fcdfd0770..02da997ad12c 100644 --- a/include/linux/irq_work.h +++ b/include/linux/irq_work.h @@ -22,7 +22,7 @@ #define IRQ_WORK_CLAIMED (IRQ_WORK_PENDING | IRQ_WORK_BUSY) struct irq_work { - unsigned long flags; + atomic_t flags; struct llist_node llnode; void (*func)(struct irq_work *); }; @@ -30,11 +30,15 @@ struct irq_work { static inline void init_irq_work(struct irq_work *work, void (*func)(struct irq_work *)) { - work->flags = 0; + atomic_set(&work->flags, 0); work->func = func; } -#define DEFINE_IRQ_WORK(name, _f) struct irq_work name = { .func = (_f), } +#define DEFINE_IRQ_WORK(name, _f) struct irq_work name = { \ + .flags = ATOMIC_INIT(0), \ + .func = (_f) \ +} + bool irq_work_queue(struct irq_work *work); bool irq_work_queue_on(struct irq_work *work, int cpu); diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index 052580c33d26..4d31284095e2 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -289,7 +289,7 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs, if (in_nmi()) { work = this_cpu_ptr(&up_read_work); - if (work->irq_work.flags & IRQ_WORK_BUSY) + if (atomic_read(&work->irq_work.flags) & IRQ_WORK_BUSY) /* cannot queue more up_read, fallback */ irq_work_busy = true; } diff --git a/kernel/irq_work.c b/kernel/irq_work.c index d42acaf81886..df0dbf4d859b 100644 --- a/kernel/irq_work.c +++ b/kernel/irq_work.c @@ -29,16 +29,16 @@ static DEFINE_PER_CPU(struct llist_head, lazy_list); */ static bool irq_work_claim(struct irq_work *work) { - unsigned long flags, oflags, nflags; + int flags, oflags, nflags; /* * Start with our best wish as a premise but only trust any * flag value after cmpxchg() result. */ - flags = work->flags & ~IRQ_WORK_PENDING; + flags = atomic_read(&work->flags) & ~IRQ_WORK_PENDING; for (;;) { nflags = flags | IRQ_WORK_CLAIMED; - oflags = cmpxchg(&work->flags, flags, nflags); + oflags = atomic_cmpxchg(&work->flags, flags, nflags); if (oflags == flags) break; if (oflags & IRQ_WORK_PENDING) @@ -61,7 +61,7 @@ void __weak arch_irq_work_raise(void) static void __irq_work_queue_local(struct irq_work *work) { /* If the work is "lazy", handle it from next tick if any */ - if (work->flags & IRQ_WORK_LAZY) { + if (atomic_read(&work->flags) & IRQ_WORK_LAZY) { if (llist_add(&work->llnode, this_cpu_ptr(&lazy_list)) && tick_nohz_tick_stopped()) arch_irq_work_raise(); @@ -143,7 +143,7 @@ static void irq_work_run_list(struct llist_head *list) { struct irq_work *work, *tmp; struct llist_node *llnode; - unsigned long flags; + int flags; BUG_ON(!irqs_disabled()); @@ -159,15 +159,15 @@ static void irq_work_run_list(struct llist_head *list) * to claim that work don't rely on us to handle their data * while we are in the middle of the func. 
*/ - flags = work->flags & ~IRQ_WORK_PENDING; - xchg(&work->flags, flags); + flags = atomic_read(&work->flags) & ~IRQ_WORK_PENDING; + atomic_xchg(&work->flags, flags); work->func(work); /* * Clear the BUSY bit and return to the free state if * no-one else claimed it meanwhile. */ - (void)cmpxchg(&work->flags, flags, flags & ~IRQ_WORK_BUSY); + (void)atomic_cmpxchg(&work->flags, flags, flags & ~IRQ_WORK_BUSY); } } @@ -199,7 +199,7 @@ void irq_work_sync(struct irq_work *work) { lockdep_assert_irqs_enabled(); - while (work->flags & IRQ_WORK_BUSY) + while (atomic_read(&work->flags) & IRQ_WORK_BUSY) cpu_relax(); } EXPORT_SYMBOL_GPL(irq_work_sync); diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index ca65327a6de8..865727373a3b 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -2961,7 +2961,7 @@ static void wake_up_klogd_work_func(struct irq_work *irq_work) static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) = { .func = wake_up_klogd_work_func, - .flags = IRQ_WORK_LAZY, + .flags = ATOMIC_INIT(IRQ_WORK_LAZY), }; void wake_up_klogd(void) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 44bd08f2443b..ff467a4e2639 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -660,7 +660,7 @@ BPF_CALL_1(bpf_send_signal, u32, sig) return -EINVAL; work = this_cpu_ptr(&send_signal_work); - if (work->irq_work.flags & IRQ_WORK_BUSY) + if (atomic_read(&work->irq_work.flags) & IRQ_WORK_BUSY) return -EBUSY; /* Add the current task, which is the target of sending signal, -- cgit v1.2.3-71-gd317 From 25269871db1ad0cbbaafd5098cbdb40c8db4ccb9 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 8 Nov 2019 17:08:56 +0100 Subject: irq_work: Fix irq_work_claim() memory ordering When irq_work_claim() finds IRQ_WORK_PENDING flag already set, we just return and don't raise a new IPI. We expect the destination to see and handle our latest updades thanks to the pairing atomic_xchg() in irq_work_run_list(). But cmpxchg() doesn't guarantee a full memory barrier upon failure. So it's possible that the destination misses our latest updates. So use atomic_fetch_or() instead that is unconditionally fully ordered and also performs exactly what we want here and simplify the code. Signed-off-by: Frederic Weisbecker Cc: Linus Torvalds Cc: Paul E . McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Link: https://lkml.kernel.org/r/20191108160858.31665-3-frederic@kernel.org Signed-off-by: Ingo Molnar --- kernel/irq_work.c | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) (limited to 'kernel') diff --git a/kernel/irq_work.c b/kernel/irq_work.c index df0dbf4d859b..255454a48346 100644 --- a/kernel/irq_work.c +++ b/kernel/irq_work.c @@ -29,24 +29,16 @@ static DEFINE_PER_CPU(struct llist_head, lazy_list); */ static bool irq_work_claim(struct irq_work *work) { - int flags, oflags, nflags; + int oflags; + oflags = atomic_fetch_or(IRQ_WORK_CLAIMED, &work->flags); /* - * Start with our best wish as a premise but only trust any - * flag value after cmpxchg() result. + * If the work is already pending, no need to raise the IPI. + * The pairing atomic_xchg() in irq_work_run() makes sure + * everything we did before is visible. 
*/ - flags = atomic_read(&work->flags) & ~IRQ_WORK_PENDING; - for (;;) { - nflags = flags | IRQ_WORK_CLAIMED; - oflags = atomic_cmpxchg(&work->flags, flags, nflags); - if (oflags == flags) - break; - if (oflags & IRQ_WORK_PENDING) - return false; - flags = oflags; - cpu_relax(); - } - + if (oflags & IRQ_WORK_PENDING) + return false; return true; } -- cgit v1.2.3-71-gd317 From feb4a51323babe13315c3b783ea7f1cf25368918 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 8 Nov 2019 17:08:57 +0100 Subject: irq_work: Slightly simplify IRQ_WORK_PENDING clearing Instead of fetching the value of flags and perform an xchg() to clear a bit, just use atomic_fetch_andnot() that is more suitable to do that job in one operation while keeping the full ordering. Signed-off-by: Frederic Weisbecker Cc: Linus Torvalds Cc: Paul E . McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Link: https://lkml.kernel.org/r/20191108160858.31665-4-frederic@kernel.org Signed-off-by: Ingo Molnar --- kernel/irq_work.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/irq_work.c b/kernel/irq_work.c index 255454a48346..49c53f80a13a 100644 --- a/kernel/irq_work.c +++ b/kernel/irq_work.c @@ -34,7 +34,7 @@ static bool irq_work_claim(struct irq_work *work) oflags = atomic_fetch_or(IRQ_WORK_CLAIMED, &work->flags); /* * If the work is already pending, no need to raise the IPI. - * The pairing atomic_xchg() in irq_work_run() makes sure + * The pairing atomic_fetch_andnot() in irq_work_run() makes sure * everything we did before is visible. */ if (oflags & IRQ_WORK_PENDING) @@ -135,7 +135,6 @@ static void irq_work_run_list(struct llist_head *list) { struct irq_work *work, *tmp; struct llist_node *llnode; - int flags; BUG_ON(!irqs_disabled()); @@ -144,6 +143,7 @@ static void irq_work_run_list(struct llist_head *list) llnode = llist_del_all(list); llist_for_each_entry_safe(work, tmp, llnode, llnode) { + int flags; /* * Clear the PENDING bit, after this point the @work * can be re-used. @@ -151,8 +151,7 @@ static void irq_work_run_list(struct llist_head *list) * to claim that work don't rely on us to handle their data * while we are in the middle of the func. */ - flags = atomic_read(&work->flags) & ~IRQ_WORK_PENDING; - atomic_xchg(&work->flags, flags); + flags = atomic_fetch_andnot(IRQ_WORK_PENDING, &work->flags); work->func(work); /* -- cgit v1.2.3-71-gd317 From 9a066357184485784f782719093ff804d05b85db Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 8 Oct 2019 21:05:52 +0900 Subject: kheaders: remove unneeded 'cat' command piped to 'head' / 'tail' The 'head' and 'tail' commands can take a file path directly. So, you do not need to run 'cat'. cat kernel/kheaders.md5 | head -1 ... is equivalent to: head -1 kernel/kheaders.md5 and the latter saves forking one process. While I was here, I replaced 'head -1' with 'head -n 1'. I also replaced '==' with '=' since we do not have a good reason to use the bashism. 
Signed-off-by: Masahiro Yamada --- kernel/gen_kheaders.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/gen_kheaders.sh b/kernel/gen_kheaders.sh index 5a0fc0b0403a..b8054b0d5010 100755 --- a/kernel/gen_kheaders.sh +++ b/kernel/gen_kheaders.sh @@ -41,10 +41,10 @@ obj_files_md5="$(find $dir_list -name "*.h" | this_file_md5="$(ls -l $sfile | md5sum | cut -d ' ' -f1)" if [ -f $tarfile ]; then tarfile_md5="$(md5sum $tarfile | cut -d ' ' -f1)"; fi if [ -f kernel/kheaders.md5 ] && - [ "$(cat kernel/kheaders.md5|head -1)" == "$src_files_md5" ] && - [ "$(cat kernel/kheaders.md5|head -2|tail -1)" == "$obj_files_md5" ] && - [ "$(cat kernel/kheaders.md5|head -3|tail -1)" == "$this_file_md5" ] && - [ "$(cat kernel/kheaders.md5|tail -1)" == "$tarfile_md5" ]; then + [ "$(head -n 1 kernel/kheaders.md5)" = "$src_files_md5" ] && + [ "$(head -n 2 kernel/kheaders.md5 | tail -n 1)" = "$obj_files_md5" ] && + [ "$(head -n 3 kernel/kheaders.md5 | tail -n 1)" = "$this_file_md5" ] && + [ "$(tail -n 1 kernel/kheaders.md5)" = "$tarfile_md5" ]; then exit fi -- cgit v1.2.3-71-gd317 From 0e11773e76098729552b750ccff79374d1e62002 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 8 Oct 2019 21:05:53 +0900 Subject: kheaders: optimize md5sum calculation for in-tree builds This script computes md5sum of headers in srctree and in objtree. However, when we are building in-tree, we know the srctree and the objtree are the same. That is, we end up with the same computation twice. In fact, the first two lines of kernel/kheaders.md5 are always the same for in-tree builds. Unify the two md5sum calculations. For in-tree builds ($building_out_of_srctree is empty), we check only two directories, "include", and "arch/$SRCARCH/include". For out-of-tree builds ($building_out_of_srctree is 1), we check 4 directories, "$srctree/include", "$srctree/arch/$SRCARCH/include", "include", and "arch/$SRCARCH/include" since we know they are all different. Signed-off-by: Masahiro Yamada --- kernel/gen_kheaders.sh | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'kernel') diff --git a/kernel/gen_kheaders.sh b/kernel/gen_kheaders.sh index b8054b0d5010..6ff86e62787f 100755 --- a/kernel/gen_kheaders.sh +++ b/kernel/gen_kheaders.sh @@ -21,29 +21,30 @@ arch/$SRCARCH/include/ # Uncomment it for debugging. # if [ ! -f /tmp/iter ]; then iter=1; echo 1 > /tmp/iter; # else iter=$(($(cat /tmp/iter) + 1)); echo $iter > /tmp/iter; fi -# find $src_file_list -name "*.h" | xargs ls -l > /tmp/src-ls-$iter -# find $obj_file_list -name "*.h" | xargs ls -l > /tmp/obj-ls-$iter +# find $all_dirs -name "*.h" | xargs ls -l > /tmp/ls-$iter + +all_dirs= +if [ "$building_out_of_srctree" ]; then + for d in $dir_list; do + all_dirs="$all_dirs $srctree/$d" + done +fi +all_dirs="$all_dirs $dir_list" # include/generated/compile.h is ignored because it is touched even when none # of the source files changed. This causes pointless regeneration, so let us # ignore them for md5 calculation. 
-pushd $srctree > /dev/null -src_files_md5="$(find $dir_list -name "*.h" | - grep -v "include/generated/compile.h" | - grep -v "include/generated/autoconf.h" | - xargs ls -l | md5sum | cut -d ' ' -f1)" -popd > /dev/null -obj_files_md5="$(find $dir_list -name "*.h" | - grep -v "include/generated/compile.h" | - grep -v "include/generated/autoconf.h" | +headers_md5="$(find $all_dirs -name "*.h" | + grep -v "include/generated/compile.h" | + grep -v "include/generated/autoconf.h" | xargs ls -l | md5sum | cut -d ' ' -f1)" + # Any changes to this script will also cause a rebuild of the archive. this_file_md5="$(ls -l $sfile | md5sum | cut -d ' ' -f1)" if [ -f $tarfile ]; then tarfile_md5="$(md5sum $tarfile | cut -d ' ' -f1)"; fi if [ -f kernel/kheaders.md5 ] && - [ "$(head -n 1 kernel/kheaders.md5)" = "$src_files_md5" ] && - [ "$(head -n 2 kernel/kheaders.md5 | tail -n 1)" = "$obj_files_md5" ] && - [ "$(head -n 3 kernel/kheaders.md5 | tail -n 1)" = "$this_file_md5" ] && + [ "$(head -n 1 kernel/kheaders.md5)" = "$headers_md5" ] && + [ "$(head -n 2 kernel/kheaders.md5 | tail -n 1)" = "$this_file_md5" ] && [ "$(tail -n 1 kernel/kheaders.md5)" = "$tarfile_md5" ]; then exit fi @@ -79,8 +80,7 @@ find $cpio_dir -printf "./%P\n" | LC_ALL=C sort | \ --owner=0 --group=0 --numeric-owner --no-recursion \ -Jcf $tarfile -C $cpio_dir/ -T - > /dev/null -echo "$src_files_md5" > kernel/kheaders.md5 -echo "$obj_files_md5" >> kernel/kheaders.md5 +echo $headers_md5 > kernel/kheaders.md5 echo "$this_file_md5" >> kernel/kheaders.md5 echo "$(md5sum $tarfile | cut -d ' ' -f1)" >> kernel/kheaders.md5 -- cgit v1.2.3-71-gd317 From ea79e5168be644fdaf7d4e6a73eceaf07b3da76a Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 8 Oct 2019 21:05:54 +0900 Subject: kheaders: optimize header copy for in-tree builds This script copies headers by the cpio command twice; first from srctree, and then from objtree. However, when we building in-tree, we know the srctree and the objtree are the same. That is, all the headers copied by the first cpio are overwritten by the second one. Skip the first cpio when we are building in-tree. Signed-off-by: Masahiro Yamada --- kernel/gen_kheaders.sh | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/kernel/gen_kheaders.sh b/kernel/gen_kheaders.sh index 6ff86e62787f..0f7752dd93a6 100755 --- a/kernel/gen_kheaders.sh +++ b/kernel/gen_kheaders.sh @@ -56,14 +56,16 @@ fi rm -rf $cpio_dir mkdir $cpio_dir -pushd $srctree > /dev/null -for f in $dir_list; - do find "$f" -name "*.h"; -done | cpio --quiet -pd $cpio_dir -popd > /dev/null +if [ "$building_out_of_srctree" ]; then + pushd $srctree > /dev/null + for f in $dir_list + do find "$f" -name "*.h"; + done | cpio --quiet -pd $cpio_dir + popd > /dev/null +fi -# The second CPIO can complain if files already exist which can -# happen with out of tree builds. Just silence CPIO for now. +# The second CPIO can complain if files already exist which can happen with out +# of tree builds having stale headers in srctree. Just silence CPIO for now. for f in $dir_list; do find "$f" -name "*.h"; done | cpio --quiet -pd $cpio_dir >/dev/null 2>&1 -- cgit v1.2.3-71-gd317 From 1463f74f492eea7191f0178e01f3d38371a48210 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 8 Oct 2019 21:05:55 +0900 Subject: kheaders: remove the last bashism to allow sh to run it 'pushd' ... 'popd' is the last bash-specific code in this script. One way to avoid it is to run the code in a sub-shell. 
With that addressed, you can run this script with sh. I replaced $(BASH) with $(CONFIG_SHELL), and I changed the hashbang to #!/bin/sh. Signed-off-by: Masahiro Yamada --- kernel/Makefile | 2 +- kernel/gen_kheaders.sh | 13 +++++++------ 2 files changed, 8 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/kernel/Makefile b/kernel/Makefile index daad787fb795..42557f251fea 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -128,7 +128,7 @@ $(obj)/config_data.gz: $(KCONFIG_CONFIG) FORCE $(obj)/kheaders.o: $(obj)/kheaders_data.tar.xz quiet_cmd_genikh = CHK $(obj)/kheaders_data.tar.xz - cmd_genikh = $(BASH) $(srctree)/kernel/gen_kheaders.sh $@ + cmd_genikh = $(CONFIG_SHELL) $(srctree)/kernel/gen_kheaders.sh $@ $(obj)/kheaders_data.tar.xz: FORCE $(call cmd,genikh) diff --git a/kernel/gen_kheaders.sh b/kernel/gen_kheaders.sh index 0f7752dd93a6..dc5744b93f8c 100755 --- a/kernel/gen_kheaders.sh +++ b/kernel/gen_kheaders.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/bin/sh # SPDX-License-Identifier: GPL-2.0 # This script generates an archive consisting of kernel headers @@ -57,11 +57,12 @@ rm -rf $cpio_dir mkdir $cpio_dir if [ "$building_out_of_srctree" ]; then - pushd $srctree > /dev/null - for f in $dir_list - do find "$f" -name "*.h"; - done | cpio --quiet -pd $cpio_dir - popd > /dev/null + ( + cd $srctree + for f in $dir_list + do find "$f" -name "*.h"; + done | cpio --quiet -pd $cpio_dir + ) fi # The second CPIO can complain if files already exist which can happen with out -- cgit v1.2.3-71-gd317 From f276031b4e2f4c961ed6d8a42f0f0124ccac2e09 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 8 Oct 2019 21:05:56 +0900 Subject: kheaders: explain why include/config/autoconf.h is excluded from md5sum This comment block explains why include/generated/compile.h is omitted, but nothing about include/generated/autoconf.h, which might be more difficult to understand. Add more comments. Signed-off-by: Masahiro Yamada --- kernel/gen_kheaders.sh | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/gen_kheaders.sh b/kernel/gen_kheaders.sh index dc5744b93f8c..e13ca842eb7e 100755 --- a/kernel/gen_kheaders.sh +++ b/kernel/gen_kheaders.sh @@ -32,8 +32,15 @@ fi all_dirs="$all_dirs $dir_list" # include/generated/compile.h is ignored because it is touched even when none -# of the source files changed. This causes pointless regeneration, so let us -# ignore them for md5 calculation. +# of the source files changed. +# +# When Kconfig regenerates include/generated/autoconf.h, its timestamp is +# updated, but the contents might be still the same. When any CONFIG option is +# changed, Kconfig touches the corresponding timestamp file include/config/*.h. +# Hence, the md5sum detects the configuration change anyway. We do not need to +# check include/generated/autoconf.h explicitly. +# +# Ignore them for md5 calculation to avoid pointless regeneration. headers_md5="$(find $all_dirs -name "*.h" | grep -v "include/generated/compile.h" | grep -v "include/generated/autoconf.h" | -- cgit v1.2.3-71-gd317 From 20d087368d38c7350a4519a3b316ef7eb2504692 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 8 Nov 2019 21:34:25 +0100 Subject: time: Optimize ns_to_timespec64() ns_to_timespec64() calls div_s64_rem(), which is a rather slow function on 32-bit architectures, as it cannot take advantage of the do_div() optimizations for constant arguments. 
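As a worked example of what this conversion must produce (values chosen purely for illustration, not taken from the patch), timespec64 keeps tv_nsec non-negative, so negative nanosecond counts round tv_sec toward minus infinity:

    /* Illustration only, hypothetical inputs:
     *    1500000000 ns -> { .tv_sec =  1, .tv_nsec = 500000000 }
     *            -1 ns -> { .tv_sec = -1, .tv_nsec = 999999999 }
     *   -1500000000 ns -> { .tv_sec = -2, .tv_nsec = 500000000 }
     * In every case tv_sec * NSEC_PER_SEC + tv_nsec reproduces the input.
     */

Preserving that invariant for negative inputs is what the '- 1' adjustments in the negative branch of the patch below take care of.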
Open-code the div_s64_rem() function in ns_to_timespec64(), so a constant divider can be passed into the optimized div_u64_rem() function. Signed-off-by: Arnd Bergmann Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20191108203435.112759-3-arnd@arndb.de --- kernel/time/time.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) (limited to 'kernel') diff --git a/kernel/time/time.c b/kernel/time/time.c index 5c54ca632d08..45a358953f09 100644 --- a/kernel/time/time.c +++ b/kernel/time/time.c @@ -550,18 +550,21 @@ EXPORT_SYMBOL(set_normalized_timespec64); */ struct timespec64 ns_to_timespec64(const s64 nsec) { - struct timespec64 ts; + struct timespec64 ts = { 0, 0 }; s32 rem; - if (!nsec) - return (struct timespec64) {0, 0}; - - ts.tv_sec = div_s64_rem(nsec, NSEC_PER_SEC, &rem); - if (unlikely(rem < 0)) { - ts.tv_sec--; - rem += NSEC_PER_SEC; + if (likely(nsec > 0)) { + ts.tv_sec = div_u64_rem(nsec, NSEC_PER_SEC, &rem); + ts.tv_nsec = rem; + } else if (nsec < 0) { + /* + * With negative times, tv_sec points to the earlier + * second, and tv_nsec counts the nanoseconds since + * then, so tv_nsec is always a positive number. + */ + ts.tv_sec = -div_u64_rem(-nsec - 1, NSEC_PER_SEC, &rem) - 1; + ts.tv_nsec = NSEC_PER_SEC - rem - 1; } - ts.tv_nsec = rem; return ts; } -- cgit v1.2.3-71-gd317 From 1d6acc18fee71a0db6e4fbbfbdb247e0bd5b0655 Mon Sep 17 00:00:00 2001 From: Mukesh Ojha Date: Tue, 15 Oct 2019 13:03:39 +0530 Subject: time: Fix spelling mistake in comment witin => within Signed-off-by: Mukesh Ojha Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/1571124819-9639-1-git-send-email-mojha@codeaurora.org --- kernel/time/time.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/time/time.c b/kernel/time/time.c index 45a358953f09..ea6e7e47cc37 100644 --- a/kernel/time/time.c +++ b/kernel/time/time.c @@ -179,7 +179,7 @@ int do_sys_settimeofday64(const struct timespec64 *tv, const struct timezone *tz return error; if (tz) { - /* Verify we're witin the +-15 hrs range */ + /* Verify we're within the +-15 hrs range */ if (tz->tz_minuteswest > 15*60 || tz->tz_minuteswest < -15*60) return -EINVAL; -- cgit v1.2.3-71-gd317 From cf25e24db61cc9df42c47485a2ec2bff4e9a3692 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 7 Nov 2019 11:07:58 +0100 Subject: time: Rename tsk->real_start_time to ->start_boottime Since it stores CLOCK_BOOTTIME, not, as the name suggests, CLOCK_REALTIME, let's rename ->real_start_time to ->start_bootime. Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- fs/exec.c | 2 +- fs/proc/array.c | 2 +- include/linux/sched.h | 2 +- kernel/fork.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/fs/exec.c b/fs/exec.c index 555e93c7dec8..f4d0f3acf861 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1132,7 +1132,7 @@ static int de_thread(struct task_struct *tsk) * also take its birthdate (always earlier than our own). 
*/ tsk->start_time = leader->start_time; - tsk->real_start_time = leader->real_start_time; + tsk->start_boottime = leader->start_boottime; BUG_ON(!same_thread_group(leader, tsk)); BUG_ON(has_group_leader_pid(tsk)); diff --git a/fs/proc/array.c b/fs/proc/array.c index 46dcb6f0eccf..5efaf3708ec6 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -533,7 +533,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, nice = task_nice(task); /* convert nsec -> ticks */ - start_time = nsec_to_clock_t(task->real_start_time); + start_time = nsec_to_clock_t(task->start_boottime); seq_put_decimal_ull(m, "", pid_nr_ns(pid, ns)); seq_puts(m, " ("); diff --git a/include/linux/sched.h b/include/linux/sched.h index 67a1d86981a9..254128952eab 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -857,7 +857,7 @@ struct task_struct { u64 start_time; /* Boot based time in nsecs: */ - u64 real_start_time; + u64 start_boottime; /* MM fault and swap info: this can arguably be seen as either mm-specific or thread-specific: */ unsigned long min_flt; diff --git a/kernel/fork.c b/kernel/fork.c index bcdf53125210..1392ee8f4848 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2130,7 +2130,7 @@ static __latent_entropy struct task_struct *copy_process( */ p->start_time = ktime_get_ns(); - p->real_start_time = ktime_get_boottime_ns(); + p->start_boottime = ktime_get_boottime_ns(); /* * Make it visible to the rest of the system, but dont wake it up yet. -- cgit v1.2.3-71-gd317 From e9838bd51169af87ae248336d4c3fc59184a0e46 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 13 Nov 2019 18:12:01 +0100 Subject: irq_work: Fix IRQ_WORK_BUSY bit clearing While attempting to clear the busy bit at the end of a work execution, atomic_cmpxchg() expects the value of the flags with the pending bit cleared as the old value. However by mistake the value of the flags is passed without clearing the pending bit first. As a result, clearing the busy bit fails and irq_work_sync() may stall: watchdog: BUG: soft lockup - CPU#0 stuck for 22s! [blktrace:4948] CPU: 0 PID: 4948 Comm: blktrace Not tainted 5.4.0-rc7-00003-gfeb4a51323bab #1 RIP: 0010:irq_work_sync+0x4/0x10 Call Trace: relay_close_buf+0x19/0x50 relay_close+0x64/0x100 blk_trace_free+0x1f/0x50 __blk_trace_remove+0x1e/0x30 blk_trace_ioctl+0x11b/0x140 blkdev_ioctl+0x6c1/0xa40 block_ioctl+0x39/0x40 do_vfs_ioctl+0xa5/0x700 ksys_ioctl+0x70/0x80 __x64_sys_ioctl+0x16/0x20 do_syscall_64+0x5b/0x1d0 entry_SYSCALL_64_after_hwframe+0x44/0xa9 So clear the appropriate bit before passing the old flags to cmpxchg(). Fixes: feb4a51323ba ("irq_work: Slightly simplify IRQ_WORK_PENDING clearing") Reported-by: kernel test robot Reported-by: Leonard Crestez Signed-off-by: Frederic Weisbecker Signed-off-by: Thomas Gleixner Tested-by: Leonard Crestez Link: https://lkml.kernel.org/r/20191113171201.14032-1-frederic@kernel.org --- kernel/irq_work.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel') diff --git a/kernel/irq_work.c b/kernel/irq_work.c index 49c53f80a13a..828cc30774bc 100644 --- a/kernel/irq_work.c +++ b/kernel/irq_work.c @@ -158,6 +158,7 @@ static void irq_work_run_list(struct llist_head *list) * Clear the BUSY bit and return to the free state if * no-one else claimed it meanwhile. 
*/ + flags &= ~IRQ_WORK_PENDING; (void)atomic_cmpxchg(&work->flags, flags, flags & ~IRQ_WORK_BUSY); } } -- cgit v1.2.3-71-gd317 From 20a15ee040f23bd553d4e6bbb1f8724ccd282abc Mon Sep 17 00:00:00 2001 From: luanshi Date: Wed, 13 Nov 2019 22:41:33 +0800 Subject: genirq: Fix function documentation of __irq_alloc_descs() The function got renamed at some point, but the kernel-doc was not updated. Signed-off-by: Liguang Zhang Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/1573656093-8643-1-git-send-email-zhangliguang@linux.alibaba.com --- kernel/irq/irqdesc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 9be995fc3c5a..5b8fdd659e54 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -750,7 +750,7 @@ void irq_free_descs(unsigned int from, unsigned int cnt) EXPORT_SYMBOL_GPL(irq_free_descs); /** - * irq_alloc_descs - allocate and initialize a range of irq descriptors + * __irq_alloc_descs - allocate and initialize a range of irq descriptors * @irq: Allocate for specific irq number if irq >= 0 * @from: Start the search from this irq number * @cnt: Number of consecutive irqs to allocate. -- cgit v1.2.3-71-gd317 From 5d603311615f612320bb77bd2a82553ef1ced5b7 Mon Sep 17 00:00:00 2001 From: Konstantin Khorenko Date: Wed, 13 Nov 2019 12:29:50 +0300 Subject: kernel/module.c: wakeup processes in module_wq on module unload Fix the race between load and unload a kernel module. sys_delete_module() try_stop_module() mod->state = _GOING add_unformed_module() old = find_module_all() (old->state == _GOING => wait_event_interruptible()) During pre-condition finished_loading() rets 0 schedule() (never gets waken up later) free_module() mod->state = _UNFORMED list_del_rcu(&mod->list) (dels mod from "modules" list) return The race above leads to modprobe hanging forever on loading a module. Error paths on loading module call wake_up_all(&module_wq) after freeing module, so let's do the same on straight module unload. Fixes: 6e6de3dee51a ("kernel/module.c: Only return -EEXIST for modules that have finished loading") Reviewed-by: Prarit Bhargava Signed-off-by: Konstantin Khorenko Signed-off-by: Jessica Yu --- kernel/module.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel') diff --git a/kernel/module.c b/kernel/module.c index 26c13173da3d..bdbf95726cb7 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1033,6 +1033,8 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user, strlcpy(last_unloaded_module, mod->name, sizeof(last_unloaded_module)); free_module(mod); + /* someone could wait for the module in add_unformed_module() */ + wake_up_all(&module_wq); return 0; out: mutex_unlock(&module_mutex); -- cgit v1.2.3-71-gd317 From 4a169a95d885fe5c050bac1a21d43c86ba955bcf Mon Sep 17 00:00:00 2001 From: Maulik Shah Date: Fri, 15 Nov 2019 15:11:49 -0700 Subject: genirq: Introduce irq_chip_get/set_parent_state calls On certain QTI chipsets some GPIOs are direct-connect interrupts to the GIC to be used as regular interrupt lines. When the GPIOs are not used for interrupt generation the interrupt line is disabled. But disabling the interrupt at GIC does not prevent the interrupt to be reported as pending at GIC_ISPEND. Later, when drivers call enable_irq() on the interrupt, an unwanted interrupt occurs. Introduce get and set methods for irqchip's parent to clear it's pending irq state. 
This then can be invoked by the GPIO interrupt controller on the parents in it hierarchy to clear the interrupt before enabling the interrupt. Signed-off-by: Maulik Shah Signed-off-by: Lina Iyer Signed-off-by: Marc Zyngier Reviewed-by: Stephen Boyd Link: https://lore.kernel.org/r/1573855915-9841-7-git-send-email-ilina@codeaurora.org [updated commit text and minor code fixes] --- include/linux/irq.h | 6 ++++++ kernel/irq/chip.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+) (limited to 'kernel') diff --git a/include/linux/irq.h b/include/linux/irq.h index fb301cf29148..7853eb9301f2 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -610,6 +610,12 @@ extern int irq_chip_pm_put(struct irq_data *data); #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY extern void handle_fasteoi_ack_irq(struct irq_desc *desc); extern void handle_fasteoi_mask_irq(struct irq_desc *desc); +extern int irq_chip_set_parent_state(struct irq_data *data, + enum irqchip_irq_state which, + bool val); +extern int irq_chip_get_parent_state(struct irq_data *data, + enum irqchip_irq_state which, + bool *state); extern void irq_chip_enable_parent(struct irq_data *data); extern void irq_chip_disable_parent(struct irq_data *data); extern void irq_chip_ack_parent(struct irq_data *data); diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index b76703b2c0af..b3fa2d87d2f3 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -1297,6 +1297,50 @@ EXPORT_SYMBOL_GPL(handle_fasteoi_mask_irq); #endif /* CONFIG_IRQ_FASTEOI_HIERARCHY_HANDLERS */ +/** + * irq_chip_set_parent_state - set the state of a parent interrupt. + * + * @data: Pointer to interrupt specific data + * @which: State to be restored (one of IRQCHIP_STATE_*) + * @val: Value corresponding to @which + * + * Conditional success, if the underlying irqchip does not implement it. + */ +int irq_chip_set_parent_state(struct irq_data *data, + enum irqchip_irq_state which, + bool val) +{ + data = data->parent_data; + + if (!data || !data->chip->irq_set_irqchip_state) + return 0; + + return data->chip->irq_set_irqchip_state(data, which, val); +} +EXPORT_SYMBOL_GPL(irq_chip_set_parent_state); + +/** + * irq_chip_get_parent_state - get the state of a parent interrupt. + * + * @data: Pointer to interrupt specific data + * @which: one of IRQCHIP_STATE_* the caller wants to know + * @state: a pointer to a boolean where the state is to be stored + * + * Conditional success, if the underlying irqchip does not implement it. + */ +int irq_chip_get_parent_state(struct irq_data *data, + enum irqchip_irq_state which, + bool *state) +{ + data = data->parent_data; + + if (!data || !data->chip->irq_get_irqchip_state) + return 0; + + return data->chip->irq_get_irqchip_state(data, which, state); +} +EXPORT_SYMBOL_GPL(irq_chip_get_parent_state); + /** * irq_chip_enable_parent - Enable the parent interrupt (defaults to unmask if * NULL) -- cgit v1.2.3-71-gd317 From 7b8474466ed97be458c825f34a85f2c2b84c3f95 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 21 Nov 2019 00:03:03 +0000 Subject: time: Zero the upper 32-bits in __kernel_timespec on 32-bit On compat interfaces, the high order bits of nanoseconds should be zeroed out. This is because the application code or the libc do not guarantee zeroing of these. If used without zeroing, kernel might be at risk of using timespec values incorrectly. Originally it was handled correctly, but lost during is_compat_syscall() cleanup. Revert the condition back to check CONFIG_64BIT. 
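As an illustration of why the masking matters (hypothetical value, not taken from the report): a 32-bit caller typically initializes only the low 32 bits of the 64-bit tv_nsec field, so the kernel has to discard whatever the upper half happens to contain:

    /* Hypothetical sketch of the compat masking done in get_timespec64() */
    long long tv_nsec = 0x0badc0de00000064LL;   /* low word set by the 32-bit caller,
                                                 * upper word left as stack junk */
    tv_nsec &= 0xFFFFFFFFUL;                    /* -> 0x64, i.e. the intended 100 ns */

The patch restores the !IS_ENABLED(CONFIG_64BIT) part of the condition so this masking again happens for native 32-bit tasks, not only for compat tasks on a 64-bit kernel.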
Fixes: 98f76206b335 ("compat: Cleanup in_compat_syscall() callers") Reported-by: Ben Hutchings Signed-off-by: Dmitry Safonov Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20191121000303.126523-1-dima@arista.com --- kernel/time/time.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/time/time.c b/kernel/time/time.c index 5c54ca632d08..83f403e7a15c 100644 --- a/kernel/time/time.c +++ b/kernel/time/time.c @@ -881,7 +881,8 @@ int get_timespec64(struct timespec64 *ts, ts->tv_sec = kts.tv_sec; /* Zero out the padding for 32 bit systems or in compat mode */ - if (IS_ENABLED(CONFIG_64BIT_TIME) && in_compat_syscall()) + if (IS_ENABLED(CONFIG_64BIT_TIME) && (!IS_ENABLED(CONFIG_64BIT) || + in_compat_syscall())) kts.tv_nsec &= 0xFFFFFFFFUL; ts->tv_nsec = kts.tv_nsec; -- cgit v1.2.3-71-gd317 From d0f010434124598988ba1c97fbb0e4e820ff5d8c Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 26 Nov 2019 15:01:06 -0800 Subject: bpf: Fix static checker warning kernel/bpf/btf.c:4023 btf_distill_func_proto() error: potentially dereferencing uninitialized 't'. kernel/bpf/btf.c 4012 nargs = btf_type_vlen(func); 4013 if (nargs >= MAX_BPF_FUNC_ARGS) { 4014 bpf_log(log, 4015 "The function %s has %d arguments. Too many.\n", 4016 tname, nargs); 4017 return -EINVAL; 4018 } 4019 ret = __get_type_size(btf, func->type, &t); ^^ t isn't initialized for the first -EINVAL return This is unlikely path, since BTF should have been validated at this point. Fix it by returning 'void' BTF. Reported-by: Dan Carpenter Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20191126230106.237179-1-ast@kernel.org --- kernel/bpf/btf.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 40efde5eedcb..bd5e11881ba3 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -3976,8 +3976,10 @@ static int __get_type_size(struct btf *btf, u32 btf_id, t = btf_type_by_id(btf, btf_id); while (t && btf_type_is_modifier(t)) t = btf_type_by_id(btf, t->type); - if (!t) + if (!t) { + *bad_type = btf->types[0]; return -EINVAL; + } if (btf_type_is_ptr(t)) /* kernel size of pointer. Not BPF's size of pointer*/ return sizeof(void *); -- cgit v1.2.3-71-gd317 From ce27709b8162e5c501bc54292b8bf6bdecc4bbd4 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 27 Nov 2019 20:35:08 -0800 Subject: bpf: Fix build in minimal configurations Some kconfigs can have BPF enabled without a single valid program type. In such configurations the build will fail with: ./kernel/bpf/btf.c:3466:1: error: empty enum is invalid Fix it by adding unused value to the enum. 
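A minimal, self-contained illustration of the failure mode (hypothetical names; the real enumerators come from the BPF_PROG_TYPE() list): when every member of an enum is produced by a macro list that expands to nothing in a minimal configuration, the enum ends up empty, which is invalid C, so a trailing dummy enumerator keeps it valid:

    /* Illustrative reduction, not the kernel code itself. */
    #define PROG_TYPE(id)   /* expands to nothing when no program types are enabled */

    enum ctx_convert_ids {
            PROG_TYPE(socket_filter)
            PROG_TYPE(kprobe)
            __ctx_convert_unused,   /* keeps the enum non-empty, hence valid C */
    };

With at least one real enumerator present the dummy entry is harmless, and with none it is the only member, so the declaration compiles either way.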
Reported-by: Randy Dunlap Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Randy Dunlap # build-tested Link: https://lore.kernel.org/bpf/20191128043508.2346723-1-ast@kernel.org --- kernel/bpf/btf.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel') diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index bd5e11881ba3..7d40da240891 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -3463,6 +3463,7 @@ enum { __ctx_convert##_id, #include #undef BPF_PROG_TYPE + __ctx_convert_unused, /* to avoid empty enum in extreme .config */ }; static u8 bpf_ctx_convert_map[] = { #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \ -- cgit v1.2.3-71-gd317 From 36a8015f89e40f7c9c91cc7e6d028fa288dad27b Mon Sep 17 00:00:00 2001 From: Leonard Crestez Date: Tue, 26 Nov 2019 17:17:13 +0200 Subject: PM / QoS: Restore DEV_PM_QOS_MIN/MAX_FREQUENCY Support for adding per-device frequency limits was removed in commit 2aac8bdf7a0f ("PM: QoS: Drop frequency QoS types from device PM QoS") after cpufreq switched to use a new "freq_constraints" construct. Restore support for per-device freq limits but base this upon freq_constraints. This is primarily meant to be used by the devfreq subsystem. This removes the "static" marking on freq_qos_apply but does not export it for modules. Signed-off-by: Leonard Crestez Reviewed-by: Matthias Kaehlcke Tested-by: Matthias Kaehlcke Signed-off-by: Rafael J. Wysocki --- drivers/base/power/qos.c | 73 ++++++++++++++++++++++++++++++++++++++++++++---- include/linux/pm_qos.h | 12 ++++++++ kernel/power/qos.c | 4 ++- 3 files changed, 82 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/drivers/base/power/qos.c b/drivers/base/power/qos.c index 350dcafd751f..8e93167f1783 100644 --- a/drivers/base/power/qos.c +++ b/drivers/base/power/qos.c @@ -115,10 +115,20 @@ s32 dev_pm_qos_read_value(struct device *dev, enum dev_pm_qos_req_type type) spin_lock_irqsave(&dev->power.lock, flags); - if (type == DEV_PM_QOS_RESUME_LATENCY) { + switch (type) { + case DEV_PM_QOS_RESUME_LATENCY: ret = IS_ERR_OR_NULL(qos) ? PM_QOS_RESUME_LATENCY_NO_CONSTRAINT : pm_qos_read_value(&qos->resume_latency); - } else { + break; + case DEV_PM_QOS_MIN_FREQUENCY: + ret = IS_ERR_OR_NULL(qos) ? PM_QOS_MIN_FREQUENCY_DEFAULT_VALUE + : freq_qos_read_value(&qos->freq, FREQ_QOS_MIN); + break; + case DEV_PM_QOS_MAX_FREQUENCY: + ret = IS_ERR_OR_NULL(qos) ? 
PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE + : freq_qos_read_value(&qos->freq, FREQ_QOS_MAX); + break; + default: WARN_ON(1); ret = 0; } @@ -159,6 +169,10 @@ static int apply_constraint(struct dev_pm_qos_request *req, req->dev->power.set_latency_tolerance(req->dev, value); } break; + case DEV_PM_QOS_MIN_FREQUENCY: + case DEV_PM_QOS_MAX_FREQUENCY: + ret = freq_qos_apply(&req->data.freq, action, value); + break; case DEV_PM_QOS_FLAGS: ret = pm_qos_update_flags(&qos->flags, &req->data.flr, action, value); @@ -209,6 +223,8 @@ static int dev_pm_qos_constraints_allocate(struct device *dev) c->no_constraint_value = PM_QOS_LATENCY_TOLERANCE_NO_CONSTRAINT; c->type = PM_QOS_MIN; + freq_constraints_init(&qos->freq); + INIT_LIST_HEAD(&qos->flags.list); spin_lock_irq(&dev->power.lock); @@ -269,6 +285,20 @@ void dev_pm_qos_constraints_destroy(struct device *dev) memset(req, 0, sizeof(*req)); } + c = &qos->freq.min_freq; + plist_for_each_entry_safe(req, tmp, &c->list, data.freq.pnode) { + apply_constraint(req, PM_QOS_REMOVE_REQ, + PM_QOS_MIN_FREQUENCY_DEFAULT_VALUE); + memset(req, 0, sizeof(*req)); + } + + c = &qos->freq.max_freq; + plist_for_each_entry_safe(req, tmp, &c->list, data.freq.pnode) { + apply_constraint(req, PM_QOS_REMOVE_REQ, + PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE); + memset(req, 0, sizeof(*req)); + } + f = &qos->flags; list_for_each_entry_safe(req, tmp, &f->list, data.flr.node) { apply_constraint(req, PM_QOS_REMOVE_REQ, PM_QOS_DEFAULT_VALUE); @@ -314,11 +344,22 @@ static int __dev_pm_qos_add_request(struct device *dev, ret = dev_pm_qos_constraints_allocate(dev); trace_dev_pm_qos_add_request(dev_name(dev), type, value); - if (!ret) { - req->dev = dev; - req->type = type; + if (ret) + return ret; + + req->dev = dev; + req->type = type; + if (req->type == DEV_PM_QOS_MIN_FREQUENCY) + ret = freq_qos_add_request(&dev->power.qos->freq, + &req->data.freq, + FREQ_QOS_MIN, value); + else if (req->type == DEV_PM_QOS_MAX_FREQUENCY) + ret = freq_qos_add_request(&dev->power.qos->freq, + &req->data.freq, + FREQ_QOS_MAX, value); + else ret = apply_constraint(req, PM_QOS_ADD_REQ, value); - } + return ret; } @@ -382,6 +423,10 @@ static int __dev_pm_qos_update_request(struct dev_pm_qos_request *req, case DEV_PM_QOS_LATENCY_TOLERANCE: curr_value = req->data.pnode.prio; break; + case DEV_PM_QOS_MIN_FREQUENCY: + case DEV_PM_QOS_MAX_FREQUENCY: + curr_value = req->data.freq.pnode.prio; + break; case DEV_PM_QOS_FLAGS: curr_value = req->data.flr.flags; break; @@ -507,6 +552,14 @@ int dev_pm_qos_add_notifier(struct device *dev, struct notifier_block *notifier, ret = blocking_notifier_chain_register(dev->power.qos->resume_latency.notifiers, notifier); break; + case DEV_PM_QOS_MIN_FREQUENCY: + ret = freq_qos_add_notifier(&dev->power.qos->freq, + FREQ_QOS_MIN, notifier); + break; + case DEV_PM_QOS_MAX_FREQUENCY: + ret = freq_qos_add_notifier(&dev->power.qos->freq, + FREQ_QOS_MAX, notifier); + break; default: WARN_ON(1); ret = -EINVAL; @@ -546,6 +599,14 @@ int dev_pm_qos_remove_notifier(struct device *dev, ret = blocking_notifier_chain_unregister(dev->power.qos->resume_latency.notifiers, notifier); break; + case DEV_PM_QOS_MIN_FREQUENCY: + ret = freq_qos_remove_notifier(&dev->power.qos->freq, + FREQ_QOS_MIN, notifier); + break; + case DEV_PM_QOS_MAX_FREQUENCY: + ret = freq_qos_remove_notifier(&dev->power.qos->freq, + FREQ_QOS_MAX, notifier); + break; default: WARN_ON(1); ret = -EINVAL; diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h index 678fec6da5b9..19eafca5680e 100644 --- a/include/linux/pm_qos.h +++ 
b/include/linux/pm_qos.h @@ -34,6 +34,8 @@ enum pm_qos_flags_status { #define PM_QOS_RESUME_LATENCY_NO_CONSTRAINT PM_QOS_LATENCY_ANY #define PM_QOS_RESUME_LATENCY_NO_CONSTRAINT_NS PM_QOS_LATENCY_ANY_NS #define PM_QOS_LATENCY_TOLERANCE_DEFAULT_VALUE 0 +#define PM_QOS_MIN_FREQUENCY_DEFAULT_VALUE 0 +#define PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE FREQ_QOS_MAX_DEFAULT_VALUE #define PM_QOS_LATENCY_TOLERANCE_NO_CONSTRAINT (-1) #define PM_QOS_FLAG_NO_POWER_OFF (1 << 0) @@ -101,6 +103,8 @@ struct freq_qos_request { enum dev_pm_qos_req_type { DEV_PM_QOS_RESUME_LATENCY = 1, DEV_PM_QOS_LATENCY_TOLERANCE, + DEV_PM_QOS_MIN_FREQUENCY, + DEV_PM_QOS_MAX_FREQUENCY, DEV_PM_QOS_FLAGS, }; @@ -109,6 +113,7 @@ struct dev_pm_qos_request { union { struct plist_node pnode; struct pm_qos_flags_request flr; + struct freq_qos_request freq; } data; struct device *dev; }; @@ -116,6 +121,7 @@ struct dev_pm_qos_request { struct dev_pm_qos { struct pm_qos_constraints resume_latency; struct pm_qos_constraints latency_tolerance; + struct freq_constraints freq; struct pm_qos_flags flags; struct dev_pm_qos_request *resume_latency_req; struct dev_pm_qos_request *latency_tolerance_req; @@ -214,6 +220,10 @@ static inline s32 dev_pm_qos_read_value(struct device *dev, switch (type) { case DEV_PM_QOS_RESUME_LATENCY: return PM_QOS_RESUME_LATENCY_NO_CONSTRAINT; + case DEV_PM_QOS_MIN_FREQUENCY: + return PM_QOS_MIN_FREQUENCY_DEFAULT_VALUE; + case DEV_PM_QOS_MAX_FREQUENCY: + return PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE; default: WARN_ON(1); return 0; @@ -293,6 +303,8 @@ int freq_qos_add_request(struct freq_constraints *qos, enum freq_qos_req_type type, s32 value); int freq_qos_update_request(struct freq_qos_request *req, s32 new_value); int freq_qos_remove_request(struct freq_qos_request *req); +int freq_qos_apply(struct freq_qos_request *req, + enum pm_qos_req_action action, s32 value); int freq_qos_add_notifier(struct freq_constraints *qos, enum freq_qos_req_type type, diff --git a/kernel/power/qos.c b/kernel/power/qos.c index a45cba7df0ae..83edf8698118 100644 --- a/kernel/power/qos.c +++ b/kernel/power/qos.c @@ -714,8 +714,10 @@ s32 freq_qos_read_value(struct freq_constraints *qos, * @req: Constraint request to apply. * @action: Action to perform (add/update/remove). * @value: Value to assign to the QoS request. + * + * This is only meant to be called from inside pm_qos, not drivers. */ -static int freq_qos_apply(struct freq_qos_request *req, +int freq_qos_apply(struct freq_qos_request *req, enum pm_qos_req_action action, s32 value) { int ret; -- cgit v1.2.3-71-gd317 From 6c3edaf9fd6a3be7fb5bc6931897c24cd3848f84 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 29 Nov 2019 20:52:18 -0800 Subject: tracing: Introduce trace event injection We have been trying to use rasdaemon to monitor hardware errors like correctable memory errors. rasdaemon uses trace events to monitor various hardware errors. In order to test it, we have to inject some hardware errors, unfortunately not all of them provide error injections. MCE does provide a way to inject MCE errors, but errors like PCI error and devlink error don't, it is not easy to add error injection to each of them. Instead, it is relatively easier to just allow users to inject trace events in a generic way so that all trace events can be injected. This patch introduces trace event injection, where a new 'inject' is added to each tracepoint directory. 
Users could write into this file with key=value pairs to specify the value of each fields of the trace event, all unspecified fields are set to zero values by default. For example, for the net/net_dev_queue tracepoint, we can inject: INJECT=/sys/kernel/debug/tracing/events/net/net_dev_queue/inject echo "" > $INJECT echo "name='test'" > $INJECT echo "name='test' len=1024" > $INJECT cat /sys/kernel/debug/tracing/trace ... <...>-614 [000] .... 36.571483: net_dev_queue: dev= skbaddr=00000000fbf338c2 len=0 <...>-614 [001] .... 136.588252: net_dev_queue: dev=test skbaddr=00000000fbf338c2 len=0 <...>-614 [001] .N.. 208.431878: net_dev_queue: dev=test skbaddr=00000000fbf338c2 len=1024 Triggers could be triggered as usual too: echo "stacktrace if len == 1025" > /sys/kernel/debug/tracing/events/net/net_dev_queue/trigger echo "len=1025" > $INJECT cat /sys/kernel/debug/tracing/trace ... bash-614 [000] .... 36.571483: net_dev_queue: dev= skbaddr=00000000fbf338c2 len=0 bash-614 [001] .... 136.588252: net_dev_queue: dev=test skbaddr=00000000fbf338c2 len=0 bash-614 [001] .N.. 208.431878: net_dev_queue: dev=test skbaddr=00000000fbf338c2 len=1024 bash-614 [001] .N.1 284.236349: => event_inject_write => vfs_write => ksys_write => do_syscall_64 => entry_SYSCALL_64_after_hwframe The only thing that can't be injected is string pointers as they require constant string pointers, this can't be done at run time. Link: http://lkml.kernel.org/r/20191130045218.18979-1-xiyou.wangcong@gmail.com Cc: Ingo Molnar Signed-off-by: Cong Wang Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/Kconfig | 9 + kernel/trace/Makefile | 1 + kernel/trace/trace.h | 1 + kernel/trace/trace_events.c | 6 + kernel/trace/trace_events_inject.c | 331 +++++++++++++++++++++++++++++++++++++ 5 files changed, 348 insertions(+) create mode 100644 kernel/trace/trace_events_inject.c (limited to 'kernel') diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index f67620499faa..29a9c5058b62 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -672,6 +672,15 @@ config HIST_TRIGGERS See Documentation/trace/histogram.rst. If in doubt, say N. +config TRACE_EVENT_INJECT + bool "Trace event injection" + depends on TRACING + help + Allow user-space to inject a specific trace event into the ring + buffer. This is mainly used for testing purpose. + + If unsure, say N. 
+ config MMIOTRACE_TEST tristate "Test module for mmiotrace" depends on MMIOTRACE && m diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index c2b2148bb1d2..0e63db62225f 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -69,6 +69,7 @@ obj-$(CONFIG_EVENT_TRACING) += trace_event_perf.o endif obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o obj-$(CONFIG_EVENT_TRACING) += trace_events_trigger.o +obj-$(CONFIG_TRACE_EVENT_INJECT) += trace_events_inject.o obj-$(CONFIG_HIST_TRIGGERS) += trace_events_hist.o obj-$(CONFIG_BPF_EVENTS) += bpf_trace.o obj-$(CONFIG_KPROBE_EVENTS) += trace_kprobe.o diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index ca7fccafbcbb..63bf60f79398 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -1601,6 +1601,7 @@ extern struct list_head ftrace_events; extern const struct file_operations event_trigger_fops; extern const struct file_operations event_hist_fops; +extern const struct file_operations event_inject_fops; #ifdef CONFIG_HIST_TRIGGERS extern int register_trigger_hist_cmd(void); diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 6b3a69e9aa6a..c6de3cebc127 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -2044,6 +2044,12 @@ event_create_dir(struct dentry *parent, struct trace_event_file *file) trace_create_file("format", 0444, file->dir, call, &ftrace_event_format_fops); +#ifdef CONFIG_TRACE_EVENT_INJECT + if (call->event.type && call->class->reg) + trace_create_file("inject", 0200, file->dir, file, + &event_inject_fops); +#endif + return 0; } diff --git a/kernel/trace/trace_events_inject.c b/kernel/trace/trace_events_inject.c new file mode 100644 index 000000000000..d43710718ee5 --- /dev/null +++ b/kernel/trace/trace_events_inject.c @@ -0,0 +1,331 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * trace_events_inject - trace event injection + * + * Copyright (C) 2019 Cong Wang + */ + +#include +#include +#include +#include +#include + +#include "trace.h" + +static int +trace_inject_entry(struct trace_event_file *file, void *rec, int len) +{ + struct trace_event_buffer fbuffer; + struct ring_buffer *buffer; + int written = 0; + void *entry; + + rcu_read_lock_sched(); + buffer = file->tr->trace_buffer.buffer; + entry = trace_event_buffer_reserve(&fbuffer, file, len); + if (entry) { + memcpy(entry, rec, len); + written = len; + trace_event_buffer_commit(&fbuffer); + } + rcu_read_unlock_sched(); + + return written; +} + +static int +parse_field(char *str, struct trace_event_call *call, + struct ftrace_event_field **pf, u64 *pv) +{ + struct ftrace_event_field *field; + char *field_name; + int s, i = 0; + int len; + u64 val; + + if (!str[i]) + return 0; + /* First find the field to associate to */ + while (isspace(str[i])) + i++; + s = i; + while (isalnum(str[i]) || str[i] == '_') + i++; + len = i - s; + if (!len) + return -EINVAL; + + field_name = kmemdup_nul(str + s, len, GFP_KERNEL); + if (!field_name) + return -ENOMEM; + field = trace_find_event_field(call, field_name); + kfree(field_name); + if (!field) + return -ENOENT; + + *pf = field; + while (isspace(str[i])) + i++; + if (str[i] != '=') + return -EINVAL; + i++; + while (isspace(str[i])) + i++; + s = i; + if (isdigit(str[i]) || str[i] == '-') { + char *num, c; + int ret; + + /* Make sure the field is not a string */ + if (is_string_field(field)) + return -EINVAL; + + if (str[i] == '-') + i++; + + /* We allow 0xDEADBEEF */ + while (isalnum(str[i])) + i++; + num = str + s; + c = str[i]; + if (c != '\0' && 
!isspace(c)) + return -EINVAL; + str[i] = '\0'; + /* Make sure it is a value */ + if (field->is_signed) + ret = kstrtoll(num, 0, &val); + else + ret = kstrtoull(num, 0, &val); + str[i] = c; + if (ret) + return ret; + + *pv = val; + return i; + } else if (str[i] == '\'' || str[i] == '"') { + char q = str[i]; + + /* Make sure the field is OK for strings */ + if (!is_string_field(field)) + return -EINVAL; + + for (i++; str[i]; i++) { + if (str[i] == '\\' && str[i + 1]) { + i++; + continue; + } + if (str[i] == q) + break; + } + if (!str[i]) + return -EINVAL; + + /* Skip quotes */ + s++; + len = i - s; + if (len >= MAX_FILTER_STR_VAL) + return -EINVAL; + + *pv = (unsigned long)(str + s); + str[i] = 0; + /* go past the last quote */ + i++; + return i; + } + + return -EINVAL; +} + +static int trace_get_entry_size(struct trace_event_call *call) +{ + struct ftrace_event_field *field; + struct list_head *head; + int size = 0; + + head = trace_get_fields(call); + list_for_each_entry(field, head, link) { + if (field->size + field->offset > size) + size = field->size + field->offset; + } + + return size; +} + +static void *trace_alloc_entry(struct trace_event_call *call, int *size) +{ + int entry_size = trace_get_entry_size(call); + struct ftrace_event_field *field; + struct list_head *head; + void *entry = NULL; + + /* We need an extra '\0' at the end. */ + entry = kzalloc(entry_size + 1, GFP_KERNEL); + if (!entry) + return NULL; + + head = trace_get_fields(call); + list_for_each_entry(field, head, link) { + if (!is_string_field(field)) + continue; + if (field->filter_type == FILTER_STATIC_STRING) + continue; + if (field->filter_type == FILTER_DYN_STRING) { + u32 *str_item; + int str_loc = entry_size & 0xffff; + + str_item = (u32 *)(entry + field->offset); + *str_item = str_loc; /* string length is 0. */ + } else { + char **paddr; + + paddr = (char **)(entry + field->offset); + *paddr = ""; + } + } + + *size = entry_size + 1; + return entry; +} + +#define INJECT_STRING "STATIC STRING CAN NOT BE INJECTED" + +/* Caller is responsible to free the *pentry. 
*/ +static int parse_entry(char *str, struct trace_event_call *call, void **pentry) +{ + struct ftrace_event_field *field; + unsigned long irq_flags; + void *entry = NULL; + int entry_size; + u64 val; + int len; + + entry = trace_alloc_entry(call, &entry_size); + *pentry = entry; + if (!entry) + return -ENOMEM; + + local_save_flags(irq_flags); + tracing_generic_entry_update(entry, call->event.type, irq_flags, + preempt_count()); + + while ((len = parse_field(str, call, &field, &val)) > 0) { + if (is_function_field(field)) + return -EINVAL; + + if (is_string_field(field)) { + char *addr = (char *)(unsigned long) val; + + if (field->filter_type == FILTER_STATIC_STRING) { + strlcpy(entry + field->offset, addr, field->size); + } else if (field->filter_type == FILTER_DYN_STRING) { + int str_len = strlen(addr) + 1; + int str_loc = entry_size & 0xffff; + u32 *str_item; + + entry_size += str_len; + *pentry = krealloc(entry, entry_size, GFP_KERNEL); + if (!*pentry) { + kfree(entry); + return -ENOMEM; + } + entry = *pentry; + + strlcpy(entry + (entry_size - str_len), addr, str_len); + str_item = (u32 *)(entry + field->offset); + *str_item = (str_len << 16) | str_loc; + } else { + char **paddr; + + paddr = (char **)(entry + field->offset); + *paddr = INJECT_STRING; + } + } else { + switch (field->size) { + case 1: { + u8 tmp = (u8) val; + + memcpy(entry + field->offset, &tmp, 1); + break; + } + case 2: { + u16 tmp = (u16) val; + + memcpy(entry + field->offset, &tmp, 2); + break; + } + case 4: { + u32 tmp = (u32) val; + + memcpy(entry + field->offset, &tmp, 4); + break; + } + case 8: + memcpy(entry + field->offset, &val, 8); + break; + default: + return -EINVAL; + } + } + + str += len; + } + + if (len < 0) + return len; + + return entry_size; +} + +static ssize_t +event_inject_write(struct file *filp, const char __user *ubuf, size_t cnt, + loff_t *ppos) +{ + struct trace_event_call *call; + struct trace_event_file *file; + int err = -ENODEV, size; + void *entry = NULL; + char *buf; + + if (cnt >= PAGE_SIZE) + return -EINVAL; + + buf = memdup_user_nul(ubuf, cnt); + if (IS_ERR(buf)) + return PTR_ERR(buf); + strim(buf); + + mutex_lock(&event_mutex); + file = event_file_data(filp); + if (file) { + call = file->event_call; + size = parse_entry(buf, call, &entry); + if (size < 0) + err = size; + else + err = trace_inject_entry(file, entry, size); + } + mutex_unlock(&event_mutex); + + kfree(entry); + kfree(buf); + + if (err < 0) + return err; + + *ppos += err; + return cnt; +} + +static ssize_t +event_inject_read(struct file *file, char __user *buf, size_t size, + loff_t *ppos) +{ + return -EPERM; +} + +const struct file_operations event_inject_fops = { + .open = tracing_open_generic, + .read = event_inject_read, + .write = event_inject_write, +}; -- cgit v1.2.3-71-gd317 From a356646a56857c2e5ad875beec734d7145ecd49a Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 2 Dec 2019 16:25:27 -0500 Subject: tracing: Do not create directories if lockdown is in affect If lockdown is disabling tracing on boot up, it prevents the tracing files from even bering created. But when that happens, there's several places that will give a warning that the files were not created as that is usually a sign of a bug. Add in strategic locations where a check is made to see if tracing is disabled by lockdown, and if it is, do not go further, and fail silently (but print that tracing is disabled by lockdown, without doing a WARN_ON()). 
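Each of those strategic locations uses the same pattern; a simplified sketch (the surrounding function is illustrative, the real hunks are in the diff below):

  #include <linux/kernel.h>
  #include <linux/security.h>

  static int demo_tracing_init(void)
  {
          /* Bail out quietly: no WARN_ON(), just a note that lockdown
           * is the reason the tracefs files will not appear. */
          if (security_locked_down(LOCKDOWN_TRACEFS)) {
                  pr_warn("Tracing disabled due to lockdown\n");
                  return -EPERM;
          }
          /* ... normal ring buffer / tracefs setup would continue ... */
          return 0;
  }
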
Cc: Matthew Garrett Fixes: 17911ff38aa5 ("tracing: Add locked_down checks to the open calls of files created for tracefs") Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ring_buffer.c | 6 ++++++ kernel/trace/trace.c | 17 +++++++++++++++++ 2 files changed, 23 insertions(+) (limited to 'kernel') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 66358d66c933..4bf050fcfe3b 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include /* for self test */ @@ -5068,6 +5069,11 @@ static __init int test_ringbuffer(void) int cpu; int ret = 0; + if (security_locked_down(LOCKDOWN_TRACEFS)) { + pr_warning("Lockdown is enabled, skipping ring buffer tests\n"); + return 0; + } + pr_info("Running ring buffer tests...\n"); buffer = ring_buffer_alloc(RB_TEST_BUFFER_SIZE, RB_FL_OVERWRITE); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 02a23a6e5e00..23459d53d576 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1888,6 +1888,12 @@ int __init register_tracer(struct tracer *type) return -1; } + if (security_locked_down(LOCKDOWN_TRACEFS)) { + pr_warning("Can not register tracer %s due to lockdown\n", + type->name); + return -EPERM; + } + mutex_lock(&trace_types_lock); tracing_selftest_running = true; @@ -8789,6 +8795,11 @@ struct dentry *tracing_init_dentry(void) { struct trace_array *tr = &global_trace; + if (security_locked_down(LOCKDOWN_TRACEFS)) { + pr_warning("Tracing disabled due to lockdown\n"); + return ERR_PTR(-EPERM); + } + /* The top level trace array uses NULL as parent */ if (tr->dir) return NULL; @@ -9231,6 +9242,12 @@ __init static int tracer_alloc_buffers(void) int ring_buf_size; int ret = -ENOMEM; + + if (security_locked_down(LOCKDOWN_TRACEFS)) { + pr_warning("Tracing disabled due to lockdown\n"); + return -EPERM; + } + /* * Make sure we don't accidently add more trace options * than we have bits for. -- cgit v1.2.3-71-gd317 From 1a50cb80f219c44adb6265f5071b81fc3c1deced Mon Sep 17 00:00:00 2001 From: Xiaoming Ni Date: Wed, 4 Dec 2019 16:50:39 -0800 Subject: kernel/notifier.c: intercept duplicate registrations to avoid infinite loops Registering the same notifier to a hook repeatedly can cause the hook list to form a ring or lose other members of the list. case1: An infinite loop in notifier_chain_register() can cause soft lockup atomic_notifier_chain_register(&test_notifier_list, &test1); atomic_notifier_chain_register(&test_notifier_list, &test1); atomic_notifier_chain_register(&test_notifier_list, &test2); case2: An infinite loop in notifier_chain_register() can cause soft lockup atomic_notifier_chain_register(&test_notifier_list, &test1); atomic_notifier_chain_register(&test_notifier_list, &test1); atomic_notifier_call_chain(&test_notifier_list, 0, NULL); case3: lose other hook test2 atomic_notifier_chain_register(&test_notifier_list, &test1); atomic_notifier_chain_register(&test_notifier_list, &test2); atomic_notifier_chain_register(&test_notifier_list, &test1); case4: Unregister returns 0, but the hook is still in the linked list, and it is not really registered. If you call notifier_call_chain after ko is unloaded, it will trigger oops. If the system is configured with softlockup_panic and the same hook is repeatedly registered on the panic_notifier_list, it will cause a loop panic. 
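To make case 1 concrete, a hypothetical test module (names are illustrative, modeled on the calls above) is enough to trigger the lockup before this change:

  #include <linux/module.h>
  #include <linux/notifier.h>

  static ATOMIC_NOTIFIER_HEAD(test_notifier_list);

  static int test_cb(struct notifier_block *nb, unsigned long action,
                     void *data)
  {
          return NOTIFY_OK;
  }

  static struct notifier_block test1 = { .notifier_call = test_cb };
  static struct notifier_block test2 = { .notifier_call = test_cb };

  static int __init dup_register_init(void)
  {
          atomic_notifier_chain_register(&test_notifier_list, &test1);
          /* Duplicate: test1.next now points back at test1 itself. */
          atomic_notifier_chain_register(&test_notifier_list, &test1);
          /* Walks the self-looped chain forever -> soft lockup. */
          atomic_notifier_chain_register(&test_notifier_list, &test2);
          return 0;
  }
  module_init(dup_register_init);
  MODULE_LICENSE("GPL");
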
Add a check in notifier_chain_register(), intercepting duplicate registrations to avoid infinite loops Link: http://lkml.kernel.org/r/1568861888-34045-2-git-send-email-nixiaoming@huawei.com Signed-off-by: Xiaoming Ni Reviewed-by: Vasily Averin Reviewed-by: Andrew Morton Cc: Alexey Dobriyan Cc: Anna Schumaker Cc: Arjan van de Ven Cc: J. Bruce Fields Cc: Chuck Lever Cc: David S. Miller Cc: Jeff Layton Cc: Andy Lutomirski Cc: Ingo Molnar Cc: Nadia Derbey Cc: "Paul E. McKenney" Cc: Sam Protsenko Cc: Alan Stern Cc: Thomas Gleixner Cc: Trond Myklebust Cc: Viresh Kumar Cc: Xiaoming Ni Cc: YueHaibing Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/notifier.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/notifier.c b/kernel/notifier.c index d9f5081d578d..30bedb8be6dd 100644 --- a/kernel/notifier.c +++ b/kernel/notifier.c @@ -23,7 +23,10 @@ static int notifier_chain_register(struct notifier_block **nl, struct notifier_block *n) { while ((*nl) != NULL) { - WARN_ONCE(((*nl) == n), "double register detected"); + if (unlikely((*nl) == n)) { + WARN(1, "double register detected"); + return 0; + } if (n->priority > (*nl)->priority) break; nl = &((*nl)->next); -- cgit v1.2.3-71-gd317 From 5adaabb65a267d890b29193af2dbc38a3b85bbf2 Mon Sep 17 00:00:00 2001 From: Xiaoming Ni Date: Wed, 4 Dec 2019 16:50:43 -0800 Subject: kernel/notifier.c: remove notifier_chain_cond_register() The only difference between notifier_chain_cond_register() and notifier_chain_register() is the lack of warning hints for duplicate registrations. Use notifier_chain_register() instead of notifier_chain_cond_register() to avoid duplicate code Link: http://lkml.kernel.org/r/1568861888-34045-3-git-send-email-nixiaoming@huawei.com Signed-off-by: Xiaoming Ni Reviewed-by: Andrew Morton Cc: Alan Stern Cc: Alexey Dobriyan Cc: Andy Lutomirski Cc: Anna Schumaker Cc: Arjan van de Ven Cc: Chuck Lever Cc: David S. Miller Cc: Ingo Molnar Cc: J. Bruce Fields Cc: Jeff Layton Cc: Nadia Derbey Cc: "Paul E. 
McKenney" Cc: Sam Protsenko Cc: Thomas Gleixner Cc: Trond Myklebust Cc: Vasily Averin Cc: Viresh Kumar Cc: YueHaibing Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/notifier.c | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) (limited to 'kernel') diff --git a/kernel/notifier.c b/kernel/notifier.c index 30bedb8be6dd..e3d221f092fe 100644 --- a/kernel/notifier.c +++ b/kernel/notifier.c @@ -36,21 +36,6 @@ static int notifier_chain_register(struct notifier_block **nl, return 0; } -static int notifier_chain_cond_register(struct notifier_block **nl, - struct notifier_block *n) -{ - while ((*nl) != NULL) { - if ((*nl) == n) - return 0; - if (n->priority > (*nl)->priority) - break; - nl = &((*nl)->next); - } - n->next = *nl; - rcu_assign_pointer(*nl, n); - return 0; -} - static int notifier_chain_unregister(struct notifier_block **nl, struct notifier_block *n) { @@ -252,7 +237,7 @@ int blocking_notifier_chain_cond_register(struct blocking_notifier_head *nh, int ret; down_write(&nh->rwsem); - ret = notifier_chain_cond_register(&nh->head, n); + ret = notifier_chain_register(&nh->head, n); up_write(&nh->rwsem); return ret; } -- cgit v1.2.3-71-gd317 From 260a2679e5cbfb3d8a4cf6cd1cb6f57e89c7e543 Mon Sep 17 00:00:00 2001 From: Xiaoming Ni Date: Wed, 4 Dec 2019 16:50:47 -0800 Subject: kernel/notifier.c: remove blocking_notifier_chain_cond_register() blocking_notifier_chain_cond_register() does not consider system_booting state, which is the only difference between this function and blocking_notifier_cain_register(). This can be a bug and is a piece of duplicate code. Delete blocking_notifier_chain_cond_register() Link: http://lkml.kernel.org/r/1568861888-34045-4-git-send-email-nixiaoming@huawei.com Signed-off-by: Xiaoming Ni Reviewed-by: Andrew Morton Cc: Alan Stern Cc: Alexey Dobriyan Cc: Andy Lutomirski Cc: Anna Schumaker Cc: Arjan van de Ven Cc: Chuck Lever Cc: David S. Miller Cc: Ingo Molnar Cc: J. Bruce Fields Cc: Jeff Layton Cc: Nadia Derbey Cc: "Paul E. 
McKenney" Cc: Sam Protsenko Cc: Thomas Gleixner Cc: Trond Myklebust Cc: Vasily Averin Cc: Viresh Kumar Cc: YueHaibing Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/notifier.h | 4 ---- kernel/notifier.c | 23 ----------------------- net/sunrpc/rpc_pipe.c | 2 +- 3 files changed, 1 insertion(+), 28 deletions(-) (limited to 'kernel') diff --git a/include/linux/notifier.h b/include/linux/notifier.h index 0096a05395e3..018947611483 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h @@ -150,10 +150,6 @@ extern int raw_notifier_chain_register(struct raw_notifier_head *nh, extern int srcu_notifier_chain_register(struct srcu_notifier_head *nh, struct notifier_block *nb); -extern int blocking_notifier_chain_cond_register( - struct blocking_notifier_head *nh, - struct notifier_block *nb); - extern int atomic_notifier_chain_unregister(struct atomic_notifier_head *nh, struct notifier_block *nb); extern int blocking_notifier_chain_unregister(struct blocking_notifier_head *nh, diff --git a/kernel/notifier.c b/kernel/notifier.c index e3d221f092fe..63d7501ac638 100644 --- a/kernel/notifier.c +++ b/kernel/notifier.c @@ -220,29 +220,6 @@ int blocking_notifier_chain_register(struct blocking_notifier_head *nh, } EXPORT_SYMBOL_GPL(blocking_notifier_chain_register); -/** - * blocking_notifier_chain_cond_register - Cond add notifier to a blocking notifier chain - * @nh: Pointer to head of the blocking notifier chain - * @n: New entry in notifier chain - * - * Adds a notifier to a blocking notifier chain, only if not already - * present in the chain. - * Must be called in process context. - * - * Currently always returns zero. - */ -int blocking_notifier_chain_cond_register(struct blocking_notifier_head *nh, - struct notifier_block *n) -{ - int ret; - - down_write(&nh->rwsem); - ret = notifier_chain_register(&nh->head, n); - up_write(&nh->rwsem); - return ret; -} -EXPORT_SYMBOL_GPL(blocking_notifier_chain_cond_register); - /** * blocking_notifier_chain_unregister - Remove notifier from a blocking notifier chain * @nh: Pointer to head of the blocking notifier chain diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index b71a39ded930..39e14d5edaf1 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -51,7 +51,7 @@ static BLOCKING_NOTIFIER_HEAD(rpc_pipefs_notifier_list); int rpc_pipefs_notifier_register(struct notifier_block *nb) { - return blocking_notifier_chain_cond_register(&rpc_pipefs_notifier_list, nb); + return blocking_notifier_chain_register(&rpc_pipefs_notifier_list, nb); } EXPORT_SYMBOL_GPL(rpc_pipefs_notifier_register); -- cgit v1.2.3-71-gd317 From ef70eff9dea66f38f8c2c2dcc7fe4b7a2bbb4921 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 4 Dec 2019 16:50:50 -0800 Subject: kernel/profile.c: use cpumask_available to check for NULL cpumask When building with clang + -Wtautological-pointer-compare, these instances pop up: kernel/profile.c:339:6: warning: comparison of array 'prof_cpu_mask' not equal to a null pointer is always true [-Wtautological-pointer-compare] if (prof_cpu_mask != NULL) ^~~~~~~~~~~~~ ~~~~ kernel/profile.c:376:6: warning: comparison of array 'prof_cpu_mask' not equal to a null pointer is always true [-Wtautological-pointer-compare] if (prof_cpu_mask != NULL) ^~~~~~~~~~~~~ ~~~~ kernel/profile.c:406:26: warning: comparison of array 'prof_cpu_mask' not equal to a null pointer is always true [-Wtautological-pointer-compare] if (!user_mode(regs) && prof_cpu_mask != NULL && ^~~~~~~~~~~~~ ~~~~ 3 warnings generated. 
This can be addressed with the cpumask_available helper, introduced in commit f7e30f01a9e2 ("cpumask: Add helper cpumask_available()") to fix warnings like this while keeping the code the same. Link: https://github.com/ClangBuiltLinux/linux/issues/747 Link: http://lkml.kernel.org/r/20191022191957.9554-1-natechancellor@gmail.com Signed-off-by: Nathan Chancellor Reviewed-by: Andrew Morton Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/profile.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/profile.c b/kernel/profile.c index af7c94bf5fa1..4b144b02ca5d 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -336,7 +336,7 @@ static int profile_dead_cpu(unsigned int cpu) struct page *page; int i; - if (prof_cpu_mask != NULL) + if (cpumask_available(prof_cpu_mask)) cpumask_clear_cpu(cpu, prof_cpu_mask); for (i = 0; i < 2; i++) { @@ -373,7 +373,7 @@ static int profile_prepare_cpu(unsigned int cpu) static int profile_online_cpu(unsigned int cpu) { - if (prof_cpu_mask != NULL) + if (cpumask_available(prof_cpu_mask)) cpumask_set_cpu(cpu, prof_cpu_mask); return 0; @@ -403,7 +403,7 @@ void profile_tick(int type) { struct pt_regs *regs = get_irq_regs(); - if (!user_mode(regs) && prof_cpu_mask != NULL && + if (!user_mode(regs) && cpumask_available(prof_cpu_mask) && cpumask_test_cpu(smp_processor_id(), prof_cpu_mask)) profile_hit(type, (void *)profile_pc(regs)); } -- cgit v1.2.3-71-gd317 From 5e1aada08cd19ea652b2d32a250501d09b02ff2e Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 4 Dec 2019 16:50:53 -0800 Subject: kernel/sys.c: avoid copying possible padding bytes in copy_to_user Initialization is not guaranteed to zero padding bytes so use an explicit memset instead to avoid leaking any kernel content in any possible padding bytes. Link: http://lkml.kernel.org/r/dfa331c00881d61c8ee51577a082d8bebd61805c.camel@perches.com Signed-off-by: Joe Perches Cc: Dan Carpenter Cc: Julia Lawall Cc: Thomas Gleixner Cc: Kees Cook Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sys.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/sys.c b/kernel/sys.c index d3aef31e24dc..a9331f101883 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1279,11 +1279,13 @@ SYSCALL_DEFINE1(uname, struct old_utsname __user *, name) SYSCALL_DEFINE1(olduname, struct oldold_utsname __user *, name) { - struct oldold_utsname tmp = {}; + struct oldold_utsname tmp; if (!name) return -EFAULT; + memset(&tmp, 0, sizeof(tmp)); + down_read(&uts_sem); memcpy(&tmp.sysname, &utsname()->sysname, __OLD_UTS_LEN); memcpy(&tmp.nodename, &utsname()->nodename, __OLD_UTS_LEN); -- cgit v1.2.3-71-gd317 From 964975ac6677c97ae61ec9d6969dd5d03f18d1c3 Mon Sep 17 00:00:00 2001 From: Huang Shijie Date: Wed, 4 Dec 2019 16:52:03 -0800 Subject: lib/genalloc.c: rename addr_in_gen_pool to gen_pool_has_addr Follow the kernel conventions, rename addr_in_gen_pool to gen_pool_has_addr. 
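A hedged usage sketch of the renamed helper (the pool and addresses are illustrative; behaviour is unchanged by the rename, only the name now follows the gen_pool_* convention):

  #include <linux/genalloc.h>

  /* Illustrative: refuse to free a buffer back into 'pool' unless the
   * whole [addr, addr + size) range really belongs to that pool. */
  static void demo_free_to_pool(struct gen_pool *pool,
                                unsigned long addr, size_t size)
  {
          if (!gen_pool_has_addr(pool, addr, size))
                  return;
          gen_pool_free(pool, addr, size);
  }
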
[sjhuang@iluvatar.ai: fix Documentation/ too] Link: http://lkml.kernel.org/r/20181229015914.5573-1-sjhuang@iluvatar.ai Link: http://lkml.kernel.org/r/20181228083950.20398-1-sjhuang@iluvatar.ai Signed-off-by: Huang Shijie Reviewed-by: Andrew Morton Cc: Russell King Cc: Arnd Bergmann Cc: Greg Kroah-Hartman Cc: Christoph Hellwig Cc: Marek Szyprowski Cc: Robin Murphy Cc: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/core-api/genalloc.rst | 2 +- arch/arm/mm/dma-mapping.c | 2 +- drivers/misc/sram-exec.c | 2 +- include/linux/genalloc.h | 2 +- kernel/dma/remap.c | 2 +- lib/genalloc.c | 6 +++--- 6 files changed, 8 insertions(+), 8 deletions(-) (limited to 'kernel') diff --git a/Documentation/core-api/genalloc.rst b/Documentation/core-api/genalloc.rst index 098a46f55798..a5af2cbf58a5 100644 --- a/Documentation/core-api/genalloc.rst +++ b/Documentation/core-api/genalloc.rst @@ -129,7 +129,7 @@ writing of special-purpose memory allocators in the future. :functions: gen_pool_for_each_chunk .. kernel-doc:: lib/genalloc.c - :functions: addr_in_gen_pool + :functions: gen_pool_has_addr .. kernel-doc:: lib/genalloc.c :functions: gen_pool_avail diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 1df6eb42f22e..e822af0d9219 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -529,7 +529,7 @@ static void *__alloc_from_pool(size_t size, struct page **ret_page) static bool __in_atomic_pool(void *start, size_t size) { - return addr_in_gen_pool(atomic_pool, (unsigned long)start, size); + return gen_pool_has_addr(atomic_pool, (unsigned long)start, size); } static int __free_from_pool(void *start, size_t size) diff --git a/drivers/misc/sram-exec.c b/drivers/misc/sram-exec.c index 426ad912b441..d054e2842a5f 100644 --- a/drivers/misc/sram-exec.c +++ b/drivers/misc/sram-exec.c @@ -96,7 +96,7 @@ void *sram_exec_copy(struct gen_pool *pool, void *dst, void *src, if (!part) return NULL; - if (!addr_in_gen_pool(pool, (unsigned long)dst, size)) + if (!gen_pool_has_addr(pool, (unsigned long)dst, size)) return NULL; base = (unsigned long)part->base; diff --git a/include/linux/genalloc.h b/include/linux/genalloc.h index 4bd583bd6934..5b14a0f38124 100644 --- a/include/linux/genalloc.h +++ b/include/linux/genalloc.h @@ -206,7 +206,7 @@ extern struct gen_pool *devm_gen_pool_create(struct device *dev, int min_alloc_order, int nid, const char *name); extern struct gen_pool *gen_pool_get(struct device *dev, const char *name); -bool addr_in_gen_pool(struct gen_pool *pool, unsigned long start, +extern bool gen_pool_has_addr(struct gen_pool *pool, unsigned long start, size_t size); #ifdef CONFIG_OF diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c index d47bd40fc0f5..d14cbc83986a 100644 --- a/kernel/dma/remap.c +++ b/kernel/dma/remap.c @@ -178,7 +178,7 @@ bool dma_in_atomic_pool(void *start, size_t size) if (unlikely(!atomic_pool)) return false; - return addr_in_gen_pool(atomic_pool, (unsigned long)start, size); + return gen_pool_has_addr(atomic_pool, (unsigned long)start, size); } void *dma_alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags) diff --git a/lib/genalloc.c b/lib/genalloc.c index af9a57422186..7f1244b5294a 100644 --- a/lib/genalloc.c +++ b/lib/genalloc.c @@ -540,7 +540,7 @@ void gen_pool_for_each_chunk(struct gen_pool *pool, EXPORT_SYMBOL(gen_pool_for_each_chunk); /** - * addr_in_gen_pool - checks if an address falls within the range of a pool + * gen_pool_has_addr - checks if an address falls within the range of a 
pool * @pool: the generic memory pool * @start: start address * @size: size of the region @@ -548,7 +548,7 @@ EXPORT_SYMBOL(gen_pool_for_each_chunk); * Check if the range of addresses falls within the specified pool. Returns * true if the entire range is contained in the pool and false otherwise. */ -bool addr_in_gen_pool(struct gen_pool *pool, unsigned long start, +bool gen_pool_has_addr(struct gen_pool *pool, unsigned long start, size_t size) { bool found = false; @@ -567,7 +567,7 @@ bool addr_in_gen_pool(struct gen_pool *pool, unsigned long start, rcu_read_unlock(); return found; } -EXPORT_SYMBOL(addr_in_gen_pool); +EXPORT_SYMBOL(gen_pool_has_addr); /** * gen_pool_avail - get available free space of the pool -- cgit v1.2.3-71-gd317 From eec028c9386ed1a692aa01a85b55952202b41619 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Wed, 4 Dec 2019 16:52:43 -0800 Subject: kcov: remote coverage support Patch series " kcov: collect coverage from usb and vhost", v3. This patchset extends kcov to allow collecting coverage from backgound kernel threads. This extension requires custom annotations for each of the places where coverage collection is desired. This patchset implements this for hub events in the USB subsystem and for vhost workers. See the first patch description for details about the kcov extension. The other two patches apply this kcov extension to USB and vhost. Examples of other subsystems that might potentially benefit from this when custom annotations are added (the list is based on process_one_work() callers for bugs recently reported by syzbot): 1. fs: writeback wb_workfn() worker, 2. net: addrconf_dad_work()/addrconf_verify_work() workers, 3. net: neigh_periodic_work() worker, 4. net/p9: p9_write_work()/p9_read_work() workers, 5. block: blk_mq_run_work_fn() worker. These patches have been used to enable coverage-guided USB fuzzing with syzkaller for the last few years, see the details here: https://github.com/google/syzkaller/blob/master/docs/linux/external_fuzzing_usb.md This patchset has been pushed to the public Linux kernel Gerrit instance: https://linux-review.googlesource.com/c/linux/kernel/git/torvalds/linux/+/1524 This patch (of 3): Add background thread coverage collection ability to kcov. With KCOV_ENABLE coverage is collected only for syscalls that are issued from the current process. With KCOV_REMOTE_ENABLE it's possible to collect coverage for arbitrary parts of the kernel code, provided that those parts are annotated with kcov_remote_start()/kcov_remote_stop(). This allows to collect coverage from two types of kernel background threads: the global ones, that are spawned during kernel boot in a limited number of instances (e.g. one USB hub_event() worker thread is spawned per USB HCD); and the local ones, that are spawned when a user interacts with some kernel interface (e.g. vhost workers). To enable collecting coverage from a global background thread, a unique global handle must be assigned and passed to the corresponding kcov_remote_start() call. Then a userspace process can pass a list of such handles to the KCOV_REMOTE_ENABLE ioctl in the handles array field of the kcov_remote_arg struct. This will attach the used kcov device to the code sections, that are referenced by those handles. Since there might be many local background threads spawned from different userspace processes, we can't use a single global handle per annotation. Instead, the userspace process passes a non-zero handle through the common_handle field of the kcov_remote_arg struct. 
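In kernel code, the annotation described just below looks roughly like this (a hedged sketch; the worker and its handle plumbing are illustrative and not part of this patch -- the real USB and vhost annotations are added by the follow-up patches of this series):

  #include <linux/kcov.h>
  #include <linux/kthread.h>

  /* Illustrative local background thread. The spawning syscall handler
   * captured 'handle' from the issuing task with kcov_common_handle()
   * and passed it here, e.g. through the thread's private data. */
  static int demo_worker(void *data)
  {
          u64 handle = *(u64 *)data;

          while (!kthread_should_stop()) {
                  kcov_remote_start_common(handle);
                  /* ... work whose coverage should be attributed to the
                   * process that issued KCOV_REMOTE_ENABLE ... */
                  kcov_remote_stop();
          }
          return 0;
  }
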
This common handle gets saved to the kcov_handle field in the current task_struct and needs to be passed to the newly spawned threads via custom annotations. Those threads should in turn be annotated with kcov_remote_start()/kcov_remote_stop(). Internally kcov stores handles as u64 integers. The top byte of a handle is used to denote the id of a subsystem that this handle belongs to, and the lower 4 bytes are used to denote the id of a thread instance within that subsystem. A reserved value 0 is used as a subsystem id for common handles as they don't belong to a particular subsystem. The bytes 4-7 are currently reserved and must be zero. In the future the number of bytes used for the subsystem or handle ids might be increased. When a particular userspace process collects coverage by via a common handle, kcov will collect coverage for each code section that is annotated to use the common handle obtained as kcov_handle from the current task_struct. However non common handles allow to collect coverage selectively from different subsystems. Link: http://lkml.kernel.org/r/e90e315426a384207edbec1d6aa89e43008e4caf.1572366574.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Cc: Dmitry Vyukov Cc: Greg Kroah-Hartman Cc: Alan Stern Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Arnd Bergmann Cc: Steven Rostedt Cc: David Windsor Cc: Elena Reshetova Cc: Anders Roxell Cc: Alexander Potapenko Cc: Marco Elver Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/dev-tools/kcov.rst | 129 +++++++++ include/linux/kcov.h | 23 ++ include/linux/sched.h | 8 + include/uapi/linux/kcov.h | 28 ++ kernel/kcov.c | 547 ++++++++++++++++++++++++++++++++++++--- 5 files changed, 700 insertions(+), 35 deletions(-) (limited to 'kernel') diff --git a/Documentation/dev-tools/kcov.rst b/Documentation/dev-tools/kcov.rst index 42b612677799..36890b026e77 100644 --- a/Documentation/dev-tools/kcov.rst +++ b/Documentation/dev-tools/kcov.rst @@ -34,6 +34,7 @@ Profiling data will only become accessible once debugfs has been mounted:: Coverage collection ------------------- + The following program demonstrates coverage collection from within a test program using kcov: @@ -128,6 +129,7 @@ only need to enable coverage (disable happens automatically on thread end). Comparison operands collection ------------------------------ + Comparison operands collection is similar to coverage collection: .. code-block:: c @@ -202,3 +204,130 @@ Comparison operands collection is similar to coverage collection: Note that the kcov modes (coverage collection or comparison operands) are mutually exclusive. + +Remote coverage collection +-------------------------- + +With KCOV_ENABLE coverage is collected only for syscalls that are issued +from the current process. With KCOV_REMOTE_ENABLE it's possible to collect +coverage for arbitrary parts of the kernel code, provided that those parts +are annotated with kcov_remote_start()/kcov_remote_stop(). + +This allows to collect coverage from two types of kernel background +threads: the global ones, that are spawned during kernel boot in a limited +number of instances (e.g. one USB hub_event() worker thread is spawned per +USB HCD); and the local ones, that are spawned when a user interacts with +some kernel interface (e.g. vhost workers). + +To enable collecting coverage from a global background thread, a unique +global handle must be assigned and passed to the corresponding +kcov_remote_start() call. 
Then a userspace process can pass a list of such +handles to the KCOV_REMOTE_ENABLE ioctl in the handles array field of the +kcov_remote_arg struct. This will attach the used kcov device to the code +sections, that are referenced by those handles. + +Since there might be many local background threads spawned from different +userspace processes, we can't use a single global handle per annotation. +Instead, the userspace process passes a non-zero handle through the +common_handle field of the kcov_remote_arg struct. This common handle gets +saved to the kcov_handle field in the current task_struct and needs to be +passed to the newly spawned threads via custom annotations. Those threads +should in turn be annotated with kcov_remote_start()/kcov_remote_stop(). + +Internally kcov stores handles as u64 integers. The top byte of a handle +is used to denote the id of a subsystem that this handle belongs to, and +the lower 4 bytes are used to denote the id of a thread instance within +that subsystem. A reserved value 0 is used as a subsystem id for common +handles as they don't belong to a particular subsystem. The bytes 4-7 are +currently reserved and must be zero. In the future the number of bytes +used for the subsystem or handle ids might be increased. + +When a particular userspace proccess collects coverage by via a common +handle, kcov will collect coverage for each code section that is annotated +to use the common handle obtained as kcov_handle from the current +task_struct. However non common handles allow to collect coverage +selectively from different subsystems. + +.. code-block:: c + + struct kcov_remote_arg { + unsigned trace_mode; + unsigned area_size; + unsigned num_handles; + uint64_t common_handle; + uint64_t handles[0]; + }; + + #define KCOV_INIT_TRACE _IOR('c', 1, unsigned long) + #define KCOV_DISABLE _IO('c', 101) + #define KCOV_REMOTE_ENABLE _IOW('c', 102, struct kcov_remote_arg) + + #define COVER_SIZE (64 << 10) + + #define KCOV_TRACE_PC 0 + + #define KCOV_SUBSYSTEM_COMMON (0x00ull << 56) + #define KCOV_SUBSYSTEM_USB (0x01ull << 56) + + #define KCOV_SUBSYSTEM_MASK (0xffull << 56) + #define KCOV_INSTANCE_MASK (0xffffffffull) + + static inline __u64 kcov_remote_handle(__u64 subsys, __u64 inst) + { + if (subsys & ~KCOV_SUBSYSTEM_MASK || inst & ~KCOV_INSTANCE_MASK) + return 0; + return subsys | inst; + } + + #define KCOV_COMMON_ID 0x42 + #define KCOV_USB_BUS_NUM 1 + + int main(int argc, char **argv) + { + int fd; + unsigned long *cover, n, i; + struct kcov_remote_arg *arg; + + fd = open("/sys/kernel/debug/kcov", O_RDWR); + if (fd == -1) + perror("open"), exit(1); + if (ioctl(fd, KCOV_INIT_TRACE, COVER_SIZE)) + perror("ioctl"), exit(1); + cover = (unsigned long*)mmap(NULL, COVER_SIZE * sizeof(unsigned long), + PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + if ((void*)cover == MAP_FAILED) + perror("mmap"), exit(1); + + /* Enable coverage collection via common handle and from USB bus #1. 
*/ + arg = calloc(1, sizeof(*arg) + sizeof(uint64_t)); + if (!arg) + perror("calloc"), exit(1); + arg->trace_mode = KCOV_TRACE_PC; + arg->area_size = COVER_SIZE; + arg->num_handles = 1; + arg->common_handle = kcov_remote_handle(KCOV_SUBSYSTEM_COMMON, + KCOV_COMMON_ID); + arg->handles[0] = kcov_remote_handle(KCOV_SUBSYSTEM_USB, + KCOV_USB_BUS_NUM); + if (ioctl(fd, KCOV_REMOTE_ENABLE, arg)) + perror("ioctl"), free(arg), exit(1); + free(arg); + + /* + * Here the user needs to trigger execution of a kernel code section + * that is either annotated with the common handle, or to trigger some + * activity on USB bus #1. + */ + sleep(2); + + n = __atomic_load_n(&cover[0], __ATOMIC_RELAXED); + for (i = 0; i < n; i++) + printf("0x%lx\n", cover[i + 1]); + if (ioctl(fd, KCOV_DISABLE, 0)) + perror("ioctl"), exit(1); + if (munmap(cover, COVER_SIZE * sizeof(unsigned long))) + perror("munmap"), exit(1); + if (close(fd)) + perror("close"), exit(1); + return 0; + } diff --git a/include/linux/kcov.h b/include/linux/kcov.h index b76a1807028d..a10e84707d82 100644 --- a/include/linux/kcov.h +++ b/include/linux/kcov.h @@ -37,12 +37,35 @@ do { \ (t)->kcov_mode &= ~KCOV_IN_CTXSW; \ } while (0) +/* See Documentation/dev-tools/kcov.rst for usage details. */ +void kcov_remote_start(u64 handle); +void kcov_remote_stop(void); +u64 kcov_common_handle(void); + +static inline void kcov_remote_start_common(u64 id) +{ + kcov_remote_start(kcov_remote_handle(KCOV_SUBSYSTEM_COMMON, id)); +} + +static inline void kcov_remote_start_usb(u64 id) +{ + kcov_remote_start(kcov_remote_handle(KCOV_SUBSYSTEM_USB, id)); +} + #else static inline void kcov_task_init(struct task_struct *t) {} static inline void kcov_task_exit(struct task_struct *t) {} static inline void kcov_prepare_switch(struct task_struct *t) {} static inline void kcov_finish_switch(struct task_struct *t) {} +static inline void kcov_remote_start(u64 handle) {} +static inline void kcov_remote_stop(void) {} +static inline u64 kcov_common_handle(void) +{ + return 0; +} +static inline void kcov_remote_start_common(u64 id) {} +static inline void kcov_remote_start_usb(u64 id) {} #endif /* CONFIG_KCOV */ #endif /* _LINUX_KCOV_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 0cd97d9dd021..467d26046416 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1210,6 +1210,8 @@ struct task_struct { #endif /* CONFIG_TRACING */ #ifdef CONFIG_KCOV + /* See kernel/kcov.c for more details. */ + /* Coverage collection mode enabled for this task (0 if disabled): */ unsigned int kcov_mode; @@ -1221,6 +1223,12 @@ struct task_struct { /* KCOV descriptor wired with this task or NULL: */ struct kcov *kcov; + + /* KCOV common handle for remote coverage collection: */ + u64 kcov_handle; + + /* KCOV sequence number: */ + int kcov_sequence; #endif #ifdef CONFIG_MEMCG diff --git a/include/uapi/linux/kcov.h b/include/uapi/linux/kcov.h index 9529867717a8..409d3ad1e6e2 100644 --- a/include/uapi/linux/kcov.h +++ b/include/uapi/linux/kcov.h @@ -4,9 +4,24 @@ #include +/* + * Argument for KCOV_REMOTE_ENABLE ioctl, see Documentation/dev-tools/kcov.rst + * and the comment before kcov_remote_start() for usage details. 
+ */ +struct kcov_remote_arg { + unsigned int trace_mode; /* KCOV_TRACE_PC or KCOV_TRACE_CMP */ + unsigned int area_size; /* Length of coverage buffer in words */ + unsigned int num_handles; /* Size of handles array */ + __u64 common_handle; + __u64 handles[0]; +}; + +#define KCOV_REMOTE_MAX_HANDLES 0x100 + #define KCOV_INIT_TRACE _IOR('c', 1, unsigned long) #define KCOV_ENABLE _IO('c', 100) #define KCOV_DISABLE _IO('c', 101) +#define KCOV_REMOTE_ENABLE _IOW('c', 102, struct kcov_remote_arg) enum { /* @@ -32,4 +47,17 @@ enum { #define KCOV_CMP_SIZE(n) ((n) << 1) #define KCOV_CMP_MASK KCOV_CMP_SIZE(3) +#define KCOV_SUBSYSTEM_COMMON (0x00ull << 56) +#define KCOV_SUBSYSTEM_USB (0x01ull << 56) + +#define KCOV_SUBSYSTEM_MASK (0xffull << 56) +#define KCOV_INSTANCE_MASK (0xffffffffull) + +static inline __u64 kcov_remote_handle(__u64 subsys, __u64 inst) +{ + if (subsys & ~KCOV_SUBSYSTEM_MASK || inst & ~KCOV_INSTANCE_MASK) + return 0; + return subsys | inst; +} + #endif /* _LINUX_KCOV_IOCTLS_H */ diff --git a/kernel/kcov.c b/kernel/kcov.c index 2ee38727844a..f50354202dbe 100644 --- a/kernel/kcov.c +++ b/kernel/kcov.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -21,8 +22,11 @@ #include #include #include +#include #include +#define kcov_debug(fmt, ...) pr_debug("%s: " fmt, __func__, ##__VA_ARGS__) + /* Number of 64-bit words written per one comparison: */ #define KCOV_WORDS_PER_CMP 4 @@ -44,19 +48,100 @@ struct kcov { * Reference counter. We keep one for: * - opened file descriptor * - task with enabled coverage (we can't unwire it from another task) + * - each code section for remote coverage collection */ refcount_t refcount; /* The lock protects mode, size, area and t. */ spinlock_t lock; enum kcov_mode mode; - /* Size of arena (in long's for KCOV_MODE_TRACE). */ - unsigned size; + /* Size of arena (in long's). */ + unsigned int size; /* Coverage buffer shared with user space. */ void *area; /* Task for which we collect coverage, or NULL. */ struct task_struct *t; + /* Collecting coverage from remote (background) threads. */ + bool remote; + /* Size of remote area (in long's). */ + unsigned int remote_size; + /* + * Sequence is incremented each time kcov is reenabled, used by + * kcov_remote_stop(), see the comment there. + */ + int sequence; }; +struct kcov_remote_area { + struct list_head list; + unsigned int size; +}; + +struct kcov_remote { + u64 handle; + struct kcov *kcov; + struct hlist_node hnode; +}; + +static DEFINE_SPINLOCK(kcov_remote_lock); +static DEFINE_HASHTABLE(kcov_remote_map, 4); +static struct list_head kcov_remote_areas = LIST_HEAD_INIT(kcov_remote_areas); + +/* Must be called with kcov_remote_lock locked. */ +static struct kcov_remote *kcov_remote_find(u64 handle) +{ + struct kcov_remote *remote; + + hash_for_each_possible(kcov_remote_map, remote, hnode, handle) { + if (remote->handle == handle) + return remote; + } + return NULL; +} + +static struct kcov_remote *kcov_remote_add(struct kcov *kcov, u64 handle) +{ + struct kcov_remote *remote; + + if (kcov_remote_find(handle)) + return ERR_PTR(-EEXIST); + remote = kmalloc(sizeof(*remote), GFP_ATOMIC); + if (!remote) + return ERR_PTR(-ENOMEM); + remote->handle = handle; + remote->kcov = kcov; + hash_add(kcov_remote_map, &remote->hnode, handle); + return remote; +} + +/* Must be called with kcov_remote_lock locked. 
*/ +static struct kcov_remote_area *kcov_remote_area_get(unsigned int size) +{ + struct kcov_remote_area *area; + struct list_head *pos; + + kcov_debug("size = %u\n", size); + list_for_each(pos, &kcov_remote_areas) { + area = list_entry(pos, struct kcov_remote_area, list); + if (area->size == size) { + list_del(&area->list); + kcov_debug("rv = %px\n", area); + return area; + } + } + kcov_debug("rv = NULL\n"); + return NULL; +} + +/* Must be called with kcov_remote_lock locked. */ +static void kcov_remote_area_put(struct kcov_remote_area *area, + unsigned int size) +{ + kcov_debug("area = %px, size = %u\n", area, size); + INIT_LIST_HEAD(&area->list); + area->size = size; + list_add(&area->list, &kcov_remote_areas); +} + static notrace bool check_kcov_mode(enum kcov_mode needed_mode, struct task_struct *t) { unsigned int mode; @@ -73,7 +158,7 @@ static notrace bool check_kcov_mode(enum kcov_mode needed_mode, struct task_stru * in_interrupt() returns false (e.g. preempt_schedule_irq()). * READ_ONCE()/barrier() effectively provides load-acquire wrt * interrupts, there are paired barrier()/WRITE_ONCE() in - * kcov_ioctl_locked(). + * kcov_start(). */ barrier(); return mode == needed_mode; @@ -227,6 +312,78 @@ void notrace __sanitizer_cov_trace_switch(u64 val, u64 *cases) EXPORT_SYMBOL(__sanitizer_cov_trace_switch); #endif /* ifdef CONFIG_KCOV_ENABLE_COMPARISONS */ +static void kcov_start(struct task_struct *t, unsigned int size, + void *area, enum kcov_mode mode, int sequence) +{ + kcov_debug("t = %px, size = %u, area = %px\n", t, size, area); + /* Cache in task struct for performance. */ + t->kcov_size = size; + t->kcov_area = area; + /* See comment in check_kcov_mode(). */ + barrier(); + WRITE_ONCE(t->kcov_mode, mode); + t->kcov_sequence = sequence; +} + +static void kcov_stop(struct task_struct *t) +{ + WRITE_ONCE(t->kcov_mode, KCOV_MODE_DISABLED); + barrier(); + t->kcov_size = 0; + t->kcov_area = NULL; +} + +static void kcov_task_reset(struct task_struct *t) +{ + kcov_stop(t); + t->kcov = NULL; + t->kcov_sequence = 0; + t->kcov_handle = 0; +} + +void kcov_task_init(struct task_struct *t) +{ + kcov_task_reset(t); + t->kcov_handle = current->kcov_handle; +} + +static void kcov_reset(struct kcov *kcov) +{ + kcov->t = NULL; + kcov->mode = KCOV_MODE_INIT; + kcov->remote = false; + kcov->remote_size = 0; + kcov->sequence++; +} + +static void kcov_remote_reset(struct kcov *kcov) +{ + int bkt; + struct kcov_remote *remote; + struct hlist_node *tmp; + + spin_lock(&kcov_remote_lock); + hash_for_each_safe(kcov_remote_map, bkt, tmp, remote, hnode) { + if (remote->kcov != kcov) + continue; + kcov_debug("removing handle %llx\n", remote->handle); + hash_del(&remote->hnode); + kfree(remote); + } + /* Do reset before unlock to prevent races with kcov_remote_start(). 
*/ + kcov_reset(kcov); + spin_unlock(&kcov_remote_lock); +} + +static void kcov_disable(struct task_struct *t, struct kcov *kcov) +{ + kcov_task_reset(t); + if (kcov->remote) + kcov_remote_reset(kcov); + else + kcov_reset(kcov); +} + static void kcov_get(struct kcov *kcov) { refcount_inc(&kcov->refcount); @@ -235,20 +392,12 @@ static void kcov_get(struct kcov *kcov) static void kcov_put(struct kcov *kcov) { if (refcount_dec_and_test(&kcov->refcount)) { + kcov_remote_reset(kcov); vfree(kcov->area); kfree(kcov); } } -void kcov_task_init(struct task_struct *t) -{ - WRITE_ONCE(t->kcov_mode, KCOV_MODE_DISABLED); - barrier(); - t->kcov_size = 0; - t->kcov_area = NULL; - t->kcov = NULL; -} - void kcov_task_exit(struct task_struct *t) { struct kcov *kcov; @@ -256,15 +405,36 @@ void kcov_task_exit(struct task_struct *t) kcov = t->kcov; if (kcov == NULL) return; + spin_lock(&kcov->lock); + kcov_debug("t = %px, kcov->t = %px\n", t, kcov->t); + /* + * For KCOV_ENABLE devices we want to make sure that t->kcov->t == t, + * which comes down to: + * WARN_ON(!kcov->remote && kcov->t != t); + * + * For KCOV_REMOTE_ENABLE devices, the exiting task is either: + * 2. A remote task between kcov_remote_start() and kcov_remote_stop(). + * In this case we should print a warning right away, since a task + * shouldn't be exiting when it's in a kcov coverage collection + * section. Here t points to the task that is collecting remote + * coverage, and t->kcov->t points to the thread that created the + * kcov device. Which means that to detect this case we need to + * check that t != t->kcov->t, and this gives us the following: + * WARN_ON(kcov->remote && kcov->t != t); + * + * 2. The task that created kcov exiting without calling KCOV_DISABLE, + * and then again we can make sure that t->kcov->t == t: + * WARN_ON(kcov->remote && kcov->t != t); + * + * By combining all three checks into one we get: + */ if (WARN_ON(kcov->t != t)) { spin_unlock(&kcov->lock); return; } /* Just to not leave dangling references behind. */ - kcov_task_init(t); - kcov->t = NULL; - kcov->mode = KCOV_MODE_INIT; + kcov_disable(t, kcov); spin_unlock(&kcov->lock); kcov_put(kcov); } @@ -313,6 +483,7 @@ static int kcov_open(struct inode *inode, struct file *filep) if (!kcov) return -ENOMEM; kcov->mode = KCOV_MODE_DISABLED; + kcov->sequence = 1; refcount_set(&kcov->refcount, 1); spin_lock_init(&kcov->lock); filep->private_data = kcov; @@ -325,6 +496,20 @@ static int kcov_close(struct inode *inode, struct file *filep) return 0; } +static int kcov_get_mode(unsigned long arg) +{ + if (arg == KCOV_TRACE_PC) + return KCOV_MODE_TRACE_PC; + else if (arg == KCOV_TRACE_CMP) +#ifdef CONFIG_KCOV_ENABLE_COMPARISONS + return KCOV_MODE_TRACE_CMP; +#else + return -ENOTSUPP; +#endif + else + return -EINVAL; +} + /* * Fault in a lazily-faulted vmalloc area before it can be used by * __santizer_cov_trace_pc(), to avoid recursion issues if any code on the @@ -340,14 +525,35 @@ static void kcov_fault_in_area(struct kcov *kcov) READ_ONCE(area[offset]); } +static inline bool kcov_check_handle(u64 handle, bool common_valid, + bool uncommon_valid, bool zero_valid) +{ + if (handle & ~(KCOV_SUBSYSTEM_MASK | KCOV_INSTANCE_MASK)) + return false; + switch (handle & KCOV_SUBSYSTEM_MASK) { + case KCOV_SUBSYSTEM_COMMON: + return (handle & KCOV_INSTANCE_MASK) ? 
+ common_valid : zero_valid; + case KCOV_SUBSYSTEM_USB: + return uncommon_valid; + default: + return false; + } + return false; +} + static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd, unsigned long arg) { struct task_struct *t; unsigned long size, unused; + int mode, i; + struct kcov_remote_arg *remote_arg; + struct kcov_remote *remote; switch (cmd) { case KCOV_INIT_TRACE: + kcov_debug("KCOV_INIT_TRACE\n"); /* * Enable kcov in trace mode and setup buffer size. * Must happen before anything else. @@ -366,6 +572,7 @@ static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd, kcov->mode = KCOV_MODE_INIT; return 0; case KCOV_ENABLE: + kcov_debug("KCOV_ENABLE\n"); /* * Enable coverage for the current task. * At this point user must have been enabled trace mode, @@ -378,29 +585,20 @@ static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd, t = current; if (kcov->t != NULL || t->kcov != NULL) return -EBUSY; - if (arg == KCOV_TRACE_PC) - kcov->mode = KCOV_MODE_TRACE_PC; - else if (arg == KCOV_TRACE_CMP) -#ifdef CONFIG_KCOV_ENABLE_COMPARISONS - kcov->mode = KCOV_MODE_TRACE_CMP; -#else - return -ENOTSUPP; -#endif - else - return -EINVAL; + mode = kcov_get_mode(arg); + if (mode < 0) + return mode; kcov_fault_in_area(kcov); - /* Cache in task struct for performance. */ - t->kcov_size = kcov->size; - t->kcov_area = kcov->area; - /* See comment in check_kcov_mode(). */ - barrier(); - WRITE_ONCE(t->kcov_mode, kcov->mode); + kcov->mode = mode; + kcov_start(t, kcov->size, kcov->area, kcov->mode, + kcov->sequence); t->kcov = kcov; kcov->t = t; - /* This is put either in kcov_task_exit() or in KCOV_DISABLE. */ + /* Put either in kcov_task_exit() or in KCOV_DISABLE. */ kcov_get(kcov); return 0; case KCOV_DISABLE: + kcov_debug("KCOV_DISABLE\n"); /* Disable coverage for the current task. 
*/ unused = arg; if (unused != 0 || current->kcov != kcov) @@ -408,11 +606,65 @@ static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd, t = current; if (WARN_ON(kcov->t != t)) return -EINVAL; - kcov_task_init(t); - kcov->t = NULL; - kcov->mode = KCOV_MODE_INIT; + kcov_disable(t, kcov); kcov_put(kcov); return 0; + case KCOV_REMOTE_ENABLE: + kcov_debug("KCOV_REMOTE_ENABLE\n"); + if (kcov->mode != KCOV_MODE_INIT || !kcov->area) + return -EINVAL; + t = current; + if (kcov->t != NULL || t->kcov != NULL) + return -EBUSY; + remote_arg = (struct kcov_remote_arg *)arg; + mode = kcov_get_mode(remote_arg->trace_mode); + if (mode < 0) + return mode; + if (remote_arg->area_size > LONG_MAX / sizeof(unsigned long)) + return -EINVAL; + kcov->mode = mode; + t->kcov = kcov; + kcov->t = t; + kcov->remote = true; + kcov->remote_size = remote_arg->area_size; + spin_lock(&kcov_remote_lock); + for (i = 0; i < remote_arg->num_handles; i++) { + kcov_debug("handle %llx\n", remote_arg->handles[i]); + if (!kcov_check_handle(remote_arg->handles[i], + false, true, false)) { + spin_unlock(&kcov_remote_lock); + kcov_disable(t, kcov); + return -EINVAL; + } + remote = kcov_remote_add(kcov, remote_arg->handles[i]); + if (IS_ERR(remote)) { + spin_unlock(&kcov_remote_lock); + kcov_disable(t, kcov); + return PTR_ERR(remote); + } + } + if (remote_arg->common_handle) { + kcov_debug("common handle %llx\n", + remote_arg->common_handle); + if (!kcov_check_handle(remote_arg->common_handle, + true, false, false)) { + spin_unlock(&kcov_remote_lock); + kcov_disable(t, kcov); + return -EINVAL; + } + remote = kcov_remote_add(kcov, + remote_arg->common_handle); + if (IS_ERR(remote)) { + spin_unlock(&kcov_remote_lock); + kcov_disable(t, kcov); + return PTR_ERR(remote); + } + t->kcov_handle = remote_arg->common_handle; + } + spin_unlock(&kcov_remote_lock); + /* Put either in kcov_task_exit() or in KCOV_DISABLE. */ + kcov_get(kcov); + return 0; default: return -ENOTTY; } @@ -422,11 +674,35 @@ static long kcov_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) { struct kcov *kcov; int res; + struct kcov_remote_arg *remote_arg = NULL; + unsigned int remote_num_handles; + unsigned long remote_arg_size; + + if (cmd == KCOV_REMOTE_ENABLE) { + if (get_user(remote_num_handles, (unsigned __user *)(arg + + offsetof(struct kcov_remote_arg, num_handles)))) + return -EFAULT; + if (remote_num_handles > KCOV_REMOTE_MAX_HANDLES) + return -EINVAL; + remote_arg_size = struct_size(remote_arg, handles, + remote_num_handles); + remote_arg = memdup_user((void __user *)arg, remote_arg_size); + if (IS_ERR(remote_arg)) + return PTR_ERR(remote_arg); + if (remote_arg->num_handles != remote_num_handles) { + kfree(remote_arg); + return -EINVAL; + } + arg = (unsigned long)remote_arg; + } kcov = filep->private_data; spin_lock(&kcov->lock); res = kcov_ioctl_locked(kcov, cmd, arg); spin_unlock(&kcov->lock); + + kfree(remote_arg); + return res; } @@ -438,6 +714,207 @@ static const struct file_operations kcov_fops = { .release = kcov_close, }; +/* + * kcov_remote_start() and kcov_remote_stop() can be used to annotate a section + * of code in a kernel background thread to allow kcov to be used to collect + * coverage from that part of code. + * + * The handle argument of kcov_remote_start() identifies a code section that is + * used for coverage collection. A userspace process passes this handle to + * KCOV_REMOTE_ENABLE ioctl to make the used kcov device start collecting + * coverage for the code section identified by this handle. 
+ * + * The usage of these annotations in the kernel code is different depending on + * the type of the kernel thread whose code is being annotated. + * + * For global kernel threads that are spawned in a limited number of instances + * (e.g. one USB hub_event() worker thread is spawned per USB HCD), each + * instance must be assigned a unique 4-byte instance id. The instance id is + * then combined with a 1-byte subsystem id to get a handle via + * kcov_remote_handle(subsystem_id, instance_id). + * + * For local kernel threads that are spawned from system call handlers when a + * user interacts with some kernel interface (e.g. vhost workers), a handle is + * passed from a userspace process as the common_handle field of the + * kcov_remote_arg struct (note that the user must generate a handle by using + * kcov_remote_handle() with KCOV_SUBSYSTEM_COMMON as the subsystem id and an + * arbitrary 4-byte non-zero number as the instance id). This common handle + * then gets saved into the task_struct of the process that issued the + * KCOV_REMOTE_ENABLE ioctl. When this process issues system calls that spawn + * kernel threads, the common handle must be retrieved via kcov_common_handle() + * and passed to the spawned threads via custom annotations. Those kernel + * threads must in turn be annotated with kcov_remote_start(common_handle) and + * kcov_remote_stop(). All of the threads that are spawned by the same process + * obtain the same handle, hence the name "common". + * + * See Documentation/dev-tools/kcov.rst for more details. + * + * Internally, this function looks up the kcov device associated with the + * provided handle, allocates an area for coverage collection, and saves the + * pointers to kcov and area into the current task_struct to allow coverage to + * be collected via __sanitizer_cov_trace_pc(). + * In turn, kcov_remote_stop() clears those pointers from task_struct to stop + * collecting coverage and copies all collected coverage into the kcov area. + */ +void kcov_remote_start(u64 handle) +{ + struct kcov_remote *remote; + void *area; + struct task_struct *t; + unsigned int size; + enum kcov_mode mode; + int sequence; + + if (WARN_ON(!kcov_check_handle(handle, true, true, true))) + return; + if (WARN_ON(!in_task())) + return; + t = current; + /* + * Check that kcov_remote_start is not called twice + * nor called by user tasks (with enabled kcov). + */ + if (WARN_ON(t->kcov)) + return; + + kcov_debug("handle = %llx\n", handle); + + spin_lock(&kcov_remote_lock); + remote = kcov_remote_find(handle); + if (!remote) { + kcov_debug("no remote found"); + spin_unlock(&kcov_remote_lock); + return; + } + /* Put in kcov_remote_stop(). */ + kcov_get(remote->kcov); + t->kcov = remote->kcov; + /* + * Read kcov fields before unlock to prevent races with + * KCOV_DISABLE / kcov_remote_reset(). + */ + size = remote->kcov->remote_size; + mode = remote->kcov->mode; + sequence = remote->kcov->sequence; + area = kcov_remote_area_get(size); + spin_unlock(&kcov_remote_lock); + + if (!area) { + area = vmalloc(size * sizeof(unsigned long)); + if (!area) { + t->kcov = NULL; + kcov_put(remote->kcov); + return; + } + } + /* Reset coverage size.
*/ + *(u64 *)area = 0; + + kcov_debug("area = %px, size = %u", area, size); + + kcov_start(t, size, area, mode, sequence); + +} +EXPORT_SYMBOL(kcov_remote_start); + +static void kcov_move_area(enum kcov_mode mode, void *dst_area, + unsigned int dst_area_size, void *src_area) +{ + u64 word_size = sizeof(unsigned long); + u64 count_size, entry_size_log; + u64 dst_len, src_len; + void *dst_entries, *src_entries; + u64 dst_occupied, dst_free, bytes_to_move, entries_moved; + + kcov_debug("%px %u <= %px %lu\n", + dst_area, dst_area_size, src_area, *(unsigned long *)src_area); + + switch (mode) { + case KCOV_MODE_TRACE_PC: + dst_len = READ_ONCE(*(unsigned long *)dst_area); + src_len = *(unsigned long *)src_area; + count_size = sizeof(unsigned long); + entry_size_log = __ilog2_u64(sizeof(unsigned long)); + break; + case KCOV_MODE_TRACE_CMP: + dst_len = READ_ONCE(*(u64 *)dst_area); + src_len = *(u64 *)src_area; + count_size = sizeof(u64); + BUILD_BUG_ON(!is_power_of_2(KCOV_WORDS_PER_CMP)); + entry_size_log = __ilog2_u64(sizeof(u64) * KCOV_WORDS_PER_CMP); + break; + default: + WARN_ON(1); + return; + } + + /* As arm can't divide u64 integers, use log of entry size. */ + if (dst_len > ((dst_area_size * word_size - count_size) >> + entry_size_log)) + return; + dst_occupied = count_size + (dst_len << entry_size_log); + dst_free = dst_area_size * word_size - dst_occupied; + bytes_to_move = min(dst_free, src_len << entry_size_log); + dst_entries = dst_area + dst_occupied; + src_entries = src_area + count_size; + memcpy(dst_entries, src_entries, bytes_to_move); + entries_moved = bytes_to_move >> entry_size_log; + + switch (mode) { + case KCOV_MODE_TRACE_PC: + WRITE_ONCE(*(unsigned long *)dst_area, dst_len + entries_moved); + break; + case KCOV_MODE_TRACE_CMP: + WRITE_ONCE(*(u64 *)dst_area, dst_len + entries_moved); + break; + default: + break; + } +} + +/* See the comment before kcov_remote_start() for usage details. */ +void kcov_remote_stop(void) +{ + struct task_struct *t = current; + struct kcov *kcov = t->kcov; + void *area = t->kcov_area; + unsigned int size = t->kcov_size; + int sequence = t->kcov_sequence; + + if (!kcov) { + kcov_debug("no kcov found\n"); + return; + } + + kcov_stop(t); + t->kcov = NULL; + + spin_lock(&kcov->lock); + /* + * KCOV_DISABLE could have been called between kcov_remote_start() + * and kcov_remote_stop(), hence the check. + */ + kcov_debug("move if: %d == %d && %d\n", + sequence, kcov->sequence, (int)kcov->remote); + if (sequence == kcov->sequence && kcov->remote) + kcov_move_area(kcov->mode, kcov->area, kcov->size, area); + spin_unlock(&kcov->lock); + + spin_lock(&kcov_remote_lock); + kcov_remote_area_put(area, size); + spin_unlock(&kcov_remote_lock); + + kcov_put(kcov); +} +EXPORT_SYMBOL(kcov_remote_stop); + +/* See the comment before kcov_remote_start() for usage details. */ +u64 kcov_common_handle(void) +{ + return current->kcov_handle; +} +EXPORT_SYMBOL(kcov_common_handle); + static int __init kcov_init(void) { /* -- cgit v1.2.3-71-gd317 From e9eeec58c992c47b394e4f829e4f81b923b0a322 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Wed, 4 Dec 2019 17:06:06 -0800 Subject: bpf: Fix a bug when getting subprog 0 jited image in check_attach_btf_id For a jited bpf program, if the subprogram count is 1, i.e., there are no callees in the program, prog->aux->func will be NULL and prog->bpf_func points to the image address of the program.
If there is more than one subprogram, prog->aux->func is populated, and subprogram 0 can be accessed through either prog->bpf_func or prog->aux->func[0]. Other subprograms should be accessed through prog->aux->func[subprog_id]. This patch fixes a bug in check_attach_btf_id(), where prog->aux->func[subprog_id] was used to access any subprogram, causing a segfault like the one below: [79162.619208] BUG: kernel NULL pointer dereference, address: 0000000000000000 ...... [79162.634255] Call Trace: [79162.634974] ? _cond_resched+0x15/0x30 [79162.635686] ? kmem_cache_alloc_trace+0x162/0x220 [79162.636398] ? selinux_bpf_prog_alloc+0x1f/0x60 [79162.637111] bpf_prog_load+0x3de/0x690 [79162.637809] __do_sys_bpf+0x105/0x1740 [79162.638488] do_syscall_64+0x5b/0x180 [79162.639147] entry_SYSCALL_64_after_hwframe+0x44/0xa9 ...... Fixes: 5b92a28aae4d ("bpf: Support attaching tracing BPF program to other BPF programs") Reported-by: Eelco Chaudron Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20191205010606.177774-1-yhs@fb.com --- kernel/bpf/verifier.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index a0482e1c4a77..034ef81f935b 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -9636,7 +9636,10 @@ static int check_attach_btf_id(struct bpf_verifier_env *env) ret = -EINVAL; goto out; } - addr = (long) tgt_prog->aux->func[subprog]->bpf_func; + if (subprog == 0) + addr = (long) tgt_prog->bpf_func; + else + addr = (long) tgt_prog->aux->func[subprog]->bpf_func; } else { addr = kallsyms_lookup_name(tname); if (!addr) { -- cgit v1.2.3-71-gd317 From 1d9a6159bd04b676cb7d9b13245888fa450cec10 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Thu, 28 Nov 2019 08:47:49 +0800 Subject: workqueue: Use pr_warn instead of pr_warning Use pr_warn() instead of the remaining pr_warning() calls.
Link: http://lkml.kernel.org/r/20191128004752.35268-2-wangkefeng.wang@huawei.com To: joe@perches.com To: linux-kernel@vger.kernel.org Cc: gregkh@linuxfoundation.org Cc: tj@kernel.org Cc: arnd@arndb.de Cc: sergey.senozhatsky@gmail.com Cc: rostedt@goodmis.org Signed-off-by: Kefeng Wang Acked-by: Tejun Heo Signed-off-by: Petr Mladek --- kernel/workqueue.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/workqueue.c b/kernel/workqueue.c index bc88fd939f4e..cfc923558e04 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -4374,8 +4374,8 @@ void destroy_workqueue(struct workqueue_struct *wq) for_each_pwq(pwq, wq) { spin_lock_irq(&pwq->pool->lock); if (WARN_ON(pwq_busy(pwq))) { - pr_warning("%s: %s has the following busy pwq\n", - __func__, wq->name); + pr_warn("%s: %s has the following busy pwq\n", + __func__, wq->name); show_pwq(pwq); spin_unlock_irq(&pwq->pool->lock); mutex_unlock(&wq->mutex); -- cgit v1.2.3-71-gd317 From ee19545220a8663f0ca58c3427bc08fd6a104a42 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Fri, 6 Dec 2019 09:25:03 +1100 Subject: Fix up for "printk: Drop pr_warning definition" Link: http://lkml.kernel.org/r/20191206092503.303d6a57@canb.auug.org.au Cc: Linux Next Mailing List Cc: Linux Kernel Mailing List Cc: "Steven Rostedt (VMware)" Cc: Kefeng Wang Cc: Linus Torvalds Signed-off-by: Stephen Rothwell Signed-off-by: Petr Mladek --- kernel/trace/ring_buffer.c | 2 +- kernel/trace/trace.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 4bf050fcfe3b..3f655371eaf6 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -5070,7 +5070,7 @@ static __init int test_ringbuffer(void) int ret = 0; if (security_locked_down(LOCKDOWN_TRACEFS)) { - pr_warning("Lockdown is enabled, skipping ring buffer tests\n"); + pr_warn("Lockdown is enabled, skipping ring buffer tests\n"); return 0; } diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 23459d53d576..6c75410f9698 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1889,7 +1889,7 @@ int __init register_tracer(struct tracer *type) } if (security_locked_down(LOCKDOWN_TRACEFS)) { - pr_warning("Can not register tracer %s due to lockdown\n", + pr_warn("Can not register tracer %s due to lockdown\n", type->name); return -EPERM; } @@ -8796,7 +8796,7 @@ struct dentry *tracing_init_dentry(void) struct trace_array *tr = &global_trace; if (security_locked_down(LOCKDOWN_TRACEFS)) { - pr_warning("Tracing disabled due to lockdown\n"); + pr_warn("Tracing disabled due to lockdown\n"); return ERR_PTR(-EPERM); } @@ -9244,7 +9244,7 @@ __init static int tracer_alloc_buffers(void) if (security_locked_down(LOCKDOWN_TRACEFS)) { - pr_warning("Tracing disabled due to lockdown\n"); + pr_warn("Tracing disabled due to lockdown\n"); return -EPERM; } -- cgit v1.2.3-71-gd317
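
The remote coverage interface added by the kcov patch above is meant to be driven from userspace roughly as follows. This is a minimal sketch, not part of any patch in this log: it assumes the uapi definitions that accompany the series (struct kcov_remote_arg, the KCOV_* ioctls and the KCOV_SUBSYSTEM_* constants in include/uapi/linux/kcov.h), that the kcov device is exposed at /sys/kernel/debug/kcov, and the instance ids 0x1 and 0x42 are arbitrary illustrative values.

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <linux/kcov.h>	/* uapi header from this series (assumed installed) */

#define COVER_SIZE (64 << 10)

/* Mirrors the kernel's kcov_remote_handle(): subsystem id in the top byte,
 * instance id in the low 4 bytes (local helper, assumption). */
static uint64_t remote_handle(uint64_t subsys, uint64_t inst)
{
	return subsys | inst;
}

int main(void)
{
	struct kcov_remote_arg *arg;
	unsigned long *cover, n, i;
	int fd;

	fd = open("/sys/kernel/debug/kcov", O_RDWR);
	if (fd == -1)
		perror("open"), exit(1);
	if (ioctl(fd, KCOV_INIT_TRACE, COVER_SIZE))
		perror("KCOV_INIT_TRACE"), exit(1);
	cover = mmap(NULL, COVER_SIZE * sizeof(unsigned long),
		     PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (cover == MAP_FAILED)
		perror("mmap"), exit(1);

	/* One remote handle (USB bus 1, illustrative) plus a common handle
	 * for kernel threads spawned on behalf of this process. */
	arg = calloc(1, sizeof(*arg) + sizeof(uint64_t));
	if (!arg)
		exit(1);
	arg->trace_mode = KCOV_TRACE_PC;
	arg->area_size = COVER_SIZE;
	arg->num_handles = 1;
	arg->handles[0] = remote_handle(KCOV_SUBSYSTEM_USB, 0x1);
	arg->common_handle = remote_handle(KCOV_SUBSYSTEM_COMMON, 0x42);
	if (ioctl(fd, KCOV_REMOTE_ENABLE, arg))
		perror("KCOV_REMOTE_ENABLE"), exit(1);
	free(arg);	/* the kernel copies the struct during the ioctl */

	sleep(2);	/* let the annotated kernel code run */

	/* Word 0 of the area holds the number of collected PCs. */
	n = __atomic_load_n(&cover[0], __ATOMIC_RELAXED);
	for (i = 0; i < n; i++)
		printf("0x%lx\n", cover[i + 1]);

	if (ioctl(fd, KCOV_DISABLE, 0))
		perror("KCOV_DISABLE"), exit(1);
	close(fd);
	return 0;
}

The common_handle chosen here is what kcov_common_handle() returns inside the kernel for threads spawned on behalf of this process, while handles[0] matches kcov_remote_start() annotations in global workers such as the USB hub_event() thread for bus 1.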
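
On the bpf fix above: the rule the patch encodes is that subprogram 0 of a jited program is always reachable through prog->bpf_func (prog->aux->func may be NULL when there are no callees), while every other subprogram must go through prog->aux->func[]. As a sketch only, a hypothetical helper capturing that access pattern (not part of the patch; kernel context with linux/bpf.h) would look like:

/* Hypothetical helper: resolve the jited image address of a subprogram. */
static long subprog_image_addr(const struct bpf_prog *prog, int subprog)
{
	if (subprog == 0)
		return (long)prog->bpf_func;	/* valid even when aux->func is NULL */
	return (long)prog->aux->func[subprog]->bpf_func;
}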