From 322cea2f41adb62c975f46a3242f4e3b43226fa1 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 25 Mar 2016 00:30:25 +0100 Subject: bpf: add missing map_flags to bpf_map_show_fdinfo Add map_flags attribute to bpf_map_show_fdinfo(), so that tools like tc can check for them when loading objects from a pinned entry, e.g. if user intent wrt allocation (BPF_F_NO_PREALLOC) is different to the pinned object, it can bail out. Follow-up to 6c9059817432 ("bpf: pre-allocate hash map elements"), so that tc can still support this with v4.6. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/syscall.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 2a2efe1bc76c..adc5e4bd74f8 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -137,11 +137,13 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp) "map_type:\t%u\n" "key_size:\t%u\n" "value_size:\t%u\n" - "max_entries:\t%u\n", + "max_entries:\t%u\n" + "map_flags:\t%#x\n", map->map_type, map->key_size, map->value_size, - map->max_entries); + map->max_entries, + map->map_flags); } #endif -- cgit v1.2.3-71-gd317 From f009a7a767e792d5ab0b46c08d46236ea5271dd9 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 24 Mar 2016 15:38:00 +0100 Subject: timers/nohz: Convert tick dependency mask to atomic_t The tick dependency mask was intially unsigned long because this is the type on which clear_bit() operates on and fetch_or() accepts it. But now that we have atomic_fetch_or(), we can instead use atomic_andnot() to clear the bit. This consolidates the type of our tick dependency mask, reduce its size on structures and benefit from possible architecture optimizations on atomic_t operations. Suggested-by: Linus Torvalds Signed-off-by: Frederic Weisbecker Cc: Andrew Morton Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1458830281-4255-3-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 4 ++-- kernel/time/tick-sched.c | 61 ++++++++++++++++++++++++------------------------ kernel/time/tick-sched.h | 2 +- 3 files changed, 33 insertions(+), 34 deletions(-) (limited to 'kernel') diff --git a/include/linux/sched.h b/include/linux/sched.h index 60bba7e032dc..52c4847b05e2 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -720,7 +720,7 @@ struct signal_struct { struct task_cputime cputime_expires; #ifdef CONFIG_NO_HZ_FULL - unsigned long tick_dep_mask; + atomic_t tick_dep_mask; #endif struct list_head cpu_timers[3]; @@ -1549,7 +1549,7 @@ struct task_struct { #endif #ifdef CONFIG_NO_HZ_FULL - unsigned long tick_dep_mask; + atomic_t tick_dep_mask; #endif unsigned long nvcsw, nivcsw; /* context switch counts */ u64 start_time; /* monotonic time in nsec */ diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 084b79f5917e..58e3310c9b21 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -157,52 +157,50 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs) cpumask_var_t tick_nohz_full_mask; cpumask_var_t housekeeping_mask; bool tick_nohz_full_running; -static unsigned long tick_dep_mask; +static atomic_t tick_dep_mask; -static void trace_tick_dependency(unsigned long dep) +static bool check_tick_dependency(atomic_t *dep) { - if (dep & TICK_DEP_MASK_POSIX_TIMER) { + int val = atomic_read(dep); + + if (val & TICK_DEP_MASK_POSIX_TIMER) { trace_tick_stop(0, TICK_DEP_MASK_POSIX_TIMER); - return; + return true; } - if (dep & TICK_DEP_MASK_PERF_EVENTS) { + if (val & TICK_DEP_MASK_PERF_EVENTS) { trace_tick_stop(0, TICK_DEP_MASK_PERF_EVENTS); - return; + return true; } - if (dep & TICK_DEP_MASK_SCHED) { + if (val & TICK_DEP_MASK_SCHED) { trace_tick_stop(0, TICK_DEP_MASK_SCHED); - return; + return true; } - if (dep & TICK_DEP_MASK_CLOCK_UNSTABLE) + if (val & TICK_DEP_MASK_CLOCK_UNSTABLE) { trace_tick_stop(0, TICK_DEP_MASK_CLOCK_UNSTABLE); + return true; + } + + return false; } static bool can_stop_full_tick(struct tick_sched *ts) { WARN_ON_ONCE(!irqs_disabled()); - if (tick_dep_mask) { - trace_tick_dependency(tick_dep_mask); + if (check_tick_dependency(&tick_dep_mask)) return false; - } - if (ts->tick_dep_mask) { - trace_tick_dependency(ts->tick_dep_mask); + if (check_tick_dependency(&ts->tick_dep_mask)) return false; - } - if (current->tick_dep_mask) { - trace_tick_dependency(current->tick_dep_mask); + if (check_tick_dependency(¤t->tick_dep_mask)) return false; - } - if (current->signal->tick_dep_mask) { - trace_tick_dependency(current->signal->tick_dep_mask); + if (check_tick_dependency(¤t->signal->tick_dep_mask)) return false; - } return true; } @@ -259,12 +257,12 @@ static void tick_nohz_full_kick_all(void) preempt_enable(); } -static void tick_nohz_dep_set_all(unsigned long *dep, +static void tick_nohz_dep_set_all(atomic_t *dep, enum tick_dep_bits bit) { - unsigned long prev; + int prev; - prev = fetch_or(dep, BIT_MASK(bit)); + prev = atomic_fetch_or(dep, BIT(bit)); if (!prev) tick_nohz_full_kick_all(); } @@ -280,7 +278,7 @@ void tick_nohz_dep_set(enum tick_dep_bits bit) void tick_nohz_dep_clear(enum tick_dep_bits bit) { - clear_bit(bit, &tick_dep_mask); + atomic_andnot(BIT(bit), &tick_dep_mask); } /* @@ -289,12 +287,12 @@ void tick_nohz_dep_clear(enum tick_dep_bits bit) */ void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit) { - unsigned long prev; + int prev; struct tick_sched *ts; ts = per_cpu_ptr(&tick_cpu_sched, cpu); - prev = fetch_or(&ts->tick_dep_mask, BIT_MASK(bit)); + prev = atomic_fetch_or(&ts->tick_dep_mask, BIT(bit)); if (!prev) { preempt_disable(); /* Perf needs local kick that is NMI safe */ @@ -313,7 +311,7 @@ void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit) { struct tick_sched *ts = per_cpu_ptr(&tick_cpu_sched, cpu); - clear_bit(bit, &ts->tick_dep_mask); + atomic_andnot(BIT(bit), &ts->tick_dep_mask); } /* @@ -331,7 +329,7 @@ void tick_nohz_dep_set_task(struct task_struct *tsk, enum tick_dep_bits bit) void tick_nohz_dep_clear_task(struct task_struct *tsk, enum tick_dep_bits bit) { - clear_bit(bit, &tsk->tick_dep_mask); + atomic_andnot(BIT(bit), &tsk->tick_dep_mask); } /* @@ -345,7 +343,7 @@ void tick_nohz_dep_set_signal(struct signal_struct *sig, enum tick_dep_bits bit) void tick_nohz_dep_clear_signal(struct signal_struct *sig, enum tick_dep_bits bit) { - clear_bit(bit, &sig->tick_dep_mask); + atomic_andnot(BIT(bit), &sig->tick_dep_mask); } /* @@ -366,7 +364,8 @@ void __tick_nohz_task_switch(void) ts = this_cpu_ptr(&tick_cpu_sched); if (ts->tick_stopped) { - if (current->tick_dep_mask || current->signal->tick_dep_mask) + if (atomic_read(¤t->tick_dep_mask) || + atomic_read(¤t->signal->tick_dep_mask)) tick_nohz_full_kick(); } out: diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h index eb4e32566a83..bf38226e5c17 100644 --- a/kernel/time/tick-sched.h +++ b/kernel/time/tick-sched.h @@ -60,7 +60,7 @@ struct tick_sched { u64 next_timer; ktime_t idle_expires; int do_timer_last; - unsigned long tick_dep_mask; + atomic_t tick_dep_mask; }; extern struct tick_sched *tick_get_tick_sched(int cpu); -- cgit v1.2.3-71-gd317 From 5529578a27288d11d4d15635c258c6dde0f0fb10 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 24 Mar 2016 15:38:01 +0100 Subject: locking/atomic, sched: Unexport fetch_or() This patch functionally reverts: 5fd7a09cfb8c ("atomic: Export fetch_or()") During the merge Linus observed that the generic version of fetch_or() was messy: " This makes the ugly "fetch_or()" macro that the scheduler used internally a new generic helper, and does a bad job at it. " e23604edac2a Merge branch 'timers-nohz-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip Now that we have introduced atomic_fetch_or(), fetch_or() is only used by the scheduler in order to deal with thread_info flags which type can vary across architectures. Lets confine fetch_or() back to the scheduler so that we encourage future users to use the more robust and well typed atomic_t version instead. While at it, fetch_or() gets robustified, pasting improvements from a previous patch by Ingo Molnar that avoids needless expression re-evaluations in the loop. Reported-by: Linus Torvalds Signed-off-by: Frederic Weisbecker Cc: Andrew Morton Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1458830281-4255-4-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- include/linux/atomic.h | 21 --------------------- kernel/sched/core.c | 18 ++++++++++++++++++ 2 files changed, 18 insertions(+), 21 deletions(-) (limited to 'kernel') diff --git a/include/linux/atomic.h b/include/linux/atomic.h index 3d64c0852164..506c3531832e 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -579,27 +579,6 @@ static inline int atomic_fetch_or(atomic_t *p, int mask) } #endif -/** - * fetch_or - perform *ptr |= mask and return old value of *ptr - * @ptr: pointer to value - * @mask: mask to OR on the value - * - * cmpxchg based fetch_or, macro so it works for different integer types - */ -#ifndef fetch_or -#define fetch_or(ptr, mask) \ -({ typeof(*(ptr)) __old, __val = *(ptr); \ - for (;;) { \ - __old = cmpxchg((ptr), __val, __val | (mask)); \ - if (__old == __val) \ - break; \ - __val = __old; \ - } \ - __old; \ -}) -#endif - - #ifdef CONFIG_GENERIC_ATOMIC64 #include #endif diff --git a/kernel/sched/core.c b/kernel/sched/core.c index d8465eeab8b3..8b489fcac37b 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -321,6 +321,24 @@ static inline void init_hrtick(void) } #endif /* CONFIG_SCHED_HRTICK */ +/* + * cmpxchg based fetch_or, macro so it works for different integer types + */ +#define fetch_or(ptr, mask) \ + ({ \ + typeof(ptr) _ptr = (ptr); \ + typeof(mask) _mask = (mask); \ + typeof(*_ptr) _old, _val = *_ptr; \ + \ + for (;;) { \ + _old = cmpxchg(_ptr, _val, _val | _mask); \ + if (_old == _val) \ + break; \ + _val = _old; \ + } \ + _old; \ +}) + #if defined(CONFIG_SMP) && defined(TIF_POLLING_NRFLAG) /* * Atomically set TIF_NEED_RESCHED and test for TIF_POLLING_NRFLAG, -- cgit v1.2.3-71-gd317 From 8fdc65391c6ad16461526a685f03262b3b01bfde Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 29 Mar 2016 09:26:44 +0200 Subject: perf/core: Fix time tracking bug with multiplexing Stephane reported that commit: 3cbaa5906967 ("perf: Fix ctx time tracking by introducing EVENT_TIME") introduced a regression wrt. time tracking, as easily observed by: > This patch introduce a bug in the time tracking of events when > multiplexing is used. > > The issue is easily reproducible with the following perf run: > > $ perf stat -a -C 0 -e branches,branches,branches,branches,branches,branches -I 1000 > 1.000730239 652,394 branches (66.41%) > 1.000730239 597,809 branches (66.41%) > 1.000730239 593,870 branches (66.63%) > 1.000730239 651,440 branches (67.03%) > 1.000730239 656,725 branches (66.96%) > 1.000730239 branches > > One branches event is shown as not having run. Yet, with > multiplexing, all events should run especially with a 1s (-I 1000) > interval. The delta for time_running comes out to 0. Yet, the event > has run because the kernel is actually multiplexing the events. The > problem is that the time tracking is the kernel and especially in > ctx_sched_out() is wrong now. > > The problem is that in case that the kernel enters ctx_sched_out() with the > following state: > ctx->is_active=0x7 event_type=0x1 > Call Trace: > [] dump_stack+0x63/0x82 > [] ctx_sched_out+0x2bc/0x2d0 > [] perf_mux_hrtimer_handler+0xf6/0x2c0 > [] ? __perf_install_in_context+0x130/0x130 > [] __hrtimer_run_queues+0xf8/0x2f0 > [] hrtimer_interrupt+0xb7/0x1d0 > [] local_apic_timer_interrupt+0x38/0x60 > [] smp_apic_timer_interrupt+0x3d/0x50 > [] apic_timer_interrupt+0x8c/0xa0 > > In that case, the test: > if (is_active & EVENT_TIME) > > will be false and the time will not be updated. Time must always be updated on > sched out. Fix this by always updating time if EVENT_TIME was set, as opposed to only updating time when EVENT_TIME changed. Reported-by: Stephane Eranian Tested-by: Stephane Eranian Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vince Weaver Cc: kan.liang@intel.com Cc: namhyung@kernel.org Fixes: 3cbaa5906967 ("perf: Fix ctx time tracking by introducing EVENT_TIME") Link: http://lkml.kernel.org/r/20160329072644.GB3408@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- kernel/events/core.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/events/core.c b/kernel/events/core.c index de24fbce5277..8c11388e92a5 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2417,14 +2417,24 @@ static void ctx_sched_out(struct perf_event_context *ctx, cpuctx->task_ctx = NULL; } - is_active ^= ctx->is_active; /* changed bits */ - + /* + * Always update time if it was set; not only when it changes. + * Otherwise we can 'forget' to update time for any but the last + * context we sched out. For example: + * + * ctx_sched_out(.event_type = EVENT_FLEXIBLE) + * ctx_sched_out(.event_type = EVENT_PINNED) + * + * would only update time for the pinned events. + */ if (is_active & EVENT_TIME) { /* update (and stop) ctx time */ update_context_time(ctx); update_cgrp_time_from_cpuctx(cpuctx); } + is_active ^= ctx->is_active; /* changed bits */ + if (!ctx->nr_active || !(is_active & EVENT_ALL)) return; -- cgit v1.2.3-71-gd317 From 201c2f85bd0bc13b712d9c0b3d11251b182e06ae Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Mon, 21 Mar 2016 10:02:42 +0200 Subject: perf/core: Don't leak event in the syscall error path In the error path, event_file not being NULL is used to determine whether the event itself still needs to be free'd, so fix it up to avoid leaking. Reported-by: Leon Yu Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Fixes: 130056275ade ("perf: Do not double free") Link: http://lkml.kernel.org/r/87twk06yxp.fsf@ashishki-desk.ger.corp.intel.com Signed-off-by: Ingo Molnar --- kernel/events/core.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel') diff --git a/kernel/events/core.c b/kernel/events/core.c index 8c11388e92a5..52bedc5a5aaa 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -8542,6 +8542,7 @@ SYSCALL_DEFINE5(perf_event_open, f_flags); if (IS_ERR(event_file)) { err = PTR_ERR(event_file); + event_file = NULL; goto err_context; } -- cgit v1.2.3-71-gd317 From 39e2e173fb1f900959d3a25c21c65fa88b06c6ee Mon Sep 17 00:00:00 2001 From: Alfredo Alvarez Fernandez Date: Wed, 30 Mar 2016 19:03:36 +0200 Subject: locking/lockdep: Print chain_key collision information A sequence of pairs [class_idx -> corresponding chain_key iteration] is printed for both the current held_lock chain and the cached chain. That exposes the two different class_idx sequences that led to that particular hash value. This helps with debugging hash chain collision reports. Signed-off-by: Alfredo Alvarez Fernandez Acked-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-fsdevel@vger.kernel.org Cc: sedat.dilek@gmail.com Cc: tytso@mit.edu Link: http://lkml.kernel.org/r/1459357416-19190-1-git-send-email-alfredoalvarezernandez@gmail.com Signed-off-by: Ingo Molnar --- kernel/locking/lockdep.c | 79 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 77 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 53ab2f85d77e..2324ba5310db 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -1999,6 +1999,77 @@ static inline int get_first_held_lock(struct task_struct *curr, return ++i; } +/* + * Returns the next chain_key iteration + */ +static u64 print_chain_key_iteration(int class_idx, u64 chain_key) +{ + u64 new_chain_key = iterate_chain_key(chain_key, class_idx); + + printk(" class_idx:%d -> chain_key:%016Lx", + class_idx, + (unsigned long long)new_chain_key); + return new_chain_key; +} + +static void +print_chain_keys_held_locks(struct task_struct *curr, struct held_lock *hlock_next) +{ + struct held_lock *hlock; + u64 chain_key = 0; + int depth = curr->lockdep_depth; + int i; + + printk("depth: %u\n", depth + 1); + for (i = get_first_held_lock(curr, hlock_next); i < depth; i++) { + hlock = curr->held_locks + i; + chain_key = print_chain_key_iteration(hlock->class_idx, chain_key); + + print_lock(hlock); + } + + print_chain_key_iteration(hlock_next->class_idx, chain_key); + print_lock(hlock_next); +} + +static void print_chain_keys_chain(struct lock_chain *chain) +{ + int i; + u64 chain_key = 0; + int class_id; + + printk("depth: %u\n", chain->depth); + for (i = 0; i < chain->depth; i++) { + class_id = chain_hlocks[chain->base + i]; + chain_key = print_chain_key_iteration(class_id + 1, chain_key); + + print_lock_name(lock_classes + class_id); + printk("\n"); + } +} + +static void print_collision(struct task_struct *curr, + struct held_lock *hlock_next, + struct lock_chain *chain) +{ + printk("\n"); + printk("======================\n"); + printk("[chain_key collision ]\n"); + print_kernel_ident(); + printk("----------------------\n"); + printk("%s/%d: ", current->comm, task_pid_nr(current)); + printk("Hash chain already cached but the contents don't match!\n"); + + printk("Held locks:"); + print_chain_keys_held_locks(curr, hlock_next); + + printk("Locks in cached chain:"); + print_chain_keys_chain(chain); + + printk("\nstack backtrace:\n"); + dump_stack(); +} + /* * Checks whether the chain and the current held locks are consistent * in depth and also in content. If they are not it most likely means @@ -2014,14 +2085,18 @@ static int check_no_collision(struct task_struct *curr, i = get_first_held_lock(curr, hlock); - if (DEBUG_LOCKS_WARN_ON(chain->depth != curr->lockdep_depth - (i - 1))) + if (DEBUG_LOCKS_WARN_ON(chain->depth != curr->lockdep_depth - (i - 1))) { + print_collision(curr, hlock, chain); return 0; + } for (j = 0; j < chain->depth - 1; j++, i++) { id = curr->held_locks[i].class_idx - 1; - if (DEBUG_LOCKS_WARN_ON(chain_hlocks[chain->base + j] != id)) + if (DEBUG_LOCKS_WARN_ON(chain_hlocks[chain->base + j] != id)) { + print_collision(curr, hlock, chain); return 0; + } } #endif return 1; -- cgit v1.2.3-71-gd317