From 8dd33bcb7050dd6f8c1432732f930932c9d3a33e Mon Sep 17 00:00:00 2001 From: Bo Yan Date: Mon, 18 Sep 2017 10:03:35 -0700 Subject: tracing: Erase irqsoff trace with empty write One convenient way to erase trace is "echo > trace". However, this is currently broken if the current tracer is irqsoff tracer. This is because irqsoff tracer use max_buffer as the default trace buffer. Set the max_buffer as the one to be cleared when it's the trace buffer currently in use. Link: http://lkml.kernel.org/r/1505754215-29411-1-git-send-email-byan@nvidia.com Cc: Cc: stable@vger.kernel.org Fixes: 4acd4d00f ("tracing: give easy way to clear trace buffer") Signed-off-by: Bo Yan Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 5360b7aec57a..a7fb136da891 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4020,11 +4020,17 @@ static int tracing_open(struct inode *inode, struct file *file) /* If this file was open for write, then erase contents */ if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) { int cpu = tracing_get_cpu(inode); + struct trace_buffer *trace_buf = &tr->trace_buffer; + +#ifdef CONFIG_TRACER_MAX_TRACE + if (tr->current_trace->print_max) + trace_buf = &tr->max_buffer; +#endif if (cpu == RING_BUFFER_ALL_CPUS) - tracing_reset_online_cpus(&tr->trace_buffer); + tracing_reset_online_cpus(trace_buf); else - tracing_reset(&tr->trace_buffer, cpu); + tracing_reset(trace_buf, cpu); } if (file->f_mode & FMODE_READ) { -- cgit v1.2.3-71-gd317 From c7b3ae0bd2ca658c7a71c49901d08c590294fac9 Mon Sep 17 00:00:00 2001 From: "Ziqian SUN (Zamir)" Date: Mon, 11 Sep 2017 14:26:35 +0800 Subject: tracing: Ignore mmiotrace from kernel commandline The mmiotrace tracer cannot be enabled with ftrace=mmiotrace in kernel commandline. With this patch, noboot is added to the tracer struct, and when system boot with a tracer that has noboot=true, it will print out a warning message and continue booting. Link: http://lkml.kernel.org/r/1505111195-31942-1-git-send-email-zsun@redhat.com Signed-off-by: Ziqian SUN (Zamir) Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 7 +++++++ kernel/trace/trace.h | 2 ++ kernel/trace/trace_mmiotrace.c | 1 + 3 files changed, 10 insertions(+) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index a7fb136da891..d3ca35f38803 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -5364,6 +5364,13 @@ static int tracing_set_tracer(struct trace_array *tr, const char *buf) if (t == tr->current_trace) goto out; + /* Some tracers won't work on kernel command line */ + if (system_state < SYSTEM_RUNNING && t->noboot) { + pr_warn("Tracer '%s' is not allowed on command line, ignored\n", + t->name); + goto out; + } + /* Some tracers are only allowed for the top level buffer */ if (!trace_ok_for_array(t, tr)) { ret = -EINVAL; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index fb5d54d0d1b3..652c682707cd 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -444,6 +444,8 @@ struct tracer { #ifdef CONFIG_TRACER_MAX_TRACE bool use_max_tr; #endif + /* True if tracer cannot be enabled in kernel param */ + bool noboot; }; diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index cd7480d0a201..dca78fc48439 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -282,6 +282,7 @@ static struct tracer mmio_tracer __read_mostly = .close = mmio_close, .read = mmio_read, .print_line = mmio_print_line, + .noboot = true, }; __init static int init_mmio_trace(void) -- cgit v1.2.3-71-gd317 From 75df6e688ccd517e339a7c422ef7ad73045b18a2 Mon Sep 17 00:00:00 2001 From: Tahsin Erdogan Date: Sun, 17 Sep 2017 03:23:48 -0700 Subject: tracing: Fix trace_pipe behavior for instance traces When reading data from trace_pipe, tracing_wait_pipe() performs a check to see if tracing has been turned off after some data was read. Currently, this check always looks at global trace state, but it should be checking the trace instance where trace_pipe is located at. Because of this bug, cat instances/i1/trace_pipe in the following script will immediately exit instead of waiting for data: cd /sys/kernel/debug/tracing echo 0 > tracing_on mkdir -p instances/i1 echo 1 > instances/i1/tracing_on echo 1 > instances/i1/events/sched/sched_process_exec/enable cat instances/i1/trace_pipe Link: http://lkml.kernel.org/r/20170917102348.1615-1-tahsin@google.com Cc: stable@vger.kernel.org Fixes: 10246fa35d4f ("tracing: give easy way to clear trace buffer") Signed-off-by: Tahsin Erdogan Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d3ca35f38803..752e5daf0896 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -5680,7 +5680,7 @@ static int tracing_wait_pipe(struct file *filp) * * iter->pos will be 0 if we haven't read anything. */ - if (!tracing_is_on() && iter->pos) + if (!tracer_tracing_is_on(iter->tr) && iter->pos) break; mutex_unlock(&iter->mutex); -- cgit v1.2.3-71-gd317 From 15516c89acce948debc4c598e03c3fee53045797 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 21 Sep 2017 13:00:21 -0400 Subject: tracing: Remove RCU work arounds from stack tracer Currently the stack tracer calls rcu_irq_enter() to make sure RCU is watching when it records a stack trace. But if the stack tracer is triggered while tracing inside of a rcu_irq_enter(), calling rcu_irq_enter() unconditionally can be problematic. The reason for having rcu_irq_enter() in the first place has been fixed from within the saving of the stack trace code, and there's no reason for doing it in the stack tracer itself. Just remove it. Cc: stable@vger.kernel.org Fixes: 0be964be0 ("module: Sanitize RCU usage and locking") Acked-by: Paul E. McKenney Suggested-by: "Paul E. McKenney" Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_stack.c | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index a4df67cbc711..49cb41412eec 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -96,23 +96,9 @@ check_stack(unsigned long ip, unsigned long *stack) if (in_nmi()) return; - /* - * There's a slight chance that we are tracing inside the - * RCU infrastructure, and rcu_irq_enter() will not work - * as expected. - */ - if (unlikely(rcu_irq_enter_disabled())) - return; - local_irq_save(flags); arch_spin_lock(&stack_trace_max_lock); - /* - * RCU may not be watching, make it see us. - * The stack trace code uses rcu_sched. - */ - rcu_irq_enter(); - /* In case another CPU set the tracer_frame on us */ if (unlikely(!frame_size)) this_size -= tracer_frame; @@ -205,7 +191,6 @@ check_stack(unsigned long ip, unsigned long *stack) } out: - rcu_irq_exit(); arch_spin_unlock(&stack_trace_max_lock); local_irq_restore(flags); } -- cgit v1.2.3-71-gd317 From 5acb3cc2c2e9d3020a4fee43763c6463767f1572 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Wed, 20 Sep 2017 13:12:20 -0600 Subject: blktrace: Fix potential deadlock between delete & sysfs ops The lockdep code had reported the following unsafe locking scenario: CPU0 CPU1 ---- ---- lock(s_active#228); lock(&bdev->bd_mutex/1); lock(s_active#228); lock(&bdev->bd_mutex); *** DEADLOCK *** The deadlock may happen when one task (CPU1) is trying to delete a partition in a block device and another task (CPU0) is accessing tracing sysfs file (e.g. /sys/block/dm-1/trace/act_mask) in that partition. The s_active isn't an actual lock. It is a reference count (kn->count) on the sysfs (kernfs) file. Removal of a sysfs file, however, require a wait until all the references are gone. The reference count is treated like a rwsem using lockdep instrumentation code. The fact that a thread is in the sysfs callback method or in the ioctl call means there is a reference to the opended sysfs or device file. That should prevent the underlying block structure from being removed. Instead of using bd_mutex in the block_device structure, a new blk_trace_mutex is now added to the request_queue structure to protect access to the blk_trace structure. Suggested-by: Christoph Hellwig Signed-off-by: Waiman Long Acked-by: Steven Rostedt (VMware) Fix typo in patch subject line, and prune a comment detailing how the code used to work. Signed-off-by: Jens Axboe --- block/blk-core.c | 3 +++ include/linux/blkdev.h | 1 + kernel/trace/blktrace.c | 18 ++++++++++++------ 3 files changed, 16 insertions(+), 6 deletions(-) (limited to 'kernel/trace') diff --git a/block/blk-core.c b/block/blk-core.c index aebe676225e6..048be4aa6024 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -854,6 +854,9 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) kobject_init(&q->kobj, &blk_queue_ktype); +#ifdef CONFIG_BLK_DEV_IO_TRACE + mutex_init(&q->blk_trace_mutex); +#endif mutex_init(&q->sysfs_lock); spin_lock_init(&q->__queue_lock); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 460294bb0fa5..02fa42d24b52 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -551,6 +551,7 @@ struct request_queue { int node; #ifdef CONFIG_BLK_DEV_IO_TRACE struct blk_trace *blk_trace; + struct mutex blk_trace_mutex; #endif /* * for flush operations diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 2a685b45b73b..45a3928544ce 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -648,6 +648,12 @@ int blk_trace_startstop(struct request_queue *q, int start) } EXPORT_SYMBOL_GPL(blk_trace_startstop); +/* + * When reading or writing the blktrace sysfs files, the references to the + * opened sysfs or device files should prevent the underlying block device + * from being removed. So no further delete protection is really needed. + */ + /** * blk_trace_ioctl: - handle the ioctls associated with tracing * @bdev: the block device @@ -665,7 +671,7 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg) if (!q) return -ENXIO; - mutex_lock(&bdev->bd_mutex); + mutex_lock(&q->blk_trace_mutex); switch (cmd) { case BLKTRACESETUP: @@ -691,7 +697,7 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg) break; } - mutex_unlock(&bdev->bd_mutex); + mutex_unlock(&q->blk_trace_mutex); return ret; } @@ -1727,7 +1733,7 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev, if (q == NULL) goto out_bdput; - mutex_lock(&bdev->bd_mutex); + mutex_lock(&q->blk_trace_mutex); if (attr == &dev_attr_enable) { ret = sprintf(buf, "%u\n", !!q->blk_trace); @@ -1746,7 +1752,7 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev, ret = sprintf(buf, "%llu\n", q->blk_trace->end_lba); out_unlock_bdev: - mutex_unlock(&bdev->bd_mutex); + mutex_unlock(&q->blk_trace_mutex); out_bdput: bdput(bdev); out: @@ -1788,7 +1794,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, if (q == NULL) goto out_bdput; - mutex_lock(&bdev->bd_mutex); + mutex_lock(&q->blk_trace_mutex); if (attr == &dev_attr_enable) { if (value) @@ -1814,7 +1820,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, } out_unlock_bdev: - mutex_unlock(&bdev->bd_mutex); + mutex_unlock(&q->blk_trace_mutex); out_bdput: bdput(bdev); out: -- cgit v1.2.3-71-gd317 From 5f6ad26ea353fdf3dad2328052cbee49e0b9c5b4 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 22 Sep 2017 18:23:31 +0200 Subject: sched/tracing: Use common task-state helpers Remove yet another task-state char instance. Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 -- kernel/trace/trace_output.c | 21 ++++++--------------- kernel/trace/trace_sched_wakeup.c | 8 ++++---- 3 files changed, 10 insertions(+), 21 deletions(-) (limited to 'kernel/trace') diff --git a/include/linux/sched.h b/include/linux/sched.h index a2fe636b6825..bc7807933415 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -83,8 +83,6 @@ struct task_group; #define TASK_NEW 0x0800 #define TASK_STATE_MAX 0x1000 -#define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWPNn" - /* Convenience macros for the sake of set_current_state: */ #define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE) #define TASK_STOPPED (TASK_WAKEKILL | __TASK_STOPPED) diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index bac629af2285..c738e764e2a5 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -656,15 +656,6 @@ int trace_print_lat_context(struct trace_iterator *iter) return !trace_seq_has_overflowed(s); } -static const char state_to_char[] = TASK_STATE_TO_CHAR_STR; - -static int task_state_char(unsigned long state) -{ - int bit = state ? __ffs(state) + 1 : 0; - - return bit < sizeof(state_to_char) - 1 ? state_to_char[bit] : '?'; -} - /** * ftrace_find_event - find a registered event * @type: the type of event to look for @@ -930,8 +921,8 @@ static enum print_line_t trace_ctxwake_print(struct trace_iterator *iter, trace_assign_type(field, iter->ent); - T = task_state_char(field->next_state); - S = task_state_char(field->prev_state); + T = __task_state_to_char(field->next_state); + S = __task_state_to_char(field->prev_state); trace_find_cmdline(field->next_pid, comm); trace_seq_printf(&iter->seq, " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n", @@ -966,8 +957,8 @@ static int trace_ctxwake_raw(struct trace_iterator *iter, char S) trace_assign_type(field, iter->ent); if (!S) - S = task_state_char(field->prev_state); - T = task_state_char(field->next_state); + S = __task_state_to_char(field->prev_state); + T = __task_state_to_char(field->next_state); trace_seq_printf(&iter->seq, "%d %d %c %d %d %d %c\n", field->prev_pid, field->prev_prio, @@ -1002,8 +993,8 @@ static int trace_ctxwake_hex(struct trace_iterator *iter, char S) trace_assign_type(field, iter->ent); if (!S) - S = task_state_char(field->prev_state); - T = task_state_char(field->next_state); + S = __task_state_to_char(field->prev_state); + T = __task_state_to_char(field->next_state); SEQ_PUT_HEX_FIELD(s, field->prev_pid); SEQ_PUT_HEX_FIELD(s, field->prev_prio); diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index ddec53b67646..0c331978b1a6 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -397,10 +397,10 @@ tracing_sched_switch_trace(struct trace_array *tr, entry = ring_buffer_event_data(event); entry->prev_pid = prev->pid; entry->prev_prio = prev->prio; - entry->prev_state = prev->state; + entry->prev_state = __get_task_state(prev); entry->next_pid = next->pid; entry->next_prio = next->prio; - entry->next_state = next->state; + entry->next_state = __get_task_state(next); entry->next_cpu = task_cpu(next); if (!call_filter_check_discard(call, entry, buffer, event)) @@ -425,10 +425,10 @@ tracing_sched_wakeup_trace(struct trace_array *tr, entry = ring_buffer_event_data(event); entry->prev_pid = curr->pid; entry->prev_prio = curr->prio; - entry->prev_state = curr->state; + entry->prev_state = __get_task_state(curr); entry->next_pid = wakee->pid; entry->next_prio = wakee->prio; - entry->next_state = wakee->state; + entry->next_state = __get_task_state(wakee); entry->next_cpu = task_cpu(wakee); if (!call_filter_check_discard(call, entry, buffer, event)) -- cgit v1.2.3-71-gd317 From 2b0b8499ae75df91455bbeb7491d45affc384fb0 Mon Sep 17 00:00:00 2001 From: Shu Wang Date: Tue, 12 Sep 2017 10:14:54 +0800 Subject: ftrace: Fix kmemleak in unregister_ftrace_graph The trampoline allocated by function tracer was overwriten by function_graph tracer, and caused a memory leak. The save_global_trampoline should have saved the previous trampoline in register_ftrace_graph() and restored it in unregister_ftrace_graph(). But as it is implemented, save_global_trampoline was only used in unregister_ftrace_graph as default value 0, and it overwrote the previous trampoline's value. Causing the previous allocated trampoline to be lost. kmmeleak backtrace: kmemleak_vmalloc+0x77/0xc0 __vmalloc_node_range+0x1b5/0x2c0 module_alloc+0x7c/0xd0 arch_ftrace_update_trampoline+0xb5/0x290 ftrace_startup+0x78/0x210 register_ftrace_function+0x8b/0xd0 function_trace_init+0x4f/0x80 tracing_set_tracer+0xe6/0x170 tracing_set_trace_write+0x90/0xd0 __vfs_write+0x37/0x170 vfs_write+0xb2/0x1b0 SyS_write+0x55/0xc0 do_syscall_64+0x67/0x180 return_from_SYSCALL_64+0x0/0x6a [ Looking further into this, I found that this was left over from when the function and function graph tracers shared the same ftrace_ops. But in commit 5f151b2401 ("ftrace: Fix function_profiler and function tracer together"), the two were separated, and the save_global_trampoline no longer was necessary (and it may have been broken back then too). -- Steven Rostedt ] Link: http://lkml.kernel.org/r/20170912021454.5976-1-shuwang@redhat.com Cc: stable@vger.kernel.org Fixes: 5f151b2401 ("ftrace: Fix function_profiler and function tracer together") Signed-off-by: Shu Wang Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 6abfafd7f173..8319e09e15b9 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -4954,9 +4954,6 @@ static char ftrace_graph_buf[FTRACE_FILTER_SIZE] __initdata; static char ftrace_graph_notrace_buf[FTRACE_FILTER_SIZE] __initdata; static int ftrace_graph_set_hash(struct ftrace_hash *hash, char *buffer); -static unsigned long save_global_trampoline; -static unsigned long save_global_flags; - static int __init set_graph_function(char *str) { strlcpy(ftrace_graph_buf, str, FTRACE_FILTER_SIZE); @@ -6808,17 +6805,6 @@ void unregister_ftrace_graph(void) unregister_pm_notifier(&ftrace_suspend_notifier); unregister_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL); -#ifdef CONFIG_DYNAMIC_FTRACE - /* - * Function graph does not allocate the trampoline, but - * other global_ops do. We need to reset the ALLOC_TRAMP flag - * if one was used. - */ - global_ops.trampoline = save_global_trampoline; - if (save_global_flags & FTRACE_OPS_FL_ALLOC_TRAMP) - global_ops.flags |= FTRACE_OPS_FL_ALLOC_TRAMP; -#endif - out: mutex_unlock(&ftrace_lock); } -- cgit v1.2.3-71-gd317