cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

blktrace.c (47934B)


// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2006 Jens Axboe <axboe@kernel.dk>
 *
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/blkdev.h>
#include <linux/blktrace_api.h>
#include <linux/percpu.h>
#include <linux/init.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/debugfs.h>
#include <linux/export.h>
#include <linux/time.h>
#include <linux/uaccess.h>
#include <linux/list.h>
#include <linux/blk-cgroup.h>

#include "../../block/blk.h"

#include <trace/events/block.h>

#include "trace_output.h"

#ifdef CONFIG_BLK_DEV_IO_TRACE

static unsigned int blktrace_seq __read_mostly = 1;

static struct trace_array *blk_tr;
static bool blk_tracer_enabled __read_mostly;

static LIST_HEAD(running_trace_list);
static __cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(running_trace_lock);
/* Select an alternative, minimalistic output rather than the original one */
#define TRACE_BLK_OPT_CLASSIC	0x1
#define TRACE_BLK_OPT_CGROUP	0x2
#define TRACE_BLK_OPT_CGNAME	0x4

static struct tracer_opt blk_tracer_opts[] = {
	/* The minimalistic output is disabled by default */
	{ TRACER_OPT(blk_classic, TRACE_BLK_OPT_CLASSIC) },
#ifdef CONFIG_BLK_CGROUP
	{ TRACER_OPT(blk_cgroup, TRACE_BLK_OPT_CGROUP) },
	{ TRACER_OPT(blk_cgname, TRACE_BLK_OPT_CGNAME) },
#endif
	{ }
};

static struct tracer_flags blk_tracer_flags = {
	.val  = 0,
	.opts = blk_tracer_opts,
};

/* Global reference count of probes */
static DEFINE_MUTEX(blk_probe_mutex);
static int blk_probes_ref;

static void blk_register_tracepoints(void);
static void blk_unregister_tracepoints(void);

/*
 * Send out a notify message.
 */
static void trace_note(struct blk_trace *bt, pid_t pid, int action,
		       const void *data, size_t len, u64 cgid)
{
	struct blk_io_trace *t;
	struct ring_buffer_event *event = NULL;
	struct trace_buffer *buffer = NULL;
	unsigned int trace_ctx = 0;
	int cpu = smp_processor_id();
	bool blk_tracer = blk_tracer_enabled;
	ssize_t cgid_len = cgid ? sizeof(cgid) : 0;

	if (blk_tracer) {
		buffer = blk_tr->array_buffer.buffer;
		trace_ctx = tracing_gen_ctx_flags(0);
		event = trace_buffer_lock_reserve(buffer, TRACE_BLK,
						  sizeof(*t) + len + cgid_len,
						  trace_ctx);
		if (!event)
			return;
		t = ring_buffer_event_data(event);
		goto record_it;
	}

	if (!bt->rchan)
		return;

	t = relay_reserve(bt->rchan, sizeof(*t) + len + cgid_len);
	if (t) {
		t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION;
		t->time = ktime_to_ns(ktime_get());
record_it:
		t->device = bt->dev;
		t->action = action | (cgid ? __BLK_TN_CGROUP : 0);
		t->pid = pid;
		t->cpu = cpu;
		t->pdu_len = len + cgid_len;
		if (cgid_len)
			memcpy((void *)t + sizeof(*t), &cgid, cgid_len);
		memcpy((void *) t + sizeof(*t) + cgid_len, data, len);

		if (blk_tracer)
			trace_buffer_unlock_commit(blk_tr, buffer, event, trace_ctx);
	}
}

/*
 * Send out a notify for this process, if we haven't done so since a trace
 * started
 */
static void trace_note_tsk(struct task_struct *tsk)
{
	unsigned long flags;
	struct blk_trace *bt;

	tsk->btrace_seq = blktrace_seq;
	raw_spin_lock_irqsave(&running_trace_lock, flags);
	list_for_each_entry(bt, &running_trace_list, running_list) {
		trace_note(bt, tsk->pid, BLK_TN_PROCESS, tsk->comm,
			   sizeof(tsk->comm), 0);
	}
	raw_spin_unlock_irqrestore(&running_trace_lock, flags);
}

static void trace_note_time(struct blk_trace *bt)
{
	struct timespec64 now;
	unsigned long flags;
	u32 words[2];

	/* need to check user space to see if this breaks in y2038 or y2106 */
	ktime_get_real_ts64(&now);
	words[0] = (u32)now.tv_sec;
	words[1] = now.tv_nsec;

	local_irq_save(flags);
	trace_note(bt, 0, BLK_TN_TIMESTAMP, words, sizeof(words), 0);
	local_irq_restore(flags);
}

void __blk_trace_note_message(struct blk_trace *bt,
		struct cgroup_subsys_state *css, const char *fmt, ...)
{
	int n;
	va_list args;
	unsigned long flags;
	char *buf;
	u64 cgid = 0;

	if (unlikely(bt->trace_state != Blktrace_running &&
		     !blk_tracer_enabled))
		return;

	/*
	 * If the BLK_TC_NOTIFY action mask isn't set, don't send any note
	 * message to the trace.
	 */
	if (!(bt->act_mask & BLK_TC_NOTIFY))
		return;

	local_irq_save(flags);
	buf = this_cpu_ptr(bt->msg_data);
	va_start(args, fmt);
	n = vscnprintf(buf, BLK_TN_MAX_MSG, fmt, args);
	va_end(args);

#ifdef CONFIG_BLK_CGROUP
	if (css && (blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP))
		cgid = cgroup_id(css->cgroup);
	else
		cgid = 1;
#endif
	trace_note(bt, current->pid, BLK_TN_MESSAGE, buf, n, cgid);
	local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(__blk_trace_note_message);

static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector,
			 pid_t pid)
{
	if (((bt->act_mask << BLK_TC_SHIFT) & what) == 0)
		return 1;
	if (sector && (sector < bt->start_lba || sector > bt->end_lba))
		return 1;
	if (bt->pid && pid != bt->pid)
		return 1;

	return 0;
}
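
/*
 * Example (hypothetical values): with act_mask = BLK_TC_READ, start_lba = 0,
 * end_lba = 1024 and pid = 0, a write event is dropped by the mask check, a
 * read at sector 4096 is dropped by the LBA range check, and only reads
 * inside [0, 1024] pass. A sector of 0 bypasses the range check, a pid
 * filter of 0 matches every task, and a non-zero return means "skip".
 */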

/*
 * Data direction bit lookup
 */
static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ),
				 BLK_TC_ACT(BLK_TC_WRITE) };

#define BLK_TC_RAHEAD		BLK_TC_AHEAD
#define BLK_TC_PREFLUSH		BLK_TC_FLUSH

/* The ilog2() calls fall out because they're constant */
#define MASK_TC_BIT(rw, __name) ((rw & REQ_ ## __name) << \
	  (ilog2(BLK_TC_ ## __name) + BLK_TC_SHIFT - __REQ_ ## __name))
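
/*
 * For instance, MASK_TC_BIT(rw, SYNC) expands to
 *
 *	(rw & REQ_SYNC) << (ilog2(BLK_TC_SYNC) + BLK_TC_SHIFT - __REQ_SYNC)
 *
 * which moves the REQ_SYNC request flag from its __REQ_SYNC bit position
 * into the BLK_TC_SYNC position of the action's classification bits.
 */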

/*
 * The worker for the various blk_add_trace*() types. Fills out a
 * blk_io_trace structure and places it in a per-cpu subbuffer.
 */
static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
		     int op, int op_flags, u32 what, int error, int pdu_len,
		     void *pdu_data, u64 cgid)
{
	struct task_struct *tsk = current;
	struct ring_buffer_event *event = NULL;
	struct trace_buffer *buffer = NULL;
	struct blk_io_trace *t;
	unsigned long flags = 0;
	unsigned long *sequence;
	unsigned int trace_ctx = 0;
	pid_t pid;
	int cpu;
	bool blk_tracer = blk_tracer_enabled;
	ssize_t cgid_len = cgid ? sizeof(cgid) : 0;

	if (unlikely(bt->trace_state != Blktrace_running && !blk_tracer))
		return;

	what |= ddir_act[op_is_write(op) ? WRITE : READ];
	what |= MASK_TC_BIT(op_flags, SYNC);
	what |= MASK_TC_BIT(op_flags, RAHEAD);
	what |= MASK_TC_BIT(op_flags, META);
	what |= MASK_TC_BIT(op_flags, PREFLUSH);
	what |= MASK_TC_BIT(op_flags, FUA);
	if (op == REQ_OP_DISCARD || op == REQ_OP_SECURE_ERASE)
		what |= BLK_TC_ACT(BLK_TC_DISCARD);
	if (op == REQ_OP_FLUSH)
		what |= BLK_TC_ACT(BLK_TC_FLUSH);
	if (cgid)
		what |= __BLK_TA_CGROUP;

	pid = tsk->pid;
	if (act_log_check(bt, what, sector, pid))
		return;
	cpu = raw_smp_processor_id();

	if (blk_tracer) {
		tracing_record_cmdline(current);

		buffer = blk_tr->array_buffer.buffer;
		trace_ctx = tracing_gen_ctx_flags(0);
		event = trace_buffer_lock_reserve(buffer, TRACE_BLK,
						  sizeof(*t) + pdu_len + cgid_len,
						  trace_ctx);
		if (!event)
			return;
		t = ring_buffer_event_data(event);
		goto record_it;
	}

	if (unlikely(tsk->btrace_seq != blktrace_seq))
		trace_note_tsk(tsk);

	/*
	 * A word about the locking here - we disable interrupts to reserve
	 * some space in the relay per-cpu buffer, to prevent an irq
	 * from coming in and stepping on our toes.
	 */
	local_irq_save(flags);
	t = relay_reserve(bt->rchan, sizeof(*t) + pdu_len + cgid_len);
	if (t) {
		sequence = per_cpu_ptr(bt->sequence, cpu);

		t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION;
		t->sequence = ++(*sequence);
		t->time = ktime_to_ns(ktime_get());
record_it:
		/*
		 * These two are not needed in ftrace as they are in the
		 * generic trace_entry, filled by tracing_generic_entry_update,
		 * but for the trace_event->bin() synthesizer benefit we do it
		 * here too.
		 */
		t->cpu = cpu;
		t->pid = pid;

		t->sector = sector;
		t->bytes = bytes;
		t->action = what;
		t->device = bt->dev;
		t->error = error;
		t->pdu_len = pdu_len + cgid_len;

		if (cgid_len)
			memcpy((void *)t + sizeof(*t), &cgid, cgid_len);
		if (pdu_len)
			memcpy((void *)t + sizeof(*t) + cgid_len, pdu_data, pdu_len);

		if (blk_tracer) {
			trace_buffer_unlock_commit(blk_tr, buffer, event, trace_ctx);
			return;
		}
	}

	local_irq_restore(flags);
}

static void blk_trace_free(struct request_queue *q, struct blk_trace *bt)
{
	relay_close(bt->rchan);

	/*
	 * If 'bt->dir' is not set, then both 'dropped' and 'msg' are created
	 * under 'q->debugfs_dir', thus lookup and remove them.
	 */
	if (!bt->dir) {
		debugfs_remove(debugfs_lookup("dropped", q->debugfs_dir));
		debugfs_remove(debugfs_lookup("msg", q->debugfs_dir));
	} else {
		debugfs_remove(bt->dir);
	}
	free_percpu(bt->sequence);
	free_percpu(bt->msg_data);
	kfree(bt);
}

static void get_probe_ref(void)
{
	mutex_lock(&blk_probe_mutex);
	if (++blk_probes_ref == 1)
		blk_register_tracepoints();
	mutex_unlock(&blk_probe_mutex);
}

static void put_probe_ref(void)
{
	mutex_lock(&blk_probe_mutex);
	if (!--blk_probes_ref)
		blk_unregister_tracepoints();
	mutex_unlock(&blk_probe_mutex);
}

static void blk_trace_cleanup(struct request_queue *q, struct blk_trace *bt)
{
	synchronize_rcu();
	blk_trace_free(q, bt);
	put_probe_ref();
}

static int __blk_trace_remove(struct request_queue *q)
{
	struct blk_trace *bt;

	bt = rcu_replace_pointer(q->blk_trace, NULL,
				 lockdep_is_held(&q->debugfs_mutex));
	if (!bt)
		return -EINVAL;

	if (bt->trace_state != Blktrace_running)
		blk_trace_cleanup(q, bt);

	return 0;
}

int blk_trace_remove(struct request_queue *q)
{
	int ret;

	mutex_lock(&q->debugfs_mutex);
	ret = __blk_trace_remove(q);
	mutex_unlock(&q->debugfs_mutex);

	return ret;
}
EXPORT_SYMBOL_GPL(blk_trace_remove);

static ssize_t blk_dropped_read(struct file *filp, char __user *buffer,
				size_t count, loff_t *ppos)
{
	struct blk_trace *bt = filp->private_data;
	char buf[16];

	snprintf(buf, sizeof(buf), "%u\n", atomic_read(&bt->dropped));

	return simple_read_from_buffer(buffer, count, ppos, buf, strlen(buf));
}

static const struct file_operations blk_dropped_fops = {
	.owner =	THIS_MODULE,
	.open =		simple_open,
	.read =		blk_dropped_read,
	.llseek =	default_llseek,
};

static ssize_t blk_msg_write(struct file *filp, const char __user *buffer,
				size_t count, loff_t *ppos)
{
	char *msg;
	struct blk_trace *bt;

	if (count >= BLK_TN_MAX_MSG)
		return -EINVAL;

	msg = memdup_user_nul(buffer, count);
	if (IS_ERR(msg))
		return PTR_ERR(msg);

	bt = filp->private_data;
	__blk_trace_note_message(bt, NULL, "%s", msg);
	kfree(msg);

	return count;
}

static const struct file_operations blk_msg_fops = {
	.owner =	THIS_MODULE,
	.open =		simple_open,
	.write =	blk_msg_write,
	.llseek =	noop_llseek,
};
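
/*
 * Usage sketch (paths may vary by setup): while a trace is active, user
 * space can inject a marker into the stream through the "msg" debugfs
 * file created below, e.g.:
 *
 *	echo "hello world" > /sys/kernel/debug/block/sda/msg
 *
 * The text then appears in the trace as a BLK_TN_MESSAGE note.
 */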

/*
 * Keep track of how many times we encountered a full subbuffer, to aid
 * the user space app in telling how many lost events there were.
 */
static int blk_subbuf_start_callback(struct rchan_buf *buf, void *subbuf,
				     void *prev_subbuf, size_t prev_padding)
{
	struct blk_trace *bt;

	if (!relay_buf_full(buf))
		return 1;

	bt = buf->chan->private_data;
	atomic_inc(&bt->dropped);
	return 0;
}

static int blk_remove_buf_file_callback(struct dentry *dentry)
{
	debugfs_remove(dentry);

	return 0;
}

static struct dentry *blk_create_buf_file_callback(const char *filename,
						   struct dentry *parent,
						   umode_t mode,
						   struct rchan_buf *buf,
						   int *is_global)
{
	return debugfs_create_file(filename, mode, parent, buf,
					&relay_file_operations);
}

static const struct rchan_callbacks blk_relay_callbacks = {
	.subbuf_start		= blk_subbuf_start_callback,
	.create_buf_file	= blk_create_buf_file_callback,
	.remove_buf_file	= blk_remove_buf_file_callback,
};

static void blk_trace_setup_lba(struct blk_trace *bt,
				struct block_device *bdev)
{
	if (bdev) {
		bt->start_lba = bdev->bd_start_sect;
		bt->end_lba = bdev->bd_start_sect + bdev_nr_sectors(bdev);
	} else {
		bt->start_lba = 0;
		bt->end_lba = -1ULL;
	}
}

/*
 * Setup everything required to start tracing
 */
static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
			      struct block_device *bdev,
			      struct blk_user_trace_setup *buts)
{
	struct blk_trace *bt = NULL;
	struct dentry *dir = NULL;
	int ret;

	lockdep_assert_held(&q->debugfs_mutex);

	if (!buts->buf_size || !buts->buf_nr)
		return -EINVAL;

	strncpy(buts->name, name, BLKTRACE_BDEV_SIZE);
	buts->name[BLKTRACE_BDEV_SIZE - 1] = '\0';

	/*
	 * some device names have larger paths - convert the slashes
	 * to underscores for this to work as expected
	 */
	strreplace(buts->name, '/', '_');

	/*
	 * bdev can be NULL, as with scsi-generic; this is as helpful as
	 * we can be.
	 */
	if (rcu_dereference_protected(q->blk_trace,
				      lockdep_is_held(&q->debugfs_mutex))) {
		pr_warn("Concurrent blktraces are not allowed on %s\n",
			buts->name);
		return -EBUSY;
	}

	bt = kzalloc(sizeof(*bt), GFP_KERNEL);
	if (!bt)
		return -ENOMEM;

	ret = -ENOMEM;
	bt->sequence = alloc_percpu(unsigned long);
	if (!bt->sequence)
		goto err;

	bt->msg_data = __alloc_percpu(BLK_TN_MAX_MSG, __alignof__(char));
	if (!bt->msg_data)
		goto err;

	/*
	 * When tracing the whole disk, reuse the existing debugfs directory
	 * created by the block layer on init. For partition block devices
	 * and scsi-generic block devices we create a temporary new debugfs
	 * directory that will be removed once the trace ends.
	 */
	if (bdev && !bdev_is_partition(bdev))
		dir = q->debugfs_dir;
	else
		bt->dir = dir = debugfs_create_dir(buts->name, blk_debugfs_root);

	/*
	 * As blktrace relies on debugfs for its interface the debugfs directory
	 * is required, contrary to the usual mantra of not checking for debugfs
	 * files or directories.
	 */
	if (IS_ERR_OR_NULL(dir)) {
		pr_warn("debugfs_dir not present for %s so skipping\n",
			buts->name);
		ret = -ENOENT;
		goto err;
	}

	bt->dev = dev;
	atomic_set(&bt->dropped, 0);
	INIT_LIST_HEAD(&bt->running_list);

	ret = -EIO;
	debugfs_create_file("dropped", 0444, dir, bt, &blk_dropped_fops);
	debugfs_create_file("msg", 0222, dir, bt, &blk_msg_fops);

	bt->rchan = relay_open("trace", dir, buts->buf_size,
				buts->buf_nr, &blk_relay_callbacks, bt);
	if (!bt->rchan)
		goto err;

	bt->act_mask = buts->act_mask;
	if (!bt->act_mask)
		bt->act_mask = (u16) -1;

	blk_trace_setup_lba(bt, bdev);

	/* overwrite with user settings */
	if (buts->start_lba)
		bt->start_lba = buts->start_lba;
	if (buts->end_lba)
		bt->end_lba = buts->end_lba;

	bt->pid = buts->pid;
	bt->trace_state = Blktrace_setup;

	rcu_assign_pointer(q->blk_trace, bt);
	get_probe_ref();

	ret = 0;
err:
	if (ret)
		blk_trace_free(q, bt);
	return ret;
}

static int __blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
			     struct block_device *bdev, char __user *arg)
{
	struct blk_user_trace_setup buts;
	int ret;

	ret = copy_from_user(&buts, arg, sizeof(buts));
	if (ret)
		return -EFAULT;

	ret = do_blk_trace_setup(q, name, dev, bdev, &buts);
	if (ret)
		return ret;

	if (copy_to_user(arg, &buts, sizeof(buts))) {
		__blk_trace_remove(q);
		return -EFAULT;
	}
	return 0;
}

int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
		    struct block_device *bdev,
		    char __user *arg)
{
	int ret;

	mutex_lock(&q->debugfs_mutex);
	ret = __blk_trace_setup(q, name, dev, bdev, arg);
	mutex_unlock(&q->debugfs_mutex);

	return ret;
}
EXPORT_SYMBOL_GPL(blk_trace_setup);
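
/*
 * User-space sketch (illustrative only, error handling omitted): this is
 * roughly how blktrace(8) is assumed to drive blk_trace_ioctl() below,
 * using the BLKTRACE* ioctls and struct blk_user_trace_setup from
 * <linux/blktrace_api.h>:
 *
 *	struct blk_user_trace_setup buts = {
 *		.buf_size = 512 * 1024,	// bytes per relay subbuffer
 *		.buf_nr   = 4,		// number of subbuffers
 *		.act_mask = 0,		// 0 is treated as "all actions"
 *	};
 *	int fd = open("/dev/sda", O_RDONLY | O_NONBLOCK);
 *	ioctl(fd, BLKTRACESETUP, &buts);
 *	ioctl(fd, BLKTRACESTART);
 *	// ... read the per-cpu relay files named after buts.name ...
 *	ioctl(fd, BLKTRACESTOP);
 *	ioctl(fd, BLKTRACETEARDOWN);
 */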

#if defined(CONFIG_COMPAT) && defined(CONFIG_X86_64)
static int compat_blk_trace_setup(struct request_queue *q, char *name,
				  dev_t dev, struct block_device *bdev,
				  char __user *arg)
{
	struct blk_user_trace_setup buts;
	struct compat_blk_user_trace_setup cbuts;
	int ret;

	if (copy_from_user(&cbuts, arg, sizeof(cbuts)))
		return -EFAULT;

	buts = (struct blk_user_trace_setup) {
		.act_mask = cbuts.act_mask,
		.buf_size = cbuts.buf_size,
		.buf_nr = cbuts.buf_nr,
		.start_lba = cbuts.start_lba,
		.end_lba = cbuts.end_lba,
		.pid = cbuts.pid,
	};

	ret = do_blk_trace_setup(q, name, dev, bdev, &buts);
	if (ret)
		return ret;

	if (copy_to_user(arg, &buts.name, ARRAY_SIZE(buts.name))) {
		__blk_trace_remove(q);
		return -EFAULT;
	}

	return 0;
}
#endif

static int __blk_trace_startstop(struct request_queue *q, int start)
{
	int ret;
	struct blk_trace *bt;

	bt = rcu_dereference_protected(q->blk_trace,
				       lockdep_is_held(&q->debugfs_mutex));
	if (bt == NULL)
		return -EINVAL;

	/*
	 * For starting a trace, we can transition from a setup or stopped
	 * trace. For stopping a trace, the state must be running
	 */
	ret = -EINVAL;
	if (start) {
		if (bt->trace_state == Blktrace_setup ||
		    bt->trace_state == Blktrace_stopped) {
			blktrace_seq++;
			smp_mb();
			bt->trace_state = Blktrace_running;
			raw_spin_lock_irq(&running_trace_lock);
			list_add(&bt->running_list, &running_trace_list);
			raw_spin_unlock_irq(&running_trace_lock);

			trace_note_time(bt);
			ret = 0;
		}
	} else {
		if (bt->trace_state == Blktrace_running) {
			bt->trace_state = Blktrace_stopped;
			raw_spin_lock_irq(&running_trace_lock);
			list_del_init(&bt->running_list);
			raw_spin_unlock_irq(&running_trace_lock);
			relay_flush(bt->rchan);
			ret = 0;
		}
	}

	return ret;
}

int blk_trace_startstop(struct request_queue *q, int start)
{
	int ret;

	mutex_lock(&q->debugfs_mutex);
	ret = __blk_trace_startstop(q, start);
	mutex_unlock(&q->debugfs_mutex);

	return ret;
}
EXPORT_SYMBOL_GPL(blk_trace_startstop);

/*
 * When reading or writing the blktrace sysfs files, the references to the
 * opened sysfs or device files should prevent the underlying block device
 * from being removed. So no further delete protection is really needed.
 */

/**
 * blk_trace_ioctl: - handle the ioctls associated with tracing
 * @bdev:	the block device
 * @cmd:	the ioctl cmd
 * @arg:	the argument data, if any
 *
 **/
int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
{
	struct request_queue *q;
	int ret, start = 0;
	char b[BDEVNAME_SIZE];

	q = bdev_get_queue(bdev);
	if (!q)
		return -ENXIO;

	mutex_lock(&q->debugfs_mutex);

	switch (cmd) {
	case BLKTRACESETUP:
		bdevname(bdev, b);
		ret = __blk_trace_setup(q, b, bdev->bd_dev, bdev, arg);
		break;
#if defined(CONFIG_COMPAT) && defined(CONFIG_X86_64)
	case BLKTRACESETUP32:
		bdevname(bdev, b);
		ret = compat_blk_trace_setup(q, b, bdev->bd_dev, bdev, arg);
		break;
#endif
	case BLKTRACESTART:
		start = 1;
		fallthrough;
	case BLKTRACESTOP:
		ret = __blk_trace_startstop(q, start);
		break;
	case BLKTRACETEARDOWN:
		ret = __blk_trace_remove(q);
		break;
	default:
		ret = -ENOTTY;
		break;
	}

	mutex_unlock(&q->debugfs_mutex);
	return ret;
}

/**
 * blk_trace_shutdown: - stop and cleanup trace structures
 * @q:    the request queue associated with the device
 *
 **/
void blk_trace_shutdown(struct request_queue *q)
{
	if (rcu_dereference_protected(q->blk_trace,
				      lockdep_is_held(&q->debugfs_mutex))) {
		__blk_trace_startstop(q, 0);
		__blk_trace_remove(q);
	}
}

#ifdef CONFIG_BLK_CGROUP
static u64 blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio)
{
	struct cgroup_subsys_state *blkcg_css;
	struct blk_trace *bt;

	/* We don't use the 'bt' value here except as an optimization... */
	bt = rcu_dereference_protected(q->blk_trace, 1);
	if (!bt || !(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP))
		return 0;

	blkcg_css = bio_blkcg_css(bio);
	if (!blkcg_css)
		return 0;
	return cgroup_id(blkcg_css->cgroup);
}
#else
static u64 blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio)
{
	return 0;
}
#endif

static u64
blk_trace_request_get_cgid(struct request *rq)
{
	if (!rq->bio)
		return 0;
	/* Use the first bio */
	return blk_trace_bio_get_cgid(rq->q, rq->bio);
}

/*
 * blktrace probes
 */

/**
 * blk_add_trace_rq - Add a trace for a request oriented action
 * @rq:		the source request
 * @error:	return status to log
 * @nr_bytes:	number of completed bytes
 * @what:	the action
 * @cgid:	the cgroup info
 *
 * Description:
 *     Records an action against a request. Will log the bio offset + size.
 *
 **/
static void blk_add_trace_rq(struct request *rq, blk_status_t error,
			     unsigned int nr_bytes, u32 what, u64 cgid)
{
	struct blk_trace *bt;

	rcu_read_lock();
	bt = rcu_dereference(rq->q->blk_trace);
	if (likely(!bt)) {
		rcu_read_unlock();
		return;
	}

	if (blk_rq_is_passthrough(rq))
		what |= BLK_TC_ACT(BLK_TC_PC);
	else
		what |= BLK_TC_ACT(BLK_TC_FS);

	__blk_add_trace(bt, blk_rq_trace_sector(rq), nr_bytes, req_op(rq),
			rq->cmd_flags, what, blk_status_to_errno(error), 0,
			NULL, cgid);
	rcu_read_unlock();
}

static void blk_add_trace_rq_insert(void *ignore, struct request *rq)
{
	blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_INSERT,
			 blk_trace_request_get_cgid(rq));
}

static void blk_add_trace_rq_issue(void *ignore, struct request *rq)
{
	blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_ISSUE,
			 blk_trace_request_get_cgid(rq));
}

static void blk_add_trace_rq_merge(void *ignore, struct request *rq)
{
	blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_BACKMERGE,
			 blk_trace_request_get_cgid(rq));
}

static void blk_add_trace_rq_requeue(void *ignore, struct request *rq)
{
	blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_REQUEUE,
			 blk_trace_request_get_cgid(rq));
}

static void blk_add_trace_rq_complete(void *ignore, struct request *rq,
			blk_status_t error, unsigned int nr_bytes)
{
	blk_add_trace_rq(rq, error, nr_bytes, BLK_TA_COMPLETE,
			 blk_trace_request_get_cgid(rq));
}

/**
 * blk_add_trace_bio - Add a trace for a bio oriented action
 * @q:		queue the io is for
 * @bio:	the source bio
 * @what:	the action
 * @error:	error, if any
 *
 * Description:
 *     Records an action against a bio. Will log the bio offset + size.
 *
 **/
static void blk_add_trace_bio(struct request_queue *q, struct bio *bio,
			      u32 what, int error)
{
	struct blk_trace *bt;

	rcu_read_lock();
	bt = rcu_dereference(q->blk_trace);
	if (likely(!bt)) {
		rcu_read_unlock();
		return;
	}

	__blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size,
			bio_op(bio), bio->bi_opf, what, error, 0, NULL,
			blk_trace_bio_get_cgid(q, bio));
	rcu_read_unlock();
}

static void blk_add_trace_bio_bounce(void *ignore, struct bio *bio)
{
	blk_add_trace_bio(bio->bi_bdev->bd_disk->queue, bio, BLK_TA_BOUNCE, 0);
}

static void blk_add_trace_bio_complete(void *ignore,
				       struct request_queue *q, struct bio *bio)
{
	blk_add_trace_bio(q, bio, BLK_TA_COMPLETE,
			  blk_status_to_errno(bio->bi_status));
}

static void blk_add_trace_bio_backmerge(void *ignore, struct bio *bio)
{
	blk_add_trace_bio(bio->bi_bdev->bd_disk->queue, bio, BLK_TA_BACKMERGE,
			0);
}

static void blk_add_trace_bio_frontmerge(void *ignore, struct bio *bio)
{
	blk_add_trace_bio(bio->bi_bdev->bd_disk->queue, bio, BLK_TA_FRONTMERGE,
			0);
}

static void blk_add_trace_bio_queue(void *ignore, struct bio *bio)
{
	blk_add_trace_bio(bio->bi_bdev->bd_disk->queue, bio, BLK_TA_QUEUE, 0);
}

static void blk_add_trace_getrq(void *ignore, struct bio *bio)
{
	blk_add_trace_bio(bio->bi_bdev->bd_disk->queue, bio, BLK_TA_GETRQ, 0);
}

static void blk_add_trace_plug(void *ignore, struct request_queue *q)
{
	struct blk_trace *bt;

	rcu_read_lock();
	bt = rcu_dereference(q->blk_trace);
	if (bt)
		__blk_add_trace(bt, 0, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL, 0);
	rcu_read_unlock();
}

static void blk_add_trace_unplug(void *ignore, struct request_queue *q,
				    unsigned int depth, bool explicit)
{
	struct blk_trace *bt;

	rcu_read_lock();
	bt = rcu_dereference(q->blk_trace);
	if (bt) {
		__be64 rpdu = cpu_to_be64(depth);
		u32 what;

		if (explicit)
			what = BLK_TA_UNPLUG_IO;
		else
			what = BLK_TA_UNPLUG_TIMER;

		__blk_add_trace(bt, 0, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu, 0);
	}
	rcu_read_unlock();
}

static void blk_add_trace_split(void *ignore, struct bio *bio, unsigned int pdu)
{
	struct request_queue *q = bio->bi_bdev->bd_disk->queue;
	struct blk_trace *bt;

	rcu_read_lock();
	bt = rcu_dereference(q->blk_trace);
	if (bt) {
		__be64 rpdu = cpu_to_be64(pdu);

		__blk_add_trace(bt, bio->bi_iter.bi_sector,
				bio->bi_iter.bi_size, bio_op(bio), bio->bi_opf,
				BLK_TA_SPLIT,
				blk_status_to_errno(bio->bi_status),
				sizeof(rpdu), &rpdu,
				blk_trace_bio_get_cgid(q, bio));
	}
	rcu_read_unlock();
}

/**
 * blk_add_trace_bio_remap - Add a trace for a bio-remap operation
 * @ignore:	trace callback data parameter (not used)
 * @bio:	the source bio
 * @dev:	source device
 * @from:	source sector
 *
 * Called after a bio is remapped to a different device and/or sector.
 **/
static void blk_add_trace_bio_remap(void *ignore, struct bio *bio, dev_t dev,
				    sector_t from)
{
	struct request_queue *q = bio->bi_bdev->bd_disk->queue;
	struct blk_trace *bt;
	struct blk_io_trace_remap r;

	rcu_read_lock();
	bt = rcu_dereference(q->blk_trace);
	if (likely(!bt)) {
		rcu_read_unlock();
		return;
	}

	r.device_from = cpu_to_be32(dev);
	r.device_to   = cpu_to_be32(bio_dev(bio));
	r.sector_from = cpu_to_be64(from);

	__blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size,
			bio_op(bio), bio->bi_opf, BLK_TA_REMAP,
			blk_status_to_errno(bio->bi_status),
			sizeof(r), &r, blk_trace_bio_get_cgid(q, bio));
	rcu_read_unlock();
}

/**
 * blk_add_trace_rq_remap - Add a trace for a request-remap operation
 * @ignore:	trace callback data parameter (not used)
 * @rq:		the source request
 * @dev:	target device
 * @from:	source sector
 *
 * Description:
 *     Device mapper remaps request to other devices.
 *     Add a trace for that action.
 *
 **/
static void blk_add_trace_rq_remap(void *ignore, struct request *rq, dev_t dev,
				   sector_t from)
{
	struct blk_trace *bt;
	struct blk_io_trace_remap r;

	rcu_read_lock();
	bt = rcu_dereference(rq->q->blk_trace);
	if (likely(!bt)) {
		rcu_read_unlock();
		return;
	}

	r.device_from = cpu_to_be32(dev);
	r.device_to   = cpu_to_be32(disk_devt(rq->q->disk));
	r.sector_from = cpu_to_be64(from);

	__blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq),
			rq_data_dir(rq), 0, BLK_TA_REMAP, 0,
			sizeof(r), &r, blk_trace_request_get_cgid(rq));
	rcu_read_unlock();
}

/**
 * blk_add_driver_data - Add binary message with driver-specific data
 * @rq:		io request
 * @data:	driver-specific data
 * @len:	length of driver-specific data
 *
 * Description:
 *     Some drivers might want to write driver-specific data per request.
 *
 **/
void blk_add_driver_data(struct request *rq, void *data, size_t len)
{
	struct blk_trace *bt;

	rcu_read_lock();
	bt = rcu_dereference(rq->q->blk_trace);
	if (likely(!bt)) {
		rcu_read_unlock();
		return;
	}

	__blk_add_trace(bt, blk_rq_trace_sector(rq), blk_rq_bytes(rq), 0, 0,
				BLK_TA_DRV_DATA, 0, len, data,
				blk_trace_request_get_cgid(rq));
	rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(blk_add_driver_data);
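
/*
 * Illustrative caller (hypothetical driver structure): a driver wanting its
 * per-request state in the trace could do something like
 *
 *	struct my_pdu pdu = { .tag = rq->tag };	// hypothetical payload
 *	blk_add_driver_data(rq, &pdu, sizeof(pdu));
 *
 * which records a BLK_TA_DRV_DATA event with the bytes attached as pdu data.
 */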

static void blk_register_tracepoints(void)
{
	int ret;

	ret = register_trace_block_rq_insert(blk_add_trace_rq_insert, NULL);
	WARN_ON(ret);
	ret = register_trace_block_rq_issue(blk_add_trace_rq_issue, NULL);
	WARN_ON(ret);
	ret = register_trace_block_rq_merge(blk_add_trace_rq_merge, NULL);
	WARN_ON(ret);
	ret = register_trace_block_rq_requeue(blk_add_trace_rq_requeue, NULL);
	WARN_ON(ret);
	ret = register_trace_block_rq_complete(blk_add_trace_rq_complete, NULL);
	WARN_ON(ret);
	ret = register_trace_block_bio_bounce(blk_add_trace_bio_bounce, NULL);
	WARN_ON(ret);
	ret = register_trace_block_bio_complete(blk_add_trace_bio_complete, NULL);
	WARN_ON(ret);
	ret = register_trace_block_bio_backmerge(blk_add_trace_bio_backmerge, NULL);
	WARN_ON(ret);
	ret = register_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge, NULL);
	WARN_ON(ret);
	ret = register_trace_block_bio_queue(blk_add_trace_bio_queue, NULL);
	WARN_ON(ret);
	ret = register_trace_block_getrq(blk_add_trace_getrq, NULL);
	WARN_ON(ret);
	ret = register_trace_block_plug(blk_add_trace_plug, NULL);
	WARN_ON(ret);
	ret = register_trace_block_unplug(blk_add_trace_unplug, NULL);
	WARN_ON(ret);
	ret = register_trace_block_split(blk_add_trace_split, NULL);
	WARN_ON(ret);
	ret = register_trace_block_bio_remap(blk_add_trace_bio_remap, NULL);
	WARN_ON(ret);
	ret = register_trace_block_rq_remap(blk_add_trace_rq_remap, NULL);
	WARN_ON(ret);
}

static void blk_unregister_tracepoints(void)
{
	unregister_trace_block_rq_remap(blk_add_trace_rq_remap, NULL);
	unregister_trace_block_bio_remap(blk_add_trace_bio_remap, NULL);
	unregister_trace_block_split(blk_add_trace_split, NULL);
	unregister_trace_block_unplug(blk_add_trace_unplug, NULL);
	unregister_trace_block_plug(blk_add_trace_plug, NULL);
	unregister_trace_block_getrq(blk_add_trace_getrq, NULL);
	unregister_trace_block_bio_queue(blk_add_trace_bio_queue, NULL);
	unregister_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge, NULL);
	unregister_trace_block_bio_backmerge(blk_add_trace_bio_backmerge, NULL);
	unregister_trace_block_bio_complete(blk_add_trace_bio_complete, NULL);
	unregister_trace_block_bio_bounce(blk_add_trace_bio_bounce, NULL);
	unregister_trace_block_rq_complete(blk_add_trace_rq_complete, NULL);
	unregister_trace_block_rq_requeue(blk_add_trace_rq_requeue, NULL);
	unregister_trace_block_rq_merge(blk_add_trace_rq_merge, NULL);
	unregister_trace_block_rq_issue(blk_add_trace_rq_issue, NULL);
	unregister_trace_block_rq_insert(blk_add_trace_rq_insert, NULL);

	tracepoint_synchronize_unregister();
}

/*
 * struct blk_io_tracer formatting routines
 */

static void fill_rwbs(char *rwbs, const struct blk_io_trace *t)
{
	int i = 0;
	int tc = t->action >> BLK_TC_SHIFT;

	if ((t->action & ~__BLK_TN_CGROUP) == BLK_TN_MESSAGE) {
		rwbs[i++] = 'N';
		goto out;
	}

	if (tc & BLK_TC_FLUSH)
		rwbs[i++] = 'F';

	if (tc & BLK_TC_DISCARD)
		rwbs[i++] = 'D';
	else if (tc & BLK_TC_WRITE)
		rwbs[i++] = 'W';
	else if (t->bytes)
		rwbs[i++] = 'R';
	else
		rwbs[i++] = 'N';

	if (tc & BLK_TC_FUA)
		rwbs[i++] = 'F';
	if (tc & BLK_TC_AHEAD)
		rwbs[i++] = 'A';
	if (tc & BLK_TC_SYNC)
		rwbs[i++] = 'S';
	if (tc & BLK_TC_META)
		rwbs[i++] = 'M';
out:
	rwbs[i] = '\0';
}

static inline
const struct blk_io_trace *te_blk_io_trace(const struct trace_entry *ent)
{
	return (const struct blk_io_trace *)ent;
}

static inline const void *pdu_start(const struct trace_entry *ent, bool has_cg)
{
	return (void *)(te_blk_io_trace(ent) + 1) + (has_cg ? sizeof(u64) : 0);
}

static inline u64 t_cgid(const struct trace_entry *ent)
{
	return *(u64 *)(te_blk_io_trace(ent) + 1);
}

static inline int pdu_real_len(const struct trace_entry *ent, bool has_cg)
{
	return te_blk_io_trace(ent)->pdu_len - (has_cg ? sizeof(u64) : 0);
}

static inline u32 t_action(const struct trace_entry *ent)
{
	return te_blk_io_trace(ent)->action;
}

static inline u32 t_bytes(const struct trace_entry *ent)
{
	return te_blk_io_trace(ent)->bytes;
}

static inline u32 t_sec(const struct trace_entry *ent)
{
	return te_blk_io_trace(ent)->bytes >> 9;
}

static inline unsigned long long t_sector(const struct trace_entry *ent)
{
	return te_blk_io_trace(ent)->sector;
}

static inline __u16 t_error(const struct trace_entry *ent)
{
	return te_blk_io_trace(ent)->error;
}

static __u64 get_pdu_int(const struct trace_entry *ent, bool has_cg)
{
	const __be64 *val = pdu_start(ent, has_cg);
	return be64_to_cpu(*val);
}

typedef void (blk_log_action_t) (struct trace_iterator *iter, const char *act,
	bool has_cg);

static void blk_log_action_classic(struct trace_iterator *iter, const char *act,
	bool has_cg)
{
	char rwbs[RWBS_LEN];
	unsigned long long ts  = iter->ts;
	unsigned long nsec_rem = do_div(ts, NSEC_PER_SEC);
	unsigned secs	       = (unsigned long)ts;
	const struct blk_io_trace *t = te_blk_io_trace(iter->ent);

	fill_rwbs(rwbs, t);

	trace_seq_printf(&iter->seq,
			 "%3d,%-3d %2d %5d.%09lu %5u %2s %3s ",
			 MAJOR(t->device), MINOR(t->device), iter->cpu,
			 secs, nsec_rem, iter->ent->pid, act, rwbs);
}

static void blk_log_action(struct trace_iterator *iter, const char *act,
	bool has_cg)
{
	char rwbs[RWBS_LEN];
	const struct blk_io_trace *t = te_blk_io_trace(iter->ent);

	fill_rwbs(rwbs, t);
	if (has_cg) {
		u64 id = t_cgid(iter->ent);

		if (blk_tracer_flags.val & TRACE_BLK_OPT_CGNAME) {
			char blkcg_name_buf[NAME_MAX + 1] = "<...>";

			cgroup_path_from_kernfs_id(id, blkcg_name_buf,
				sizeof(blkcg_name_buf));
			trace_seq_printf(&iter->seq, "%3d,%-3d %s %2s %3s ",
				 MAJOR(t->device), MINOR(t->device),
				 blkcg_name_buf, act, rwbs);
		} else {
			/*
			 * The cgid portion used to be "INO,GEN".  Userland
			 * builds a FILEID_INO32_GEN fid out of them and
			 * opens the cgroup using open_by_handle_at(2).
			 * While 32bit ino setups are still the same, 64bit
			 * ones now use the 64bit ino as the whole ID and
			 * no longer use generation.
			 *
			 * Regardless of the content, always output
			 * "LOW32,HIGH32" so that FILEID_INO32_GEN fid can
			 * be mapped back to @id on both 64 and 32bit ino
			 * setups.  See __kernfs_fh_to_dentry().
			 */
			trace_seq_printf(&iter->seq,
				 "%3d,%-3d %llx,%-llx %2s %3s ",
				 MAJOR(t->device), MINOR(t->device),
				 id & U32_MAX, id >> 32, act, rwbs);
		}
	} else
		trace_seq_printf(&iter->seq, "%3d,%-3d %2s %3s ",
				 MAJOR(t->device), MINOR(t->device), act, rwbs);
}

static void blk_log_dump_pdu(struct trace_seq *s,
	const struct trace_entry *ent, bool has_cg)
{
	const unsigned char *pdu_buf;
	int pdu_len;
	int i, end;

	pdu_buf = pdu_start(ent, has_cg);
	pdu_len = pdu_real_len(ent, has_cg);

	if (!pdu_len)
		return;

	/*
	 * Find where the trailing zeros begin; one zero is still printed
	 * before the rest are elided as "..".
	 */
	for (end = pdu_len - 1; end >= 0; end--)
		if (pdu_buf[end])
			break;
	end++;

	trace_seq_putc(s, '(');

	for (i = 0; i < pdu_len; i++) {

		trace_seq_printf(s, "%s%02x",
				 i == 0 ? "" : " ", pdu_buf[i]);

		/*
		 * stop when the rest is just zeros and indicate so
		 * with a ".." appended
		 */
		if (i == end && end != pdu_len - 1) {
			trace_seq_puts(s, " ..) ");
			return;
		}
	}

	trace_seq_puts(s, ") ");
}

static void blk_log_generic(struct trace_seq *s, const struct trace_entry *ent, bool has_cg)
{
	char cmd[TASK_COMM_LEN];

	trace_find_cmdline(ent->pid, cmd);

	if (t_action(ent) & BLK_TC_ACT(BLK_TC_PC)) {
		trace_seq_printf(s, "%u ", t_bytes(ent));
		blk_log_dump_pdu(s, ent, has_cg);
		trace_seq_printf(s, "[%s]\n", cmd);
	} else {
		if (t_sec(ent))
			trace_seq_printf(s, "%llu + %u [%s]\n",
						t_sector(ent), t_sec(ent), cmd);
		else
			trace_seq_printf(s, "[%s]\n", cmd);
	}
}

static void blk_log_with_error(struct trace_seq *s,
			      const struct trace_entry *ent, bool has_cg)
{
	if (t_action(ent) & BLK_TC_ACT(BLK_TC_PC)) {
		blk_log_dump_pdu(s, ent, has_cg);
		trace_seq_printf(s, "[%d]\n", t_error(ent));
	} else {
		if (t_sec(ent))
			trace_seq_printf(s, "%llu + %u [%d]\n",
					 t_sector(ent),
					 t_sec(ent), t_error(ent));
		else
			trace_seq_printf(s, "%llu [%d]\n",
					 t_sector(ent), t_error(ent));
	}
}

static void blk_log_remap(struct trace_seq *s, const struct trace_entry *ent, bool has_cg)
{
	const struct blk_io_trace_remap *__r = pdu_start(ent, has_cg);

	trace_seq_printf(s, "%llu + %u <- (%d,%d) %llu\n",
			 t_sector(ent), t_sec(ent),
			 MAJOR(be32_to_cpu(__r->device_from)),
			 MINOR(be32_to_cpu(__r->device_from)),
			 be64_to_cpu(__r->sector_from));
}

static void blk_log_plug(struct trace_seq *s, const struct trace_entry *ent, bool has_cg)
{
	char cmd[TASK_COMM_LEN];

	trace_find_cmdline(ent->pid, cmd);

	trace_seq_printf(s, "[%s]\n", cmd);
}

static void blk_log_unplug(struct trace_seq *s, const struct trace_entry *ent, bool has_cg)
{
	char cmd[TASK_COMM_LEN];

	trace_find_cmdline(ent->pid, cmd);

	trace_seq_printf(s, "[%s] %llu\n", cmd, get_pdu_int(ent, has_cg));
}

static void blk_log_split(struct trace_seq *s, const struct trace_entry *ent, bool has_cg)
{
	char cmd[TASK_COMM_LEN];

	trace_find_cmdline(ent->pid, cmd);

	trace_seq_printf(s, "%llu / %llu [%s]\n", t_sector(ent),
			 get_pdu_int(ent, has_cg), cmd);
}

static void blk_log_msg(struct trace_seq *s, const struct trace_entry *ent,
			bool has_cg)
{
	trace_seq_putmem(s, pdu_start(ent, has_cg),
		pdu_real_len(ent, has_cg));
	trace_seq_putc(s, '\n');
}

/*
 * struct tracer operations
 */

static void blk_tracer_print_header(struct seq_file *m)
{
	if (!(blk_tracer_flags.val & TRACE_BLK_OPT_CLASSIC))
		return;
	seq_puts(m, "# DEV   CPU TIMESTAMP     PID ACT FLG\n"
		    "#  |     |     |           |   |   |\n");
}

static void blk_tracer_start(struct trace_array *tr)
{
	blk_tracer_enabled = true;
}

static int blk_tracer_init(struct trace_array *tr)
{
	blk_tr = tr;
	blk_tracer_start(tr);
	return 0;
}

static void blk_tracer_stop(struct trace_array *tr)
{
	blk_tracer_enabled = false;
}

static void blk_tracer_reset(struct trace_array *tr)
{
	blk_tracer_stop(tr);
}

static const struct {
	const char *act[2];
	void	   (*print)(struct trace_seq *s, const struct trace_entry *ent,
			    bool has_cg);
} what2act[] = {
	[__BLK_TA_QUEUE]	= {{  "Q", "queue" },	   blk_log_generic },
	[__BLK_TA_BACKMERGE]	= {{  "M", "backmerge" },  blk_log_generic },
	[__BLK_TA_FRONTMERGE]	= {{  "F", "frontmerge" }, blk_log_generic },
	[__BLK_TA_GETRQ]	= {{  "G", "getrq" },	   blk_log_generic },
	[__BLK_TA_SLEEPRQ]	= {{  "S", "sleeprq" },	   blk_log_generic },
	[__BLK_TA_REQUEUE]	= {{  "R", "requeue" },	   blk_log_with_error },
	[__BLK_TA_ISSUE]	= {{  "D", "issue" },	   blk_log_generic },
	[__BLK_TA_COMPLETE]	= {{  "C", "complete" },   blk_log_with_error },
	[__BLK_TA_PLUG]		= {{  "P", "plug" },	   blk_log_plug },
	[__BLK_TA_UNPLUG_IO]	= {{  "U", "unplug_io" },  blk_log_unplug },
	[__BLK_TA_UNPLUG_TIMER]	= {{ "UT", "unplug_timer" }, blk_log_unplug },
	[__BLK_TA_INSERT]	= {{  "I", "insert" },	   blk_log_generic },
	[__BLK_TA_SPLIT]	= {{  "X", "split" },	   blk_log_split },
	[__BLK_TA_BOUNCE]	= {{  "B", "bounce" },	   blk_log_generic },
	[__BLK_TA_REMAP]	= {{  "A", "remap" },	   blk_log_remap },
};
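
/*
 * Illustrative classic-mode output (made-up values): together with the
 * header from blk_tracer_print_header() above, a queued synchronous write
 * might render via blk_log_action_classic() and blk_log_generic() as
 *
 *	8,0    3     0.000123456  1234  Q  WS 2048 + 8 [fio]
 *
 * i.e. device 8,0, CPU 3, action "Q" (queue), rwbs "WS", sector 2048,
 * 8 sectors, issued by "fio".
 */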

static enum print_line_t print_one_line(struct trace_iterator *iter,
					bool classic)
{
	struct trace_array *tr = iter->tr;
	struct trace_seq *s = &iter->seq;
	const struct blk_io_trace *t;
	u16 what;
	bool long_act;
	blk_log_action_t *log_action;
	bool has_cg;

	t	   = te_blk_io_trace(iter->ent);
	what	   = (t->action & ((1 << BLK_TC_SHIFT) - 1)) & ~__BLK_TA_CGROUP;
	long_act   = !!(tr->trace_flags & TRACE_ITER_VERBOSE);
	log_action = classic ? &blk_log_action_classic : &blk_log_action;
	has_cg	   = t->action & __BLK_TA_CGROUP;

	if ((t->action & ~__BLK_TN_CGROUP) == BLK_TN_MESSAGE) {
		log_action(iter, long_act ? "message" : "m", has_cg);
		blk_log_msg(s, iter->ent, has_cg);
		return trace_handle_return(s);
	}

	if (unlikely(what == 0 || what >= ARRAY_SIZE(what2act)))
		trace_seq_printf(s, "Unknown action %x\n", what);
	else {
		log_action(iter, what2act[what].act[long_act], has_cg);
		what2act[what].print(s, iter->ent, has_cg);
	}

	return trace_handle_return(s);
}

static enum print_line_t blk_trace_event_print(struct trace_iterator *iter,
					       int flags, struct trace_event *event)
{
	return print_one_line(iter, false);
}

static void blk_trace_synthesize_old_trace(struct trace_iterator *iter)
{
	struct trace_seq *s = &iter->seq;
	struct blk_io_trace *t = (struct blk_io_trace *)iter->ent;
	const int offset = offsetof(struct blk_io_trace, sector);
	struct blk_io_trace old = {
		.magic	  = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION,
		.time     = iter->ts,
	};

	trace_seq_putmem(s, &old, offset);
	trace_seq_putmem(s, &t->sector,
			 sizeof(old) - offset + t->pdu_len);
}

static enum print_line_t
blk_trace_event_print_binary(struct trace_iterator *iter, int flags,
			     struct trace_event *event)
{
	blk_trace_synthesize_old_trace(iter);

	return trace_handle_return(&iter->seq);
}

static enum print_line_t blk_tracer_print_line(struct trace_iterator *iter)
{
	if (!(blk_tracer_flags.val & TRACE_BLK_OPT_CLASSIC))
		return TRACE_TYPE_UNHANDLED;

	return print_one_line(iter, true);
}

static int
blk_tracer_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
{
	/* don't output context-info for blk_classic output */
	if (bit == TRACE_BLK_OPT_CLASSIC) {
		if (set)
			tr->trace_flags &= ~TRACE_ITER_CONTEXT_INFO;
		else
			tr->trace_flags |= TRACE_ITER_CONTEXT_INFO;
	}
	return 0;
}

static struct tracer blk_tracer __read_mostly = {
	.name		= "blk",
	.init		= blk_tracer_init,
	.reset		= blk_tracer_reset,
	.start		= blk_tracer_start,
	.stop		= blk_tracer_stop,
	.print_header	= blk_tracer_print_header,
	.print_line	= blk_tracer_print_line,
	.flags		= &blk_tracer_flags,
	.set_flag	= blk_tracer_set_flag,
};

static struct trace_event_functions trace_blk_event_funcs = {
	.trace		= blk_trace_event_print,
	.binary		= blk_trace_event_print_binary,
};

static struct trace_event trace_blk_event = {
	.type		= TRACE_BLK,
	.funcs		= &trace_blk_event_funcs,
};

static int __init init_blk_tracer(void)
{
	if (!register_trace_event(&trace_blk_event)) {
		pr_warn("Warning: could not register block events\n");
		return 1;
	}

	if (register_tracer(&blk_tracer) != 0) {
		pr_warn("Warning: could not register the block tracer\n");
		unregister_trace_event(&trace_blk_event);
		return 1;
	}

	return 0;
}

device_initcall(init_blk_tracer);

static int blk_trace_remove_queue(struct request_queue *q)
{
	struct blk_trace *bt;

	bt = rcu_replace_pointer(q->blk_trace, NULL,
				 lockdep_is_held(&q->debugfs_mutex));
	if (bt == NULL)
		return -EINVAL;

	if (bt->trace_state == Blktrace_running) {
		bt->trace_state = Blktrace_stopped;
		raw_spin_lock_irq(&running_trace_lock);
		list_del_init(&bt->running_list);
		raw_spin_unlock_irq(&running_trace_lock);
		relay_flush(bt->rchan);
	}

	put_probe_ref();
	synchronize_rcu();
	blk_trace_free(q, bt);
	return 0;
}

/*
 * Setup everything required to start tracing
 */
static int blk_trace_setup_queue(struct request_queue *q,
				 struct block_device *bdev)
{
	struct blk_trace *bt = NULL;
	int ret = -ENOMEM;

	bt = kzalloc(sizeof(*bt), GFP_KERNEL);
	if (!bt)
		return -ENOMEM;

	bt->msg_data = __alloc_percpu(BLK_TN_MAX_MSG, __alignof__(char));
	if (!bt->msg_data)
		goto free_bt;

	bt->dev = bdev->bd_dev;
	bt->act_mask = (u16)-1;

	blk_trace_setup_lba(bt, bdev);

	rcu_assign_pointer(q->blk_trace, bt);
	get_probe_ref();
	return 0;

free_bt:
	blk_trace_free(q, bt);
	return ret;
}

/*
 * sysfs interface to enable and configure tracing
 */

static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
					 struct device_attribute *attr,
					 char *buf);
static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
					  struct device_attribute *attr,
					  const char *buf, size_t count);
#define BLK_TRACE_DEVICE_ATTR(_name) \
	DEVICE_ATTR(_name, S_IRUGO | S_IWUSR, \
		    sysfs_blk_trace_attr_show, \
		    sysfs_blk_trace_attr_store)

static BLK_TRACE_DEVICE_ATTR(enable);
static BLK_TRACE_DEVICE_ATTR(act_mask);
static BLK_TRACE_DEVICE_ATTR(pid);
static BLK_TRACE_DEVICE_ATTR(start_lba);
static BLK_TRACE_DEVICE_ATTR(end_lba);

static struct attribute *blk_trace_attrs[] = {
	&dev_attr_enable.attr,
	&dev_attr_act_mask.attr,
	&dev_attr_pid.attr,
	&dev_attr_start_lba.attr,
	&dev_attr_end_lba.attr,
	NULL
};

struct attribute_group blk_trace_attr_group = {
	.name  = "trace",
	.attrs = blk_trace_attrs,
};
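
/*
 * Usage sketch (device name assumed): the attributes above appear under
 * /sys/block/<dev>/trace/, so a minimal session could look like
 *
 *	echo 1          > /sys/block/sda/trace/enable
 *	echo read,write > /sys/block/sda/trace/act_mask
 *	cat               /sys/block/sda/trace/act_mask
 *	echo 0          > /sys/block/sda/trace/enable
 */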

static const struct {
	int mask;
	const char *str;
} mask_maps[] = {
	{ BLK_TC_READ,		"read"		},
	{ BLK_TC_WRITE,		"write"		},
	{ BLK_TC_FLUSH,		"flush"		},
	{ BLK_TC_SYNC,		"sync"		},
	{ BLK_TC_QUEUE,		"queue"		},
	{ BLK_TC_REQUEUE,	"requeue"	},
	{ BLK_TC_ISSUE,		"issue"		},
	{ BLK_TC_COMPLETE,	"complete"	},
	{ BLK_TC_FS,		"fs"		},
	{ BLK_TC_PC,		"pc"		},
	{ BLK_TC_NOTIFY,	"notify"	},
	{ BLK_TC_AHEAD,		"ahead"		},
	{ BLK_TC_META,		"meta"		},
	{ BLK_TC_DISCARD,	"discard"	},
	{ BLK_TC_DRV_DATA,	"drv_data"	},
	{ BLK_TC_FUA,		"fua"		},
};

static int blk_trace_str2mask(const char *str)
{
	int i;
	int mask = 0;
	char *buf, *s, *token;

	buf = kstrdup(str, GFP_KERNEL);
	if (buf == NULL)
		return -ENOMEM;
	s = strstrip(buf);

	while (1) {
		token = strsep(&s, ",");
		if (token == NULL)
			break;

		if (*token == '\0')
			continue;

		for (i = 0; i < ARRAY_SIZE(mask_maps); i++) {
			if (strcasecmp(token, mask_maps[i].str) == 0) {
				mask |= mask_maps[i].mask;
				break;
			}
		}
		if (i == ARRAY_SIZE(mask_maps)) {
			mask = -EINVAL;
			break;
		}
	}
	kfree(buf);

	return mask;
}
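
/*
 * Example: blk_trace_str2mask("read,write,sync") returns
 * BLK_TC_READ | BLK_TC_WRITE | BLK_TC_SYNC, while an unknown token such as
 * "bogus" yields -EINVAL. blk_trace_mask2str() below is the inverse,
 * turning a mask back into a comma-separated list.
 */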

static ssize_t blk_trace_mask2str(char *buf, int mask)
{
	int i;
	char *p = buf;

	for (i = 0; i < ARRAY_SIZE(mask_maps); i++) {
		if (mask & mask_maps[i].mask) {
			p += sprintf(p, "%s%s",
				    (p == buf) ? "" : ",", mask_maps[i].str);
		}
	}
	*p++ = '\n';

	return p - buf;
}

static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
					 struct device_attribute *attr,
					 char *buf)
{
	struct block_device *bdev = dev_to_bdev(dev);
	struct request_queue *q = bdev_get_queue(bdev);
	struct blk_trace *bt;
	ssize_t ret = -ENXIO;

	mutex_lock(&q->debugfs_mutex);

	bt = rcu_dereference_protected(q->blk_trace,
				       lockdep_is_held(&q->debugfs_mutex));
	if (attr == &dev_attr_enable) {
		ret = sprintf(buf, "%u\n", !!bt);
		goto out_unlock_bdev;
	}

	if (bt == NULL)
		ret = sprintf(buf, "disabled\n");
	else if (attr == &dev_attr_act_mask)
		ret = blk_trace_mask2str(buf, bt->act_mask);
	else if (attr == &dev_attr_pid)
		ret = sprintf(buf, "%u\n", bt->pid);
	else if (attr == &dev_attr_start_lba)
		ret = sprintf(buf, "%llu\n", bt->start_lba);
	else if (attr == &dev_attr_end_lba)
		ret = sprintf(buf, "%llu\n", bt->end_lba);

out_unlock_bdev:
	mutex_unlock(&q->debugfs_mutex);
	return ret;
}

static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
					  struct device_attribute *attr,
					  const char *buf, size_t count)
{
	struct block_device *bdev = dev_to_bdev(dev);
	struct request_queue *q = bdev_get_queue(bdev);
	struct blk_trace *bt;
	u64 value;
	ssize_t ret = -EINVAL;

	if (count == 0)
		goto out;

	if (attr == &dev_attr_act_mask) {
		if (kstrtoull(buf, 0, &value)) {
			/* Assume it is a list of trace category names */
			ret = blk_trace_str2mask(buf);
			if (ret < 0)
				goto out;
			value = ret;
		}
	} else {
		if (kstrtoull(buf, 0, &value))
			goto out;
	}

	mutex_lock(&q->debugfs_mutex);

	bt = rcu_dereference_protected(q->blk_trace,
				       lockdep_is_held(&q->debugfs_mutex));
	if (attr == &dev_attr_enable) {
		if (!!value == !!bt) {
			ret = 0;
			goto out_unlock_bdev;
		}
		if (value)
			ret = blk_trace_setup_queue(q, bdev);
		else
			ret = blk_trace_remove_queue(q);
		goto out_unlock_bdev;
	}

	ret = 0;
	if (bt == NULL) {
		ret = blk_trace_setup_queue(q, bdev);
		bt = rcu_dereference_protected(q->blk_trace,
				lockdep_is_held(&q->debugfs_mutex));
	}

	if (ret == 0) {
		if (attr == &dev_attr_act_mask)
			bt->act_mask = value;
		else if (attr == &dev_attr_pid)
			bt->pid = value;
		else if (attr == &dev_attr_start_lba)
			bt->start_lba = value;
		else if (attr == &dev_attr_end_lba)
			bt->end_lba = value;
	}

out_unlock_bdev:
	mutex_unlock(&q->debugfs_mutex);
out:
	return ret ? ret : count;
}

int blk_trace_init_sysfs(struct device *dev)
{
	return sysfs_create_group(&dev->kobj, &blk_trace_attr_group);
}

void blk_trace_remove_sysfs(struct device *dev)
{
	sysfs_remove_group(&dev->kobj, &blk_trace_attr_group);
}

#endif /* CONFIG_BLK_DEV_IO_TRACE */

#ifdef CONFIG_EVENT_TRACING

/**
 * blk_fill_rwbs - Fill the buffer rwbs by mapping op to character string.
 * @rwbs:	buffer to be filled
 * @op:		REQ_OP_XXX for the tracepoint
 *
 * Description:
 *     Maps each REQ_OP_XXX to a character and fills the buffer provided by
 *     the caller with the resulting string.
 *
 **/
void blk_fill_rwbs(char *rwbs, unsigned int op)
{
	int i = 0;

	if (op & REQ_PREFLUSH)
		rwbs[i++] = 'F';

	switch (op & REQ_OP_MASK) {
	case REQ_OP_WRITE:
		rwbs[i++] = 'W';
		break;
	case REQ_OP_DISCARD:
		rwbs[i++] = 'D';
		break;
	case REQ_OP_SECURE_ERASE:
		rwbs[i++] = 'D';
		rwbs[i++] = 'E';
		break;
	case REQ_OP_FLUSH:
		rwbs[i++] = 'F';
		break;
	case REQ_OP_READ:
		rwbs[i++] = 'R';
		break;
	default:
		rwbs[i++] = 'N';
	}

	if (op & REQ_FUA)
		rwbs[i++] = 'F';
	if (op & REQ_RAHEAD)
		rwbs[i++] = 'A';
	if (op & REQ_SYNC)
		rwbs[i++] = 'S';
	if (op & REQ_META)
		rwbs[i++] = 'M';

	rwbs[i] = '\0';
}
EXPORT_SYMBOL_GPL(blk_fill_rwbs);
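
/*
 * Worked example: for op = REQ_OP_WRITE | REQ_SYNC | REQ_FUA the buffer
 * becomes "WFS" -- 'W' from the opcode switch, then 'F' for FUA and 'S'
 * for SYNC from the flag checks; a REQ_PREFLUSH would prepend another 'F'.
 */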

#endif /* CONFIG_EVENT_TRACING */