cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

task_iter.c (16582B)


      1// SPDX-License-Identifier: GPL-2.0-only
      2/* Copyright (c) 2020 Facebook */
      3
      4#include <linux/init.h>
      5#include <linux/namei.h>
      6#include <linux/pid_namespace.h>
      7#include <linux/fs.h>
      8#include <linux/fdtable.h>
      9#include <linux/filter.h>
     10#include <linux/btf_ids.h>
     11#include "mmap_unlock_work.h"
     12
     13struct bpf_iter_seq_task_common {
     14	struct pid_namespace *ns;
     15};
     16
     17struct bpf_iter_seq_task_info {
     18	/* The first field must be struct bpf_iter_seq_task_common.
     19	 * This is assumed by the {init, fini}_seq_pidns() callback functions.
     20	 */
     21	struct bpf_iter_seq_task_common common;
     22	u32 tid;
     23};
     24
     25static struct task_struct *task_seq_get_next(struct pid_namespace *ns,
     26					     u32 *tid,
     27					     bool skip_if_dup_files)
     28{
     29	struct task_struct *task = NULL;
     30	struct pid *pid;
     31
     32	rcu_read_lock();
     33retry:
     34	pid = find_ge_pid(*tid, ns);
     35	if (pid) {
     36		*tid = pid_nr_ns(pid, ns);
     37		task = get_pid_task(pid, PIDTYPE_PID);
     38		if (!task) {
     39			++*tid;
     40			goto retry;
     41		} else if (skip_if_dup_files && !thread_group_leader(task) &&
     42			   task->files == task->group_leader->files) {
     43			put_task_struct(task);
     44			task = NULL;
     45			++*tid;
     46			goto retry;
     47		}
     48	}
     49	rcu_read_unlock();
     50
     51	return task;
     52}
     53
     54static void *task_seq_start(struct seq_file *seq, loff_t *pos)
     55{
     56	struct bpf_iter_seq_task_info *info = seq->private;
     57	struct task_struct *task;
     58
     59	task = task_seq_get_next(info->common.ns, &info->tid, false);
     60	if (!task)
     61		return NULL;
     62
     63	if (*pos == 0)
     64		++*pos;
     65	return task;
     66}
     67
     68static void *task_seq_next(struct seq_file *seq, void *v, loff_t *pos)
     69{
     70	struct bpf_iter_seq_task_info *info = seq->private;
     71	struct task_struct *task;
     72
     73	++*pos;
     74	++info->tid;
     75	put_task_struct((struct task_struct *)v);
     76	task = task_seq_get_next(info->common.ns, &info->tid, false);
     77	if (!task)
     78		return NULL;
     79
     80	return task;
     81}
     82
     83struct bpf_iter__task {
     84	__bpf_md_ptr(struct bpf_iter_meta *, meta);
     85	__bpf_md_ptr(struct task_struct *, task);
     86};
     87
     88DEFINE_BPF_ITER_FUNC(task, struct bpf_iter_meta *meta, struct task_struct *task)
     89
     90static int __task_seq_show(struct seq_file *seq, struct task_struct *task,
     91			   bool in_stop)
     92{
     93	struct bpf_iter_meta meta;
     94	struct bpf_iter__task ctx;
     95	struct bpf_prog *prog;
     96
     97	meta.seq = seq;
     98	prog = bpf_iter_get_info(&meta, in_stop);
     99	if (!prog)
    100		return 0;
    101
    102	ctx.meta = &meta;
    103	ctx.task = task;
    104	return bpf_iter_run_prog(prog, &ctx);
    105}
    106
    107static int task_seq_show(struct seq_file *seq, void *v)
    108{
    109	return __task_seq_show(seq, v, false);
    110}
    111
    112static void task_seq_stop(struct seq_file *seq, void *v)
    113{
    114	if (!v)
    115		(void)__task_seq_show(seq, v, true);
    116	else
    117		put_task_struct((struct task_struct *)v);
    118}
    119
    120static const struct seq_operations task_seq_ops = {
    121	.start	= task_seq_start,
    122	.next	= task_seq_next,
    123	.stop	= task_seq_stop,
    124	.show	= task_seq_show,
    125};
    126
    127struct bpf_iter_seq_task_file_info {
    128	/* The first field must be struct bpf_iter_seq_task_common.
    129	 * This is assumed by the {init, fini}_seq_pidns() callback functions.
    130	 */
    131	struct bpf_iter_seq_task_common common;
    132	struct task_struct *task;
    133	u32 tid;
    134	u32 fd;
    135};
    136
    137static struct file *
    138task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info)
    139{
    140	struct pid_namespace *ns = info->common.ns;
    141	u32 curr_tid = info->tid;
    142	struct task_struct *curr_task;
    143	unsigned int curr_fd = info->fd;
    144
    145	/* If this function returns a non-NULL file object,
    146	 * it holds a reference to the task/file.
    147	 * Otherwise, it does not hold any reference.
    148	 */
    149again:
    150	if (info->task) {
    151		curr_task = info->task;
    152		curr_fd = info->fd;
    153	} else {
    154		curr_task = task_seq_get_next(ns, &curr_tid, true);
    155		if (!curr_task) {
    156			info->task = NULL;
    157			info->tid = curr_tid;
    158			return NULL;
    159		}
    160
    161		/* set info->task and info->tid */
    162		info->task = curr_task;
    163		if (curr_tid == info->tid) {
    164			curr_fd = info->fd;
    165		} else {
    166			info->tid = curr_tid;
    167			curr_fd = 0;
    168		}
    169	}
    170
    171	rcu_read_lock();
    172	for (;; curr_fd++) {
    173		struct file *f;
    174		f = task_lookup_next_fd_rcu(curr_task, &curr_fd);
    175		if (!f)
    176			break;
    177		if (!get_file_rcu(f))
    178			continue;
    179
    180		/* set info->fd */
    181		info->fd = curr_fd;
    182		rcu_read_unlock();
    183		return f;
    184	}
    185
    186	/* the current task is done, go to the next task */
    187	rcu_read_unlock();
    188	put_task_struct(curr_task);
    189	info->task = NULL;
    190	info->fd = 0;
    191	curr_tid = ++(info->tid);
    192	goto again;
    193}
    194
    195static void *task_file_seq_start(struct seq_file *seq, loff_t *pos)
    196{
    197	struct bpf_iter_seq_task_file_info *info = seq->private;
    198	struct file *file;
    199
    200	info->task = NULL;
    201	file = task_file_seq_get_next(info);
    202	if (file && *pos == 0)
    203		++*pos;
    204
    205	return file;
    206}
    207
    208static void *task_file_seq_next(struct seq_file *seq, void *v, loff_t *pos)
    209{
    210	struct bpf_iter_seq_task_file_info *info = seq->private;
    211
    212	++*pos;
    213	++info->fd;
    214	fput((struct file *)v);
    215	return task_file_seq_get_next(info);
    216}
    217
    218struct bpf_iter__task_file {
    219	__bpf_md_ptr(struct bpf_iter_meta *, meta);
    220	__bpf_md_ptr(struct task_struct *, task);
    221	u32 fd __aligned(8);
    222	__bpf_md_ptr(struct file *, file);
    223};
    224
    225DEFINE_BPF_ITER_FUNC(task_file, struct bpf_iter_meta *meta,
    226		     struct task_struct *task, u32 fd,
    227		     struct file *file)
    228
    229static int __task_file_seq_show(struct seq_file *seq, struct file *file,
    230				bool in_stop)
    231{
    232	struct bpf_iter_seq_task_file_info *info = seq->private;
    233	struct bpf_iter__task_file ctx;
    234	struct bpf_iter_meta meta;
    235	struct bpf_prog *prog;
    236
    237	meta.seq = seq;
    238	prog = bpf_iter_get_info(&meta, in_stop);
    239	if (!prog)
    240		return 0;
    241
    242	ctx.meta = &meta;
    243	ctx.task = info->task;
    244	ctx.fd = info->fd;
    245	ctx.file = file;
    246	return bpf_iter_run_prog(prog, &ctx);
    247}
    248
    249static int task_file_seq_show(struct seq_file *seq, void *v)
    250{
    251	return __task_file_seq_show(seq, v, false);
    252}
    253
    254static void task_file_seq_stop(struct seq_file *seq, void *v)
    255{
    256	struct bpf_iter_seq_task_file_info *info = seq->private;
    257
    258	if (!v) {
    259		(void)__task_file_seq_show(seq, v, true);
    260	} else {
    261		fput((struct file *)v);
    262		put_task_struct(info->task);
    263		info->task = NULL;
    264	}
    265}
    266
    267static int init_seq_pidns(void *priv_data, struct bpf_iter_aux_info *aux)
    268{
    269	struct bpf_iter_seq_task_common *common = priv_data;
    270
    271	common->ns = get_pid_ns(task_active_pid_ns(current));
    272	return 0;
    273}
    274
    275static void fini_seq_pidns(void *priv_data)
    276{
    277	struct bpf_iter_seq_task_common *common = priv_data;
    278
    279	put_pid_ns(common->ns);
    280}
    281
    282static const struct seq_operations task_file_seq_ops = {
    283	.start	= task_file_seq_start,
    284	.next	= task_file_seq_next,
    285	.stop	= task_file_seq_stop,
    286	.show	= task_file_seq_show,
    287};
    288
    289struct bpf_iter_seq_task_vma_info {
    290	/* The first field must be struct bpf_iter_seq_task_common.
    291	 * This is assumed by the {init, fini}_seq_pidns() callback functions.
    292	 */
    293	struct bpf_iter_seq_task_common common;
    294	struct task_struct *task;
    295	struct vm_area_struct *vma;
    296	u32 tid;
    297	unsigned long prev_vm_start;
    298	unsigned long prev_vm_end;
    299};
    300
    301enum bpf_task_vma_iter_find_op {
    302	task_vma_iter_first_vma,   /* use mm->mmap */
    303	task_vma_iter_next_vma,    /* use curr_vma->vm_next */
    304	task_vma_iter_find_vma,    /* use find_vma() to find next vma */
    305};
    306
    307static struct vm_area_struct *
    308task_vma_seq_get_next(struct bpf_iter_seq_task_vma_info *info)
    309{
    310	struct pid_namespace *ns = info->common.ns;
    311	enum bpf_task_vma_iter_find_op op;
    312	struct vm_area_struct *curr_vma;
    313	struct task_struct *curr_task;
    314	u32 curr_tid = info->tid;
    315
    316	/* If this function returns a non-NULL vma, it holds a reference to
    317	 * the task_struct, and holds read lock on vma->mm->mmap_lock.
    318	 * If this function returns NULL, it does not hold any reference or
    319	 * lock.
    320	 */
    321	if (info->task) {
    322		curr_task = info->task;
    323		curr_vma = info->vma;
    324		/* In case of lock contention, drop mmap_lock to unblock
    325		 * the writer.
    326		 *
    327		 * After relock, call find_vma(mm, prev_vm_end - 1) to find
    328		 * new vma to process.
    329		 *
    330		 *   +------+------+-----------+
    331		 *   | VMA1 | VMA2 | VMA3      |
    332		 *   +------+------+-----------+
    333		 *   |      |      |           |
    334		 *  4k     8k     16k         400k
    335		 *
    336		 * For example, curr_vma == VMA2. Before unlock, we set
    337		 *
    338		 *    prev_vm_start = 8k
    339		 *    prev_vm_end   = 16k
    340		 *
    341		 * There are a few cases:
    342		 *
    343		 * 1) VMA2 is freed, but VMA3 exists.
    344		 *
    345		 *    find_vma() will return VMA3, just process VMA3.
    346		 *
    347		 * 2) VMA2 still exists.
    348		 *
    349		 *    find_vma() will return VMA2, process VMA2->next.
    350		 *
    351		 * 3) no more vma in this mm.
    352		 *
    353		 *    Process the next task.
    354		 *
    355		 * 4) find_vma() returns a different vma, VMA2'.
    356		 *
    357		 *    4.1) If VMA2 covers the same range as VMA2', skip VMA2',
    358		 *         because we already covered the range;
    359		 *    4.2) If VMA2 and VMA2' cover different ranges, process
    360		 *         VMA2'.
    361		 */
    362		if (mmap_lock_is_contended(curr_task->mm)) {
    363			info->prev_vm_start = curr_vma->vm_start;
    364			info->prev_vm_end = curr_vma->vm_end;
    365			op = task_vma_iter_find_vma;
    366			mmap_read_unlock(curr_task->mm);
    367			if (mmap_read_lock_killable(curr_task->mm))
    368				goto finish;
    369		} else {
    370			op = task_vma_iter_next_vma;
    371		}
    372	} else {
    373again:
    374		curr_task = task_seq_get_next(ns, &curr_tid, true);
    375		if (!curr_task) {
    376			info->tid = curr_tid + 1;
    377			goto finish;
    378		}
    379
    380		if (curr_tid != info->tid) {
    381			info->tid = curr_tid;
    382			/* new task, process the first vma */
    383			op = task_vma_iter_first_vma;
    384		} else {
    385			/* Found the same tid, which means the user space
    386			/* Found the same tid, which means user space finished
    387			 * the data in the previous buffer and is reading more.
    388			 * space, so it is necessary to use find_vma() to
    389			 * find the next vma to process.
    390			 */
    391			op = task_vma_iter_find_vma;
    392		}
    393
    394		if (!curr_task->mm)
    395			goto next_task;
    396
    397		if (mmap_read_lock_killable(curr_task->mm))
    398			goto finish;
    399	}
    400
    401	switch (op) {
    402	case task_vma_iter_first_vma:
    403		curr_vma = curr_task->mm->mmap;
    404		break;
    405	case task_vma_iter_next_vma:
    406		curr_vma = curr_vma->vm_next;
    407		break;
    408	case task_vma_iter_find_vma:
    409		/* We dropped mmap_lock so it is necessary to use find_vma
    410		 * to find the next vma. This is similar to the mechanism
    411		 * in show_smaps_rollup().
    412		 */
    413		curr_vma = find_vma(curr_task->mm, info->prev_vm_end - 1);
    414		/* case 1) and 4.2) above just use curr_vma */
    415
    416		/* check for case 2) or case 4.1) above */
    417		if (curr_vma &&
    418		    curr_vma->vm_start == info->prev_vm_start &&
    419		    curr_vma->vm_end == info->prev_vm_end)
    420			curr_vma = curr_vma->vm_next;
    421		break;
    422	}
    423	if (!curr_vma) {
    424		/* case 3) above, or case 2) 4.1) with vma->next == NULL */
    425		mmap_read_unlock(curr_task->mm);
    426		goto next_task;
    427	}
    428	info->task = curr_task;
    429	info->vma = curr_vma;
    430	return curr_vma;
    431
    432next_task:
    433	put_task_struct(curr_task);
    434	info->task = NULL;
    435	curr_tid++;
    436	goto again;
    437
    438finish:
    439	if (curr_task)
    440		put_task_struct(curr_task);
    441	info->task = NULL;
    442	info->vma = NULL;
    443	return NULL;
    444}
    445
    446static void *task_vma_seq_start(struct seq_file *seq, loff_t *pos)
    447{
    448	struct bpf_iter_seq_task_vma_info *info = seq->private;
    449	struct vm_area_struct *vma;
    450
    451	vma = task_vma_seq_get_next(info);
    452	if (vma && *pos == 0)
    453		++*pos;
    454
    455	return vma;
    456}
    457
    458static void *task_vma_seq_next(struct seq_file *seq, void *v, loff_t *pos)
    459{
    460	struct bpf_iter_seq_task_vma_info *info = seq->private;
    461
    462	++*pos;
    463	return task_vma_seq_get_next(info);
    464}
    465
    466struct bpf_iter__task_vma {
    467	__bpf_md_ptr(struct bpf_iter_meta *, meta);
    468	__bpf_md_ptr(struct task_struct *, task);
    469	__bpf_md_ptr(struct vm_area_struct *, vma);
    470};
    471
    472DEFINE_BPF_ITER_FUNC(task_vma, struct bpf_iter_meta *meta,
    473		     struct task_struct *task, struct vm_area_struct *vma)
    474
    475static int __task_vma_seq_show(struct seq_file *seq, bool in_stop)
    476{
    477	struct bpf_iter_seq_task_vma_info *info = seq->private;
    478	struct bpf_iter__task_vma ctx;
    479	struct bpf_iter_meta meta;
    480	struct bpf_prog *prog;
    481
    482	meta.seq = seq;
    483	prog = bpf_iter_get_info(&meta, in_stop);
    484	if (!prog)
    485		return 0;
    486
    487	ctx.meta = &meta;
    488	ctx.task = info->task;
    489	ctx.vma = info->vma;
    490	return bpf_iter_run_prog(prog, &ctx);
    491}
    492
    493static int task_vma_seq_show(struct seq_file *seq, void *v)
    494{
    495	return __task_vma_seq_show(seq, false);
    496}
    497
    498static void task_vma_seq_stop(struct seq_file *seq, void *v)
    499{
    500	struct bpf_iter_seq_task_vma_info *info = seq->private;
    501
    502	if (!v) {
    503		(void)__task_vma_seq_show(seq, true);
    504	} else {
    505		/* info->vma has not been seen by the BPF program. If the
    506		 * user space reads more, task_vma_seq_get_next should
    507		 * return this vma again. Set prev_vm_start to ~0UL,
    508		 * so that we don't skip the vma returned by the next
    509		 * find_vma() (case task_vma_iter_find_vma in
    510		 * task_vma_seq_get_next()).
    511		 */
    512		info->prev_vm_start = ~0UL;
    513		info->prev_vm_end = info->vma->vm_end;
    514		mmap_read_unlock(info->task->mm);
    515		put_task_struct(info->task);
    516		info->task = NULL;
    517	}
    518}
    519
    520static const struct seq_operations task_vma_seq_ops = {
    521	.start	= task_vma_seq_start,
    522	.next	= task_vma_seq_next,
    523	.stop	= task_vma_seq_stop,
    524	.show	= task_vma_seq_show,
    525};
    526
    527static const struct bpf_iter_seq_info task_seq_info = {
    528	.seq_ops		= &task_seq_ops,
    529	.init_seq_private	= init_seq_pidns,
    530	.fini_seq_private	= fini_seq_pidns,
    531	.seq_priv_size		= sizeof(struct bpf_iter_seq_task_info),
    532};
    533
    534static struct bpf_iter_reg task_reg_info = {
    535	.target			= "task",
    536	.feature		= BPF_ITER_RESCHED,
    537	.ctx_arg_info_size	= 1,
    538	.ctx_arg_info		= {
    539		{ offsetof(struct bpf_iter__task, task),
    540		  PTR_TO_BTF_ID_OR_NULL },
    541	},
    542	.seq_info		= &task_seq_info,
    543};
    544
    545static const struct bpf_iter_seq_info task_file_seq_info = {
    546	.seq_ops		= &task_file_seq_ops,
    547	.init_seq_private	= init_seq_pidns,
    548	.fini_seq_private	= fini_seq_pidns,
    549	.seq_priv_size		= sizeof(struct bpf_iter_seq_task_file_info),
    550};
    551
    552static struct bpf_iter_reg task_file_reg_info = {
    553	.target			= "task_file",
    554	.feature		= BPF_ITER_RESCHED,
    555	.ctx_arg_info_size	= 2,
    556	.ctx_arg_info		= {
    557		{ offsetof(struct bpf_iter__task_file, task),
    558		  PTR_TO_BTF_ID_OR_NULL },
    559		{ offsetof(struct bpf_iter__task_file, file),
    560		  PTR_TO_BTF_ID_OR_NULL },
    561	},
    562	.seq_info		= &task_file_seq_info,
    563};
    564
    565static const struct bpf_iter_seq_info task_vma_seq_info = {
    566	.seq_ops		= &task_vma_seq_ops,
    567	.init_seq_private	= init_seq_pidns,
    568	.fini_seq_private	= fini_seq_pidns,
    569	.seq_priv_size		= sizeof(struct bpf_iter_seq_task_vma_info),
    570};
    571
    572static struct bpf_iter_reg task_vma_reg_info = {
    573	.target			= "task_vma",
    574	.feature		= BPF_ITER_RESCHED,
    575	.ctx_arg_info_size	= 2,
    576	.ctx_arg_info		= {
    577		{ offsetof(struct bpf_iter__task_vma, task),
    578		  PTR_TO_BTF_ID_OR_NULL },
    579		{ offsetof(struct bpf_iter__task_vma, vma),
    580		  PTR_TO_BTF_ID_OR_NULL },
    581	},
    582	.seq_info		= &task_vma_seq_info,
    583};
    584
    585BPF_CALL_5(bpf_find_vma, struct task_struct *, task, u64, start,
    586	   bpf_callback_t, callback_fn, void *, callback_ctx, u64, flags)
    587{
    588	struct mmap_unlock_irq_work *work = NULL;
    589	struct vm_area_struct *vma;
    590	bool irq_work_busy = false;
    591	struct mm_struct *mm;
    592	int ret = -ENOENT;
    593
    594	if (flags)
    595		return -EINVAL;
    596
    597	if (!task)
    598		return -ENOENT;
    599
    600	mm = task->mm;
    601	if (!mm)
    602		return -ENOENT;
    603
    604	irq_work_busy = bpf_mmap_unlock_get_irq_work(&work);
    605
    606	if (irq_work_busy || !mmap_read_trylock(mm))
    607		return -EBUSY;
    608
    609	vma = find_vma(mm, start);
    610
    611	if (vma && vma->vm_start <= start && vma->vm_end > start) {
    612		callback_fn((u64)(long)task, (u64)(long)vma,
    613			    (u64)(long)callback_ctx, 0, 0);
    614		ret = 0;
    615	}
    616	bpf_mmap_unlock_mm(work, mm);
    617	return ret;
    618}
    619
    620const struct bpf_func_proto bpf_find_vma_proto = {
    621	.func		= bpf_find_vma,
    622	.ret_type	= RET_INTEGER,
    623	.arg1_type	= ARG_PTR_TO_BTF_ID,
    624	.arg1_btf_id	= &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
    625	.arg2_type	= ARG_ANYTHING,
    626	.arg3_type	= ARG_PTR_TO_FUNC,
    627	.arg4_type	= ARG_PTR_TO_STACK_OR_NULL,
    628	.arg5_type	= ARG_ANYTHING,
    629};
    630
    631DEFINE_PER_CPU(struct mmap_unlock_irq_work, mmap_unlock_work);
    632
    633static void do_mmap_read_unlock(struct irq_work *entry)
    634{
    635	struct mmap_unlock_irq_work *work;
    636
    637	if (WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_RT)))
    638		return;
    639
    640	work = container_of(entry, struct mmap_unlock_irq_work, irq_work);
    641	mmap_read_unlock_non_owner(work->mm);
    642}
    643
    644static int __init task_iter_init(void)
    645{
    646	struct mmap_unlock_irq_work *work;
    647	int ret, cpu;
    648
    649	for_each_possible_cpu(cpu) {
    650		work = per_cpu_ptr(&mmap_unlock_work, cpu);
    651		init_irq_work(&work->irq_work, do_mmap_read_unlock);
    652	}
    653
    654	task_reg_info.ctx_arg_info[0].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_TASK];
    655	ret = bpf_iter_reg_target(&task_reg_info);
    656	if (ret)
    657		return ret;
    658
    659	task_file_reg_info.ctx_arg_info[0].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_TASK];
    660	task_file_reg_info.ctx_arg_info[1].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_FILE];
    661	ret = bpf_iter_reg_target(&task_file_reg_info);
    662	if (ret)
    663		return ret;
    664
    665	task_vma_reg_info.ctx_arg_info[0].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_TASK];
    666	task_vma_reg_info.ctx_arg_info[1].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_VMA];
    667	return bpf_iter_reg_target(&task_vma_reg_info);
    668}
    669late_initcall(task_iter_init);
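
The iterators registered above are consumed by BPF programs attached to the corresponding targets. As a rough, hedged sketch (modeled on the kernel's bpf_iter selftests, not on anything in this file): a program for the "task" target receives the struct bpf_iter__task context defined above; the vmlinux.h/libbpf build, the SEC("iter/task") convention, and the BPF_SEQ_PRINTF macro are assumptions of that style.

/* Illustrative BPF-side consumer of the "task" iterator target
 * (assumption: libbpf + vmlinux.h build, as in the bpf_iter selftests).
 */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char _license[] SEC("license") = "GPL";

SEC("iter/task")
int dump_task(struct bpf_iter__task *ctx)
{
	struct seq_file *seq = ctx->meta->seq;
	struct task_struct *task = ctx->task;

	/* task is NULL when the program is invoked from task_seq_stop()
	 * via __task_seq_show(seq, NULL, true) above.
	 */
	if (!task)
		return 0;

	/* Print a header once, then one line per task visited by
	 * task_seq_show().
	 */
	if (ctx->meta->seq_num == 0)
		BPF_SEQ_PRINTF(seq, "%8s %16s\n", "pid", "comm");

	BPF_SEQ_PRINTF(seq, "%8d %16s\n", task->pid, task->comm);
	return 0;
}

User space then creates a bpf_iter link for the program and reads the resulting iterator fd (for example via bpf_iter_create(), or by pinning it with bpftool iter pin); each read() is what drives task_seq_start()/task_seq_next()/task_seq_show()/task_seq_stop() above.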
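
Likewise, the bpf_find_vma() helper implemented near the end of the file is called from a tracing program with a three-argument callback, matching the callback_fn(task, vma, callback_ctx, 0, 0) invocation in BPF_CALL_5(bpf_find_vma). The sketch below is an assumption in selftest style; the probed symbol and the names check_vma, handle, addr_of_interest and found_vm_start are illustrative, not part of this file.

/* Illustrative caller of the bpf_find_vma() helper (assumption:
 * libbpf + vmlinux.h build; the helper is available to tracing programs).
 */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

char _license[] SEC("license") = "GPL";

unsigned long addr_of_interest;	/* set from user space before attaching */
unsigned long found_vm_start;	/* read back by user space */

/* Callback run by bpf_find_vma() under the mmap read lock with the vma
 * that covers addr_of_interest, if any.
 */
static long check_vma(struct task_struct *task, struct vm_area_struct *vma,
		      void *data)
{
	found_vm_start = vma->vm_start;
	return 0;
}

SEC("kprobe/do_nanosleep")	/* attach point chosen only for illustration */
int handle(struct pt_regs *ctx)
{
	struct task_struct *task = bpf_get_current_task_btf();

	/* flags must be 0; returns -ENOENT if no vma covers the address,
	 * -EBUSY if the mmap_lock cannot be taken (see the helper body).
	 */
	bpf_find_vma(task, addr_of_interest, check_vma, (void *)0, 0);
	return 0;
}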