cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

fcntl.c (23862B)


      1// SPDX-License-Identifier: GPL-2.0
      2/*
      3 *  linux/fs/fcntl.c
      4 *
      5 *  Copyright (C) 1991, 1992  Linus Torvalds
      6 */
      7
      8#include <linux/syscalls.h>
      9#include <linux/init.h>
     10#include <linux/mm.h>
     11#include <linux/sched/task.h>
     12#include <linux/fs.h>
     13#include <linux/file.h>
     14#include <linux/fdtable.h>
     15#include <linux/capability.h>
     16#include <linux/dnotify.h>
     17#include <linux/slab.h>
     18#include <linux/module.h>
     19#include <linux/pipe_fs_i.h>
     20#include <linux/security.h>
     21#include <linux/ptrace.h>
     22#include <linux/signal.h>
     23#include <linux/rcupdate.h>
     24#include <linux/pid_namespace.h>
     25#include <linux/user_namespace.h>
     26#include <linux/memfd.h>
     27#include <linux/compat.h>
     28#include <linux/mount.h>
     29
     30#include <linux/poll.h>
     31#include <asm/siginfo.h>
     32#include <linux/uaccess.h>
     33
/*
 * Status flags that setfl() copies from the F_SETFL argument into
 * filp->f_flags; every f_flags bit outside this mask is preserved.
 */
#define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME)
     35
     36static int setfl(int fd, struct file * filp, unsigned long arg)
     37{
     38	struct inode * inode = file_inode(filp);
     39	int error = 0;
     40
     41	/*
     42	 * O_APPEND cannot be cleared if the file is marked as append-only
     43	 * and the file is open for write.
     44	 */
     45	if (((arg ^ filp->f_flags) & O_APPEND) && IS_APPEND(inode))
     46		return -EPERM;
     47
     48	/* O_NOATIME can only be set by the owner or superuser */
     49	if ((arg & O_NOATIME) && !(filp->f_flags & O_NOATIME))
     50		if (!inode_owner_or_capable(file_mnt_user_ns(filp), inode))
     51			return -EPERM;
     52
     53	/* required for strict SunOS emulation */
     54	if (O_NONBLOCK != O_NDELAY)
     55	       if (arg & O_NDELAY)
     56		   arg |= O_NONBLOCK;
     57
     58	/* Pipe packetized mode is controlled by O_DIRECT flag */
     59	if (!S_ISFIFO(inode->i_mode) &&
     60	    (arg & O_DIRECT) &&
     61	    !(filp->f_mode & FMODE_CAN_ODIRECT))
     62		return -EINVAL;
     63
     64	if (filp->f_op->check_flags)
     65		error = filp->f_op->check_flags(arg);
     66	if (error)
     67		return error;
     68
     69	/*
     70	 * ->fasync() is responsible for setting the FASYNC bit.
     71	 */
     72	if (((arg ^ filp->f_flags) & FASYNC) && filp->f_op->fasync) {
     73		error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0);
     74		if (error < 0)
     75			goto out;
     76		if (error > 0)
     77			error = 0;
     78	}
     79	spin_lock(&filp->f_lock);
     80	filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK);
     81	spin_unlock(&filp->f_lock);
     82
     83 out:
     84	return error;
     85}
     86
     87static void f_modown(struct file *filp, struct pid *pid, enum pid_type type,
     88                     int force)
     89{
     90	write_lock_irq(&filp->f_owner.lock);
     91	if (force || !filp->f_owner.pid) {
     92		put_pid(filp->f_owner.pid);
     93		filp->f_owner.pid = get_pid(pid);
     94		filp->f_owner.pid_type = type;
     95
     96		if (pid) {
     97			const struct cred *cred = current_cred();
     98			filp->f_owner.uid = cred->uid;
     99			filp->f_owner.euid = cred->euid;
    100		}
    101	}
    102	write_unlock_irq(&filp->f_owner.lock);
    103}
    104
/*
 * Set the owner of @filp to @pid/@type: first invoke the
 * security_file_set_fowner() hook, then update f_owner via f_modown().
 * @force is passed through to f_modown() unchanged.
 */
void __f_setown(struct file *filp, struct pid *pid, enum pid_type type,
		int force)
{
	security_file_set_fowner(filp);
	f_modown(filp, pid, type, force);
}
EXPORT_SYMBOL(__f_setown);
    112
    113int f_setown(struct file *filp, unsigned long arg, int force)
    114{
    115	enum pid_type type;
    116	struct pid *pid = NULL;
    117	int who = arg, ret = 0;
    118
    119	type = PIDTYPE_TGID;
    120	if (who < 0) {
    121		/* avoid overflow below */
    122		if (who == INT_MIN)
    123			return -EINVAL;
    124
    125		type = PIDTYPE_PGID;
    126		who = -who;
    127	}
    128
    129	rcu_read_lock();
    130	if (who) {
    131		pid = find_vpid(who);
    132		if (!pid)
    133			ret = -ESRCH;
    134	}
    135
    136	if (!ret)
    137		__f_setown(filp, pid, type, force);
    138	rcu_read_unlock();
    139
    140	return ret;
    141}
    142EXPORT_SYMBOL(f_setown);
    143
/*
 * Clear the owner of @filp: force f_owner.pid to NULL (type PIDTYPE_TGID)
 * through f_modown().
 */
void f_delown(struct file *filp)
{
	f_modown(filp, NULL, PIDTYPE_TGID, 1);
}
    148
    149pid_t f_getown(struct file *filp)
    150{
    151	pid_t pid = 0;
    152
    153	read_lock_irq(&filp->f_owner.lock);
    154	rcu_read_lock();
    155	if (pid_task(filp->f_owner.pid, filp->f_owner.pid_type)) {
    156		pid = pid_vnr(filp->f_owner.pid);
    157		if (filp->f_owner.pid_type == PIDTYPE_PGID)
    158			pid = -pid;
    159	}
    160	rcu_read_unlock();
    161	read_unlock_irq(&filp->f_owner.lock);
    162	return pid;
    163}
    164
    165static int f_setown_ex(struct file *filp, unsigned long arg)
    166{
    167	struct f_owner_ex __user *owner_p = (void __user *)arg;
    168	struct f_owner_ex owner;
    169	struct pid *pid;
    170	int type;
    171	int ret;
    172
    173	ret = copy_from_user(&owner, owner_p, sizeof(owner));
    174	if (ret)
    175		return -EFAULT;
    176
    177	switch (owner.type) {
    178	case F_OWNER_TID:
    179		type = PIDTYPE_PID;
    180		break;
    181
    182	case F_OWNER_PID:
    183		type = PIDTYPE_TGID;
    184		break;
    185
    186	case F_OWNER_PGRP:
    187		type = PIDTYPE_PGID;
    188		break;
    189
    190	default:
    191		return -EINVAL;
    192	}
    193
    194	rcu_read_lock();
    195	pid = find_vpid(owner.pid);
    196	if (owner.pid && !pid)
    197		ret = -ESRCH;
    198	else
    199		 __f_setown(filp, pid, type, 1);
    200	rcu_read_unlock();
    201
    202	return ret;
    203}
    204
    205static int f_getown_ex(struct file *filp, unsigned long arg)
    206{
    207	struct f_owner_ex __user *owner_p = (void __user *)arg;
    208	struct f_owner_ex owner = {};
    209	int ret = 0;
    210
    211	read_lock_irq(&filp->f_owner.lock);
    212	rcu_read_lock();
    213	if (pid_task(filp->f_owner.pid, filp->f_owner.pid_type))
    214		owner.pid = pid_vnr(filp->f_owner.pid);
    215	rcu_read_unlock();
    216	switch (filp->f_owner.pid_type) {
    217	case PIDTYPE_PID:
    218		owner.type = F_OWNER_TID;
    219		break;
    220
    221	case PIDTYPE_TGID:
    222		owner.type = F_OWNER_PID;
    223		break;
    224
    225	case PIDTYPE_PGID:
    226		owner.type = F_OWNER_PGRP;
    227		break;
    228
    229	default:
    230		WARN_ON(1);
    231		ret = -EINVAL;
    232		break;
    233	}
    234	read_unlock_irq(&filp->f_owner.lock);
    235
    236	if (!ret) {
    237		ret = copy_to_user(owner_p, &owner, sizeof(owner));
    238		if (ret)
    239			ret = -EFAULT;
    240	}
    241	return ret;
    242}
    243
    244#ifdef CONFIG_CHECKPOINT_RESTORE
    245static int f_getowner_uids(struct file *filp, unsigned long arg)
    246{
    247	struct user_namespace *user_ns = current_user_ns();
    248	uid_t __user *dst = (void __user *)arg;
    249	uid_t src[2];
    250	int err;
    251
    252	read_lock_irq(&filp->f_owner.lock);
    253	src[0] = from_kuid(user_ns, filp->f_owner.uid);
    254	src[1] = from_kuid(user_ns, filp->f_owner.euid);
    255	read_unlock_irq(&filp->f_owner.lock);
    256
    257	err  = put_user(src[0], &dst[0]);
    258	err |= put_user(src[1], &dst[1]);
    259
    260	return err;
    261}
    262#else
    263static int f_getowner_uids(struct file *filp, unsigned long arg)
    264{
    265	return -EINVAL;
    266}
    267#endif
    268
    269static bool rw_hint_valid(enum rw_hint hint)
    270{
    271	switch (hint) {
    272	case RWH_WRITE_LIFE_NOT_SET:
    273	case RWH_WRITE_LIFE_NONE:
    274	case RWH_WRITE_LIFE_SHORT:
    275	case RWH_WRITE_LIFE_MEDIUM:
    276	case RWH_WRITE_LIFE_LONG:
    277	case RWH_WRITE_LIFE_EXTREME:
    278		return true;
    279	default:
    280		return false;
    281	}
    282}
    283
    284static long fcntl_rw_hint(struct file *file, unsigned int cmd,
    285			  unsigned long arg)
    286{
    287	struct inode *inode = file_inode(file);
    288	u64 __user *argp = (u64 __user *)arg;
    289	enum rw_hint hint;
    290	u64 h;
    291
    292	switch (cmd) {
    293	case F_GET_RW_HINT:
    294		h = inode->i_write_hint;
    295		if (copy_to_user(argp, &h, sizeof(*argp)))
    296			return -EFAULT;
    297		return 0;
    298	case F_SET_RW_HINT:
    299		if (copy_from_user(&h, argp, sizeof(h)))
    300			return -EFAULT;
    301		hint = (enum rw_hint) h;
    302		if (!rw_hint_valid(hint))
    303			return -EINVAL;
    304
    305		inode_lock(inode);
    306		inode->i_write_hint = hint;
    307		inode_unlock(inode);
    308		return 0;
    309	default:
    310		return -EINVAL;
    311	}
    312}
    313
/*
 * Dispatch a single fcntl command on an already looked-up file.
 *
 * @fd:   descriptor number the caller used
 * @cmd:  F_* command
 * @arg:  command argument; an integer or a user pointer depending on @cmd
 * @filp: file that @fd refers to
 *
 * Returns the command's result (a non-negative value or 0) or a negative
 * errno.  Commands not listed here fall to the default case and yield
 * -EINVAL, the initial value of err.
 */
static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
		struct file *filp)
{
	void __user *argp = (void __user *)arg;
	struct flock flock;
	long err = -EINVAL;

	switch (cmd) {
	case F_DUPFD:
		err = f_dupfd(arg, filp, 0);
		break;
	case F_DUPFD_CLOEXEC:
		err = f_dupfd(arg, filp, O_CLOEXEC);
		break;
	case F_GETFD:
		err = get_close_on_exec(fd) ? FD_CLOEXEC : 0;
		break;
	case F_SETFD:
		err = 0;
		set_close_on_exec(fd, arg & FD_CLOEXEC);
		break;
	case F_GETFL:
		err = filp->f_flags;
		break;
	case F_SETFL:
		err = setfl(fd, filp, arg);
		break;
#if BITS_PER_LONG != 32
	/* 32-bit arches must use fcntl64() */
	case F_OFD_GETLK:
#endif
	case F_GETLK:
		/* flock is an in/out argument: read it, query, write it back */
		if (copy_from_user(&flock, argp, sizeof(flock)))
			return -EFAULT;
		err = fcntl_getlk(filp, cmd, &flock);
		if (!err && copy_to_user(argp, &flock, sizeof(flock)))
			return -EFAULT;
		break;
#if BITS_PER_LONG != 32
	/* 32-bit arches must use fcntl64() */
	case F_OFD_SETLK:
	case F_OFD_SETLKW:
		fallthrough;
#endif
	case F_SETLK:
	case F_SETLKW:
		if (copy_from_user(&flock, argp, sizeof(flock)))
			return -EFAULT;
		err = fcntl_setlk(fd, filp, cmd, &flock);
		break;
	case F_GETOWN:
		/*
		 * XXX If f_owner is a process group, the
		 * negative return value will get converted
		 * into an error.  Oops.  If we keep the
		 * current syscall conventions, the only way
		 * to fix this will be in libc.
		 */
		err = f_getown(filp);
		force_successful_syscall_return();
		break;
	case F_SETOWN:
		err = f_setown(filp, arg, 1);
		break;
	case F_GETOWN_EX:
		err = f_getown_ex(filp, arg);
		break;
	case F_SETOWN_EX:
		err = f_setown_ex(filp, arg);
		break;
	case F_GETOWNER_UIDS:
		err = f_getowner_uids(filp, arg);
		break;
	case F_GETSIG:
		err = filp->f_owner.signum;
		break;
	case F_SETSIG:
		/* arg == 0 restores default behaviour. */
		if (!valid_signal(arg)) {
			/* err is still -EINVAL here */
			break;
		}
		err = 0;
		filp->f_owner.signum = arg;
		break;
	case F_GETLEASE:
		err = fcntl_getlease(filp);
		break;
	case F_SETLEASE:
		err = fcntl_setlease(fd, filp, arg);
		break;
	case F_NOTIFY:
		err = fcntl_dirnotify(fd, filp, arg);
		break;
	case F_SETPIPE_SZ:
	case F_GETPIPE_SZ:
		err = pipe_fcntl(filp, cmd, arg);
		break;
	case F_ADD_SEALS:
	case F_GET_SEALS:
		err = memfd_fcntl(filp, cmd, arg);
		break;
	case F_GET_RW_HINT:
	case F_SET_RW_HINT:
		err = fcntl_rw_hint(filp, cmd, arg);
		break;
	default:
		break;
	}
	return err;
}
    424
    425static int check_fcntl_cmd(unsigned cmd)
    426{
    427	switch (cmd) {
    428	case F_DUPFD:
    429	case F_DUPFD_CLOEXEC:
    430	case F_GETFD:
    431	case F_SETFD:
    432	case F_GETFL:
    433		return 1;
    434	}
    435	return 0;
    436}
    437
    438SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
    439{	
    440	struct fd f = fdget_raw(fd);
    441	long err = -EBADF;
    442
    443	if (!f.file)
    444		goto out;
    445
    446	if (unlikely(f.file->f_mode & FMODE_PATH)) {
    447		if (!check_fcntl_cmd(cmd))
    448			goto out1;
    449	}
    450
    451	err = security_file_fcntl(f.file, cmd, arg);
    452	if (!err)
    453		err = do_fcntl(fd, cmd, arg, f.file);
    454
    455out1:
    456 	fdput(f);
    457out:
    458	return err;
    459}
    460
#if BITS_PER_LONG == 32
/*
 * fcntl64() system call entry, built only when BITS_PER_LONG == 32.
 *
 * The locking commands that use struct flock64 (and the OFD lock commands)
 * are handled here; every other command is forwarded to do_fcntl().
 * FMODE_PATH files are restricted to the commands accepted by
 * check_fcntl_cmd(), and the security hook is consulted first.
 *
 * Returns -EBADF for a bad descriptor or a command not allowed on an
 * FMODE_PATH file, -EFAULT if the flock64 cannot be copied, or the result
 * of the underlying handler.
 */
SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
		unsigned long, arg)
{	
	void __user *argp = (void __user *)arg;
	struct fd f = fdget_raw(fd);
	struct flock64 flock;
	long err = -EBADF;

	if (!f.file)
		goto out;

	if (unlikely(f.file->f_mode & FMODE_PATH)) {
		if (!check_fcntl_cmd(cmd))
			goto out1;
	}

	err = security_file_fcntl(f.file, cmd, arg);
	if (err)
		goto out1;
	
	switch (cmd) {
	case F_GETLK64:
	case F_OFD_GETLK:
		/* preset -EFAULT so a failed copy can simply break out */
		err = -EFAULT;
		if (copy_from_user(&flock, argp, sizeof(flock)))
			break;
		err = fcntl_getlk64(f.file, cmd, &flock);
		if (!err && copy_to_user(argp, &flock, sizeof(flock)))
			err = -EFAULT;
		break;
	case F_SETLK64:
	case F_SETLKW64:
	case F_OFD_SETLK:
	case F_OFD_SETLKW:
		err = -EFAULT;
		if (copy_from_user(&flock, argp, sizeof(flock)))
			break;
		err = fcntl_setlk64(fd, f.file, cmd, &flock);
		break;
	default:
		err = do_fcntl(fd, cmd, arg, f.file);
		break;
	}
out1:
	fdput(f);
out:
	return err;
}
#endif
    511
    512#ifdef CONFIG_COMPAT
    513/* careful - don't use anywhere else */
    514#define copy_flock_fields(dst, src)		\
    515	(dst)->l_type = (src)->l_type;		\
    516	(dst)->l_whence = (src)->l_whence;	\
    517	(dst)->l_start = (src)->l_start;	\
    518	(dst)->l_len = (src)->l_len;		\
    519	(dst)->l_pid = (src)->l_pid;
    520
    521static int get_compat_flock(struct flock *kfl, const struct compat_flock __user *ufl)
    522{
    523	struct compat_flock fl;
    524
    525	if (copy_from_user(&fl, ufl, sizeof(struct compat_flock)))
    526		return -EFAULT;
    527	copy_flock_fields(kfl, &fl);
    528	return 0;
    529}
    530
    531static int get_compat_flock64(struct flock *kfl, const struct compat_flock64 __user *ufl)
    532{
    533	struct compat_flock64 fl;
    534
    535	if (copy_from_user(&fl, ufl, sizeof(struct compat_flock64)))
    536		return -EFAULT;
    537	copy_flock_fields(kfl, &fl);
    538	return 0;
    539}
    540
    541static int put_compat_flock(const struct flock *kfl, struct compat_flock __user *ufl)
    542{
    543	struct compat_flock fl;
    544
    545	memset(&fl, 0, sizeof(struct compat_flock));
    546	copy_flock_fields(&fl, kfl);
    547	if (copy_to_user(ufl, &fl, sizeof(struct compat_flock)))
    548		return -EFAULT;
    549	return 0;
    550}
    551
    552static int put_compat_flock64(const struct flock *kfl, struct compat_flock64 __user *ufl)
    553{
    554	struct compat_flock64 fl;
    555
    556	BUILD_BUG_ON(sizeof(kfl->l_start) > sizeof(ufl->l_start));
    557	BUILD_BUG_ON(sizeof(kfl->l_len) > sizeof(ufl->l_len));
    558
    559	memset(&fl, 0, sizeof(struct compat_flock64));
    560	copy_flock_fields(&fl, kfl);
    561	if (copy_to_user(ufl, &fl, sizeof(struct compat_flock64)))
    562		return -EFAULT;
    563	return 0;
    564}
    565#undef copy_flock_fields
    566
    567static unsigned int
    568convert_fcntl_cmd(unsigned int cmd)
    569{
    570	switch (cmd) {
    571	case F_GETLK64:
    572		return F_GETLK;
    573	case F_SETLK64:
    574		return F_SETLK;
    575	case F_SETLKW64:
    576		return F_SETLKW;
    577	}
    578
    579	return cmd;
    580}
    581
    582/*
    583 * GETLK was successful and we need to return the data, but it needs to fit in
    584 * the compat structure.
    585 * l_start shouldn't be too big, unless the original start + end is greater than
    586 * COMPAT_OFF_T_MAX, in which case the app was asking for trouble, so we return
    587 * -EOVERFLOW in that case.  l_len could be too big, in which case we just
    588 * truncate it, and only allow the app to see that part of the conflicting lock
    589 * that might make sense to it anyway
    590 */
    591static int fixup_compat_flock(struct flock *flock)
    592{
    593	if (flock->l_start > COMPAT_OFF_T_MAX)
    594		return -EOVERFLOW;
    595	if (flock->l_len > COMPAT_OFF_T_MAX)
    596		flock->l_len = COMPAT_OFF_T_MAX;
    597	return 0;
    598}
    599
/*
 * Common implementation of the compat fcntl() and fcntl64() entry points.
 *
 * Record-lock commands are handled here: the compat flock layout is
 * converted to a native struct flock, the native lock helper is called with
 * the command mapped by convert_fcntl_cmd(), and for the GETLK variants the
 * result is converted back.  All other commands go to do_fcntl().
 *
 * Returns -EBADF for a bad descriptor or a command not allowed on an
 * FMODE_PATH file; otherwise the result of the security hook or handler.
 */
static long do_compat_fcntl64(unsigned int fd, unsigned int cmd,
			     compat_ulong_t arg)
{
	struct fd f = fdget_raw(fd);
	struct flock flock;
	long err = -EBADF;

	if (!f.file)
		return err;

	if (unlikely(f.file->f_mode & FMODE_PATH)) {
		if (!check_fcntl_cmd(cmd))
			goto out_put;
	}

	err = security_file_fcntl(f.file, cmd, arg);
	if (err)
		goto out_put;

	switch (cmd) {
	case F_GETLK:
		err = get_compat_flock(&flock, compat_ptr(arg));
		if (err)
			break;
		err = fcntl_getlk(f.file, convert_fcntl_cmd(cmd), &flock);
		if (err)
			break;
		/* only the non-64 layout needs range fix-ups before copy-out */
		err = fixup_compat_flock(&flock);
		if (!err)
			err = put_compat_flock(&flock, compat_ptr(arg));
		break;
	case F_GETLK64:
	case F_OFD_GETLK:
		err = get_compat_flock64(&flock, compat_ptr(arg));
		if (err)
			break;
		err = fcntl_getlk(f.file, convert_fcntl_cmd(cmd), &flock);
		if (!err)
			err = put_compat_flock64(&flock, compat_ptr(arg));
		break;
	case F_SETLK:
	case F_SETLKW:
		err = get_compat_flock(&flock, compat_ptr(arg));
		if (err)
			break;
		err = fcntl_setlk(fd, f.file, convert_fcntl_cmd(cmd), &flock);
		break;
	case F_SETLK64:
	case F_SETLKW64:
	case F_OFD_SETLK:
	case F_OFD_SETLKW:
		err = get_compat_flock64(&flock, compat_ptr(arg));
		if (err)
			break;
		err = fcntl_setlk(fd, f.file, convert_fcntl_cmd(cmd), &flock);
		break;
	default:
		err = do_fcntl(fd, cmd, arg, f.file);
		break;
	}
out_put:
	fdput(f);
	return err;
}
    664
/* Compat fcntl64() entry point: accepts every command, see do_compat_fcntl64(). */
COMPAT_SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
		       compat_ulong_t, arg)
{
	return do_compat_fcntl64(fd, cmd, arg);
}
    670
    671COMPAT_SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd,
    672		       compat_ulong_t, arg)
    673{
    674	switch (cmd) {
    675	case F_GETLK64:
    676	case F_SETLK64:
    677	case F_SETLKW64:
    678	case F_OFD_GETLK:
    679	case F_OFD_SETLK:
    680	case F_OFD_SETLKW:
    681		return -EINVAL;
    682	}
    683	return do_compat_fcntl64(fd, cmd, arg);
    684}
    685#endif
    686
/*
 * Table to convert sigio signal codes into poll band bitmaps.
 * send_sigio_to_task() indexes it with (reason - POLL_IN); each entry is
 * annotated with the POLL_* code it belongs to.
 */

static const __poll_t band_table[NSIGPOLL] = {
	EPOLLIN | EPOLLRDNORM,			/* POLL_IN */
	EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND,	/* POLL_OUT */
	EPOLLIN | EPOLLRDNORM | EPOLLMSG,		/* POLL_MSG */
	EPOLLERR,				/* POLL_ERR */
	EPOLLPRI | EPOLLRDBAND,			/* POLL_PRI */
	EPOLLHUP | EPOLLERR			/* POLL_HUP */
};
    697
    698static inline int sigio_perm(struct task_struct *p,
    699                             struct fown_struct *fown, int sig)
    700{
    701	const struct cred *cred;
    702	int ret;
    703
    704	rcu_read_lock();
    705	cred = __task_cred(p);
    706	ret = ((uid_eq(fown->euid, GLOBAL_ROOT_UID) ||
    707		uid_eq(fown->euid, cred->suid) || uid_eq(fown->euid, cred->uid) ||
    708		uid_eq(fown->uid,  cred->suid) || uid_eq(fown->uid,  cred->uid)) &&
    709	       !security_file_send_sigiotask(p, fown, sig));
    710	rcu_read_unlock();
    711	return ret;
    712}
    713
/*
 * Deliver an I/O notification signal for @fd to task @p on behalf of @fown.
 *
 * Nothing is sent when sigio_perm() refuses.  If the owner selected a
 * signal number (signum != 0), a siginfo carrying the fd, the reason code
 * and the matching poll band is queued; if that fails, or if no signal
 * number was selected, a plain SIGIO is sent instead.
 *
 * @reason is expected to be one of the POLL_* codes; @type is passed on to
 * do_send_sig_info().
 */
static void send_sigio_to_task(struct task_struct *p,
			       struct fown_struct *fown,
			       int fd, int reason, enum pid_type type)
{
	/*
	 * F_SETSIG can change ->signum lockless in parallel, make
	 * sure we read it once and use the same value throughout.
	 */
	int signum = READ_ONCE(fown->signum);

	if (!sigio_perm(p, fown, signum))
		return;

	switch (signum) {
		default: {
			kernel_siginfo_t si;

			/* Queue a rt signal with the appropriate fd as its
			   value.  We use SI_SIGIO as the source, not
			   SI_KERNEL, since kernel signals always get
			   delivered even if we can't queue.  Failure to
			   queue in this case _should_ be reported; we fall
			   back to SIGIO in that case. --sct */
			clear_siginfo(&si);
			si.si_signo = signum;
			si.si_errno = 0;
		        si.si_code  = reason;
			/*
			 * POSIX defines POLL_IN and friends to be signal
			 * specific si_codes for SIGPOLL.  Linux extended
			 * these si_codes to other signals in a way that is
			 * ambiguous if other signals also have signal
			 * specific si_codes.  In that case use SI_SIGIO instead
			 * to remove the ambiguity.
			 */
			if ((signum != SIGPOLL) && sig_specific_sicodes(signum))
				si.si_code = SI_SIGIO;

			/* Make sure we are called with one of the POLL_*
			   reasons, otherwise we could leak kernel stack into
			   userspace.  */
			BUG_ON((reason < POLL_IN) || ((reason - POLL_IN) >= NSIGPOLL));
			/*
			 * NOTE(review): the BUG_ON above already covers
			 * reason - POLL_IN >= NSIGPOLL, so the ~0L branch
			 * looks unreachable - confirm before relying on it.
			 */
			if (reason - POLL_IN >= NSIGPOLL)
				si.si_band  = ~0L;
			else
				si.si_band = mangle_poll(band_table[reason - POLL_IN]);
			si.si_fd    = fd;
			/* a zero return means the signal was queued: done */
			if (!do_send_sig_info(signum, &si, p, type))
				break;
		}
			fallthrough;	/* fall back on the old plain SIGIO signal */
		case 0:
			do_send_sig_info(SIGIO, SEND_SIG_PRIV, p, type);
	}
}
    769
/*
 * Send the I/O notification for @fd to whoever owns the file described by
 * @fown.  @band is handed to send_sigio_to_task() as the reason code.
 *
 * Does nothing when no owner pid is set.  For owner types up to
 * PIDTYPE_TGID a single task is looked up under RCU; for other types every
 * task attached to the pid is signalled under tasklist_lock.  The owner
 * lock is held for reading, with interrupts saved, for the whole operation.
 */
void send_sigio(struct fown_struct *fown, int fd, int band)
{
	struct task_struct *p;
	enum pid_type type;
	unsigned long flags;
	struct pid *pid;
	
	read_lock_irqsave(&fown->lock, flags);

	type = fown->pid_type;
	pid = fown->pid;
	if (!pid)
		goto out_unlock_fown;

	if (type <= PIDTYPE_TGID) {
		/* single target: the lookup itself always uses PIDTYPE_PID */
		rcu_read_lock();
		p = pid_task(pid, PIDTYPE_PID);
		if (p)
			send_sigio_to_task(p, fown, fd, band, type);
		rcu_read_unlock();
	} else {
		read_lock(&tasklist_lock);
		do_each_pid_task(pid, type, p) {
			send_sigio_to_task(p, fown, fd, band, type);
		} while_each_pid_task(pid, type, p);
		read_unlock(&tasklist_lock);
	}
 out_unlock_fown:
	read_unlock_irqrestore(&fown->lock, flags);
}
    800
    801static void send_sigurg_to_task(struct task_struct *p,
    802				struct fown_struct *fown, enum pid_type type)
    803{
    804	if (sigio_perm(p, fown, SIGURG))
    805		do_send_sig_info(SIGURG, SEND_SIG_PRIV, p, type);
    806}
    807
/*
 * Send SIGURG to whoever owns the file described by @fown.
 *
 * For owner types up to PIDTYPE_TGID a single task is looked up under RCU;
 * for other types every task attached to the pid is signalled under
 * tasklist_lock.  The owner lock is held for reading, with interrupts
 * saved, for the whole operation.
 *
 * Returns 1 if an owner pid was set (whether or not any task actually
 * received the signal) and 0 if there was no owner.
 */
int send_sigurg(struct fown_struct *fown)
{
	struct task_struct *p;
	enum pid_type type;
	struct pid *pid;
	unsigned long flags;
	int ret = 0;
	
	read_lock_irqsave(&fown->lock, flags);

	type = fown->pid_type;
	pid = fown->pid;
	if (!pid)
		goto out_unlock_fown;

	ret = 1;

	if (type <= PIDTYPE_TGID) {
		/* single target: the lookup itself always uses PIDTYPE_PID */
		rcu_read_lock();
		p = pid_task(pid, PIDTYPE_PID);
		if (p)
			send_sigurg_to_task(p, fown, type);
		rcu_read_unlock();
	} else {
		read_lock(&tasklist_lock);
		do_each_pid_task(pid, type, p) {
			send_sigurg_to_task(p, fown, type);
		} while_each_pid_task(pid, type, p);
		read_unlock(&tasklist_lock);
	}
 out_unlock_fown:
	read_unlock_irqrestore(&fown->lock, flags);
	return ret;
}
    842
/* Taken (inside filp->f_lock) while a fasync list is searched or modified. */
static DEFINE_SPINLOCK(fasync_lock);
/* Slab cache for struct fasync_struct, created in fcntl_init(). */
static struct kmem_cache *fasync_cache __read_mostly;
    845
    846static void fasync_free_rcu(struct rcu_head *head)
    847{
    848	kmem_cache_free(fasync_cache,
    849			container_of(head, struct fasync_struct, fa_rcu));
    850}
    851
/*
 * Remove a fasync entry. If successfully removed, return
 * positive and clear the FASYNC flag. If no entry exists,
 * do nothing and return 0.
 *
 * NOTE! It is very important that the FASYNC flag always
 * match the state "is the filp on a fasync list".
 *
 * The list at @fapp is searched for the entry belonging to @filp with
 * filp->f_lock and fasync_lock held.  The entry's fa_file is cleared under
 * its own fa_lock before it is unlinked, and the memory is released through
 * call_rcu() rather than freed directly.
 */
int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp)
{
	struct fasync_struct *fa, **fp;
	int result = 0;

	spin_lock(&filp->f_lock);
	spin_lock(&fasync_lock);
	for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
		if (fa->fa_file != filp)
			continue;

		/* mark the entry dead for readers that take fa_lock */
		write_lock_irq(&fa->fa_lock);
		fa->fa_file = NULL;
		write_unlock_irq(&fa->fa_lock);

		/* unlink, then defer the free to an RCU callback */
		*fp = fa->fa_next;
		call_rcu(&fa->fa_rcu, fasync_free_rcu);
		filp->f_flags &= ~FASYNC;
		result = 1;
		break;
	}
	spin_unlock(&fasync_lock);
	spin_unlock(&filp->f_lock);
	return result;
}
    886
/*
 * Allocate a fasync_struct from the fasync slab cache with GFP_KERNEL.
 * Returns whatever kmem_cache_alloc() returns; the caller here,
 * fasync_add_entry(), treats NULL as -ENOMEM.
 */
struct fasync_struct *fasync_alloc(void)
{
	return kmem_cache_alloc(fasync_cache, GFP_KERNEL);
}
    891
/*
 * Return @new to the fasync slab cache immediately.
 *
 * NOTE! This can be used only for unused fasync entries:
 * entries that actually got inserted on the fasync list
 * need to be released by rcu - see fasync_remove_entry.
 */
void fasync_free(struct fasync_struct *new)
{
	kmem_cache_free(fasync_cache, new);
}
    901
/*
 * Insert a new entry into the fasync list.  Return the pointer to the
 * old one if we didn't use the new one.
 *
 * NOTE! It is very important that the FASYNC flag always
 * match the state "is the filp on a fasync list".
 *
 * If the list at @fapp already has an entry for @filp, only that entry's
 * fa_fd is updated and the entry is returned; @new is left untouched and
 * remains the caller's to free.  Otherwise @new is initialised, published
 * at the head of the list, FASYNC is set in filp->f_flags and NULL is
 * returned.
 */
struct fasync_struct *fasync_insert_entry(int fd, struct file *filp, struct fasync_struct **fapp, struct fasync_struct *new)
{
        struct fasync_struct *fa, **fp;

	spin_lock(&filp->f_lock);
	spin_lock(&fasync_lock);
	for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
		if (fa->fa_file != filp)
			continue;

		/* existing entry: just retarget it to the new fd */
		write_lock_irq(&fa->fa_lock);
		fa->fa_fd = fd;
		write_unlock_irq(&fa->fa_lock);
		goto out;
	}

	/* the loop ended with fa == NULL, which is what gets returned below */
	rwlock_init(&new->fa_lock);
	new->magic = FASYNC_MAGIC;
	new->fa_file = filp;
	new->fa_fd = fd;
	new->fa_next = *fapp;
	rcu_assign_pointer(*fapp, new);
	filp->f_flags |= FASYNC;

out:
	spin_unlock(&fasync_lock);
	spin_unlock(&filp->f_lock);
	return fa;
}
    938
    939/*
    940 * Add a fasync entry. Return negative on error, positive if
    941 * added, and zero if did nothing but change an existing one.
    942 */
    943static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fapp)
    944{
    945	struct fasync_struct *new;
    946
    947	new = fasync_alloc();
    948	if (!new)
    949		return -ENOMEM;
    950
    951	/*
    952	 * fasync_insert_entry() returns the old (update) entry if
    953	 * it existed.
    954	 *
    955	 * So free the (unused) new entry and return 0 to let the
    956	 * caller know that we didn't add any new fasync entries.
    957	 */
    958	if (fasync_insert_entry(fd, filp, fapp, new)) {
    959		fasync_free(new);
    960		return 0;
    961	}
    962
    963	return 1;
    964}
    965
    966/*
    967 * fasync_helper() is used by almost all character device drivers
    968 * to set up the fasync queue, and for regular files by the file
    969 * lease code. It returns negative on error, 0 if it did no changes
    970 * and positive if it added/deleted the entry.
    971 */
    972int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fapp)
    973{
    974	if (!on)
    975		return fasync_remove_entry(filp, fapp);
    976	return fasync_add_entry(fd, filp, fapp);
    977}
    978
    979EXPORT_SYMBOL(fasync_helper);
    980
/*
 * Walk the fasync list starting at @fa and call send_sigio() for every
 * entry that still has a file attached, passing the entry's fd and @band.
 * The walk is aborted with an error message if an entry's magic number is
 * wrong.
 *
 * rcu_read_lock() is held
 */
static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band)
{
	while (fa) {
		struct fown_struct *fown;
		unsigned long flags;

		if (fa->magic != FASYNC_MAGIC) {
			printk(KERN_ERR "kill_fasync: bad magic number in "
			       "fasync_struct!\n");
			return;
		}
		read_lock_irqsave(&fa->fa_lock, flags);
		/* fa_file is NULL once the entry has been removed from the list */
		if (fa->fa_file) {
			fown = &fa->fa_file->f_owner;
			/* Don't send SIGURG to processes which have not set a
			   queued signum: SIGURG has its own default signalling
			   mechanism. */
			if (!(sig == SIGURG && fown->signum == 0))
				send_sigio(fown, fa->fa_fd, band);
		}
		read_unlock_irqrestore(&fa->fa_lock, flags);
		fa = rcu_dereference(fa->fa_next);
	}
}
   1008
   1009void kill_fasync(struct fasync_struct **fp, int sig, int band)
   1010{
   1011	/* First a quick test without locking: usually
   1012	 * the list is empty.
   1013	 */
   1014	if (*fp) {
   1015		rcu_read_lock();
   1016		kill_fasync_rcu(rcu_dereference(*fp), sig, band);
   1017		rcu_read_unlock();
   1018	}
   1019}
   1020EXPORT_SYMBOL(kill_fasync);
   1021
/*
 * Init-time setup: a build-time check on the set of open flags, and
 * creation of the slab cache used for fasync entries.  Always returns 0.
 */
static int __init fcntl_init(void)
{
	/*
	 * Please add new bits here to ensure allocation uniqueness.
	 * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY
	 * is defined as O_NONBLOCK on some platforms and not on others.
	 *
	 * The build breaks unless the flags below occupy exactly 20
	 * distinct bits.
	 */
	BUILD_BUG_ON(21 - 1 /* for O_RDONLY being 0 */ !=
		HWEIGHT32(
			(VALID_OPEN_FLAGS & ~(O_NONBLOCK | O_NDELAY)) |
			__FMODE_EXEC | __FMODE_NONOTIFY));

	/*
	 * The result is not checked here; presumably SLAB_PANIC makes a
	 * failed cache creation fatal - verify against the slab API.
	 */
	fasync_cache = kmem_cache_create("fasync_cache",
					 sizeof(struct fasync_struct), 0,
					 SLAB_PANIC | SLAB_ACCOUNT, NULL);
	return 0;
}

module_init(fcntl_init)